In [144]:
import pandas as pd
import numpy as np
import math
from scipy import stats
import random
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold, RandomizedSearchCV

In [None]:
"""
Assuming that we have our data, this will be the
data preprocessing step in which we:
1- Clean the dataset
2- Extract features
"""

In [80]:
class Region:
    """Plain record holding the five numeric features of one region.

    Attributes:
        num_pins: value passed as ``num_pins``.
        num_modules: value passed as ``num_modules``.
        total_area: value passed as ``total_area``.
        num_tracks: value passed as ``num_tracks``.
        net_cuts_num: value passed as ``net_cuts_num``.
    """

    def __init__(self, num_pins, num_modules, total_area, num_tracks, net_cuts_num):
        """Store every argument unchanged on the instance."""
        self.num_pins, self.num_modules = num_pins, num_modules
        self.total_area, self.num_tracks = total_area, num_tracks
        self.net_cuts_num = net_cuts_num

    def to_dict(self):
        """Return the features as a dict, in declaration order.

        Note that the last key is spelled ``'net_cut_num'`` while the
        attribute it is read from is ``net_cuts_num``.
        """
        keys = ('num_pins', 'num_modules', 'total_area', 'num_tracks', 'net_cut_num')
        values = (
            self.num_pins,
            self.num_modules,
            self.total_area,
            self.num_tracks,
            self.net_cuts_num,
        )
        return dict(zip(keys, values))

In [78]:
# Parse the ibm02 netlist (.net) and area (.are) files into one table with a
# row per module: its name, its area, and how many netlist lines mention it.
pins_info_area = {}   # module name -> area (second field of each .are line)
modules_pins = {}     # module name -> number of .net lines starting with that name

# `with` closes the handle even when a line fails to parse; iterating the file
# object streams it instead of loading every line up front.
with open('ibm02/ibm02.net', 'r') as pin_names_file:
    for num_line, line in enumerate(pin_names_file):
        # Header values are picked up purely by line position.
        # NOTE(review): the ISPD98 netD header is usually described as
        # 0 / pins / nets / modules / pad-offset, which would put the module
        # count on line index 3, not 4 -- confirm. `num_of_modules` and
        # `num_of_pins` are not used by the visible cells.
        if num_line == 4:
            num_of_modules = int(line)
        if num_line == 2:
            num_nets = int(line)
        if num_line == 1:
            num_of_pins = int(line)

        # Any line whose first space-separated token is not purely numeric is
        # counted as one pin connection of the module named by that token.
        first_token = line.split(' ')[0]
        if not first_token.replace('\n', '').isdigit():
            modules_pins[first_token] = modules_pins.get(first_token, 0) + 1

with open('ibm02/ibm02.are', 'r') as pin_names_area:
    for line in pin_names_area:
        info = line.split(' ')
        pins_info_area[info[0]] = int(info[1])

sorted_pins_area = dict(sorted(pins_info_area.items()))
sorted_modules = dict(sorted(modules_pins.items()))

# NOTE(review): the pin counts are attached by position, so this relies on both
# files naming exactly the same set of modules -- confirm for other benchmarks.
data = pd.DataFrame({
    'modules_names': list(sorted_pins_area.keys()),
    'area': list(sorted_pins_area.values()),
    'pin_connections': list(sorted_modules.values()),
})
data

Unnamed: 0,modules_names,area,pin_connections
0,a0,224,6
1,a1,64,3
2,a10,96,2
3,a100,64,3
4,a1000,224,6
...,...,...,...
19596,p95,0,1
19597,p96,0,1
19598,p97,0,1
19599,p98,0,1


In [100]:
# Cut the module table into consecutive slices of 16 rows (the final slice may
# be shorter) and summarise each slice as one Region.
chunks_data = []
for start in range(0, data.shape[0], 16):
    chunks_data.append(data[start:start + 16])

final_dataset = []
for region_rows in chunks_data:
    region_frame = pd.DataFrame(region_rows)

    pin_total = region_frame['pin_connections'].sum()
    area_sum = region_frame['area'].sum()
    area_mean = region_frame['area'].mean()

    final_dataset.append(
        Region(
            pin_total,                  # num_pins
            len(region_frame),          # num_modules
            int(area_sum + area_mean),  # total_area
            int(area_mean),             # num_tracks
            num_nets // pin_total,      # net_cuts_num
        )
    )

# One row per region; regions whose total_area is 0 are discarded.
final = pd.DataFrame.from_records([region.to_dict() for region in final_dataset])
final = final[final['total_area'] != 0]
final

Unnamed: 0,num_pins,num_modules,total_area,num_tracks,net_cut_num
0,70,16,2380,140,279
1,63,16,1870,110,310
2,72,16,2210,130,272
3,65,16,2958,174,301
4,67,16,2108,124,292
...,...,...,...,...,...
1204,57,16,1870,110,343
1205,61,16,10234,602,321
1206,72,16,2380,140,272
1207,63,16,2448,144,310


In [127]:
# Derive a target value per region:
#   (num_pins * num_modules * num_tracks * net_cut_num) / total_area
# and scale it by the largest value so the maximum label is 1.0.
raw_labels = [
    (int(pins) * int(modules) * int(tracks) * int(cuts)) / int(area)
    for pins, modules, tracks, cuts, area in zip(
        final['num_pins'],
        final['num_modules'],
        final['num_tracks'],
        final['net_cut_num'],
        final['total_area'],
    )
]

labels = []
if raw_labels:
    # Compute the maximum once; evaluating max() inside the comprehension
    # rescans the whole list for every element.
    largest_label = max(raw_labels)
    labels = [float(value) / largest_label for value in raw_labels]

# `final` comes from a boolean filter, so assigning a column in place can
# trigger SettingWithCopyWarning; assign() returns a new frame instead.
final = final.assign(label=labels)

final

Unnamed: 0,num_pins,num_modules,total_area,num_tracks,net_cut_num,label
0,70,16,2380,140,279,0.997243
1,63,16,1870,110,310,0.997243
2,72,16,2210,130,272,1.000000
3,65,16,2958,174,301,0.999030
4,67,16,2108,124,292,0.998979
...,...,...,...,...,...,...
1204,57,16,1870,110,343,0.998315
1205,61,16,10234,602,321,0.999847
1206,72,16,2380,140,272,1.000000
1207,63,16,2448,144,310,0.997243


In [141]:
def mean_relative_error(actual, predicted):
    """Return the mean of ``|actual - predicted| / (actual + 1)``.

    The ``+ 1`` in the denominator is kept exactly as in the original
    formula; it shifts the denominator away from zero for targets near 0.

    Args:
        actual: array-like of reference values.
        predicted: array-like of predictions, same shape as ``actual``.

    Returns:
        The mean relative error as a numpy float.

    Raises:
        ValueError: if ``actual`` and ``predicted`` differ in shape.
    """
    actual = np.asarray(actual)
    predicted = np.asarray(predicted)
    # Reject mismatched inputs explicitly rather than silently ignoring
    # surplus predictions or broadcasting a shorter array.
    if actual.shape != predicted.shape:
        raise ValueError(
            'actual and predicted must have the same shape, got '
            f'{actual.shape} and {predicted.shape}'
        )
    return np.mean(np.abs(actual - predicted) / (actual + 1))

In [145]:
def _tune_and_score(tag, estimator, param_distributions, x_train, y_train,
                    holdout_features, holdout_label, n_iter_search):
    """Tune one estimator by randomized search and score it on the hold-out split.

    Args:
        tag: short model name used in the progress messages.
        estimator: unfitted scikit-learn regressor.
        param_distributions: parameter distributions to sample from.
        x_train, y_train: data the search is fitted on.
        holdout_features, holdout_label: data the best estimator is scored on.
        n_iter_search: number of parameter settings sampled by the search.

    Returns:
        Tuple ``(mre, mae, rmse)`` of the best estimator on the hold-out split.
    """
    print(f'{tag} ** Training ...')
    # The sampling budget belongs in `n_iter`; it used to be passed as `cv`,
    # which requested 20-fold inner CV and left n_iter at its default.
    search = RandomizedSearchCV(estimator, param_distributions, n_iter=n_iter_search)
    search.fit(x_train, y_train)
    print(f'{tag} ** Predicting ...')
    y_pred = search.best_estimator_.predict(holdout_features)
    return (
        mean_relative_error(holdout_label, y_pred),
        metrics.mean_absolute_error(holdout_label, y_pred),
        math.sqrt(metrics.mean_squared_error(holdout_label, y_pred)),
    )


def run_technique(n_runs=30, n_splits=10, n_iter_search=20):
    """Compare a random forest and an SVR on the module-level ``final`` frame.

    ``final`` is split once into 80% train / 20% hold-out. For every run the
    training part is re-split with a shuffled K-fold; on each fold both models
    are tuned with a randomized search fitted on the fold's training rows and
    scored on the fixed hold-out split. Per-run averages of MRE, MAE and RMSE
    are collected and printed at the end; nothing is returned.

    Args:
        n_runs: number of repetitions of the K-fold procedure.
        n_splits: number of folds per run.
        n_iter_search: parameter settings sampled per randomized search.
    """
    features = final.drop(['label'], axis='columns')
    print(features)
    label = final['label']

    train_features, test_features, train_label, test_label = train_test_split(
        features, label, test_size=0.2)

    # Search spaces do not depend on the fold, so build them once.
    params_rf = {
        "n_estimators": stats.randint(30, 200),
        "min_samples_leaf": stats.randint(30, 50),
        "max_depth": stats.randint(20, 50)
    }
    params_svr = {
        "C": stats.uniform(2, 10),       # scipy uniform(loc, scale): range [2, 12]
        "gamma": stats.uniform(0.1, 1)   # range [0.1, 1.1]
    }

    # (progress tag, estimator class, search space), in reporting order.
    candidates = (
        ('RF', RandomForestRegressor, params_rf),
        ('SVM', SVR, params_svr),
    )
    metric_names = ('mre', 'mae', 'rmse')
    run_averages = {tag: {name: [] for name in metric_names} for tag, _, _ in candidates}

    for run in range(n_runs):
        print('run:', run+1)

        kf = KFold(n_splits=n_splits, shuffle=True)
        fold_scores = {tag: {name: [] for name in metric_names} for tag, _, _ in candidates}

        # Only the training indices of each fold are used: scoring is done on
        # the fixed hold-out split, not on the fold's validation rows.
        for fold_number, (train_index, _) in enumerate(kf.split(train_features), start=1):
            print('fold number:', fold_number)

            x_train = train_features.iloc[train_index]
            y_train = train_label.iloc[train_index]

            for tag, estimator_cls, param_distributions in candidates:
                scores = _tune_and_score(
                    tag, estimator_cls(), param_distributions,
                    x_train, y_train, test_features, test_label, n_iter_search)
                for name, value in zip(metric_names, scores):
                    fold_scores[tag][name].append(value)

        for tag, _, _ in candidates:
            for name in metric_names:
                run_averages[tag][name].append(np.mean(fold_scores[tag][name]))

    # Report order: RF mre, mae, rmse, then SVR mre, mae, rmse.
    for tag, _, _ in candidates:
        for name in metric_names:
            print(run_averages[tag][name])
            print('########################################################')

In [146]:
# Run the RF vs. SVR experiment; results are printed, nothing is returned.
run_technique()

      num_pins  num_modules  total_area  num_tracks  net_cut_num
0           70           16        2380         140          279
1           63           16        1870         110          310
2           72           16        2210         130          272
3           65           16        2958         174          301
4           67           16        2108         124          292
...        ...          ...         ...         ...          ...
1204        57           16        1870         110          343
1205        61           16       10234         602          321
1206        72           16        2380         140          272
1207        63           16        2448         144          310
1208        62           16        1938         114          315

[1209 rows x 5 columns]
run: 1
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predictin

SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
run: 9
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
f

SVM ** Predicting ...
run: 17
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...


SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
run: 26
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...
fold number: 1
RF ** Training ...
RF ** Predicting ...
SVM ** Training ...
SVM ** Predicting ...


In [151]:
# Hard-coded per-run metric values, 30 entries per list.
# NOTE(review): presumably pasted from a run_technique() run -- confirm; they
# are not regenerated when the notebook is re-executed.
rf_avg_rme_f = [0.00022673232685482898, 0.00022718032962343917, 0.0002288947486583502, 0.00022543658224427276, 0.000231753836056309, 0.0002290571025770554, 0.0002264070145062792, 0.0002343654434227564, 0.00023021947379753922, 0.0002306028100463288, 0.00022751336826033682, 0.00023632727687810618, 0.00023147246975015935, 0.0002264008876608273, 0.00022784947150752082, 0.00022795791298740317, 0.00023089929333886054, 0.00022681120837892682, 0.00022579742273455306, 0.00022971129658322094, 0.00022838513715029395, 0.00022969144263354147, 0.00022644555508002467, 0.00022525914202087553, 0.00023003686379063112, 0.0002287598698519494, 0.00022945271860208465, 0.00022924075122040963, 0.00022903698048729152, 0.0002305410042347217]
rf_avg_mae_f = [0.00045315851755568456, 0.00045405514867331316, 0.0004574817957999634, 0.000450568750928808, 0.00046319661946956205, 0.0004578052287011553, 0.0004525084734942138, 0.0004684160114346996, 0.0004601296047993876, 0.0004608962486227373, 0.0004547216990455825, 0.0004723367989965273, 0.00046263344201204453, 0.0004524958095985662, 0.00045539307896689206, 0.0004556103783433041, 0.0004614888910341716, 0.0004533162864425399, 0.0004512912696654603, 0.0004591119090888148, 0.0004564626169608219, 0.00045907421188432927, 0.0004525860710773093, 0.00045021394211049095, 0.00045976346594356674, 0.00045721126122884976, 0.0004585947886797408, 0.0004581723132928837, 0.0004577652721551674, 0.00046077169050697875]
rf_avg_rmse_f = [0.0006053360020500687, 0.0006068365035848071, 0.0006111823213281721, 0.0006031326014587148, 0.0006154310554152376, 0.000612735424210512, 0.0006058578983953601, 0.0006256592258906989, 0.0006142014826202968, 0.0006124792739166749, 0.0006090395282091573, 0.0006300220825698665, 0.0006187050738403413, 0.0006070682006353336, 0.0006104762079448894, 0.0006096865152715707, 0.000615328664610573, 0.0006049264273033895, 0.000605560368262187, 0.0006152071764155989, 0.0006081553289509185, 0.0006146467481137867, 0.0006049592906248668, 0.000602998229984753, 0.0006161213407963103, 0.0006128698263720022, 0.0006147177643103753, 0.0006123316715530723, 0.0006115712286045426, 0.0006154164845866685]
svr_avg_rme_f = [0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005349085951226025, 0.0005349085951226025, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756, 0.0005350452212203756]
svr_avg_mae_f = [0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531492, 0.0010692866161616035, 0.0010692866161616037, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531494, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531494, 0.0010695609166531492, 0.0010695609166531494, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531494, 0.0010695609166531492, 0.0010695609166531494, 0.0010695609166531492, 0.0010695609166531492, 0.0010695609166531494, 0.0010695609166531492]
svr_avg_rmse_f = [0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196163, 0.0012923775033414134, 0.0012923775033414134, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196165, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196165, 0.0012931863283196163, 0.0012931863283196165, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196165, 0.0012931863283196163, 0.0012931863283196165, 0.0012931863283196163, 0.0012931863283196163, 0.0012931863283196165, 0.0012931863283196163]

# Print the arithmetic mean of each list, in the same order as they are defined.
for _series in (
    rf_avg_rme_f,
    rf_avg_mae_f,
    rf_avg_rmse_f,
    svr_avg_rme_f,
    svr_avg_mae_f,
    svr_avg_rmse_f,
):
    print(sum(_series) / len(_series))

0.0002289413246979633
0.0004575743865504522
0.0006117553315943582
0.0005350361128138571
0.0010695426299537133
0.0012931324066544029
