In [52]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate

In [53]:
%%HTML
<style type="text/css">
/* Force a solid black 1px border and black text on every cell of tables with class "dataframe". */
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [54]:
def error_report(model, parameters, x=None, y=None, random_state=None):
    """Cross-validate a grid search and a randomized search over the same estimator.

    Each search (3-fold inner CV) is itself evaluated with a 5-fold outer
    ``cross_validate`` using three error metrics.

    Parameters
    ----------
    model : estimator
        Estimator instance handed to both search wrappers.
    parameters : dict
        Hyper-parameter candidates; used as ``param_grid`` for the grid
        search and as ``param_distributions`` for the randomized search.
    x, y : optional
        Features and target. When omitted, the notebook-level ``x_set`` and
        ``y_set`` are used, so existing two-argument calls behave as before.
    random_state : optional
        Forwarded to ``RandomizedSearchCV`` so its 4 sampled candidates can
        be made reproducible. ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(scores_GS, scores_RS)`` — the ``cross_validate`` results for the
        grid search and the randomized search respectively.
    """
    # Fall back to the notebook globals only when no data was passed explicitly.
    features = x_set if x is None else x
    target = y_set if y is None else y

    # Defined once so both searches are guaranteed to be scored identically.
    scoring = ('neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error')

    # The former ``iid=False`` keyword is not passed any more: current
    # scikit-learn search classes do not accept it (see the search-class API
    # reference). The inner searches' own train scores were never read here,
    # so they are no longer requested either.
    model_GS = GridSearchCV(model, param_grid=parameters, cv=3, n_jobs=-1)
    model_RS = RandomizedSearchCV(
        estimator=model,
        param_distributions=parameters,
        cv=3,
        n_iter=4,
        n_jobs=-1,
        random_state=random_state,
    )

    scores_GS = cross_validate(model_GS, features, target, return_train_score=True, scoring=scoring, cv=5)
    scores_RS = cross_validate(model_RS, features, target, return_train_score=True, scoring=scoring, cv=5)
    return scores_GS, scores_RS

In [55]:
def make_line(model:str, search_strategy:str, results):
    """Collapse per-fold scores into a single labelled summary row.

    Parameters
    ----------
    model : str
        Label stored under the ``'Model'`` key.
    search_strategy : str
        Label stored under the ``'Search_strategy'`` key.
    results : mapping
        Maps a score name to an object exposing ``.mean()``.

    Returns
    -------
    dict
        A new dict holding the mean of every entry of ``results`` followed by
        the two labels. ``results`` itself is left untouched, so the function
        can safely be called again on the same input (previously the second
        call failed because the input had been overwritten with the labels).
    """
    line = {key: scores.mean() for key, scores in results.items()}
    line['Model'] = model
    line['Search_strategy'] = search_strategy
    return line
    

In [56]:
# Column labels for the CSV, which is read without a header row of its own.
# (Despite its name, namesDict is a plain list.)
namesDict = [
    "Vendor Name",
    "Model Name",
    "MYCT",
    "MMIN",
    "MMAX",
    "CACH",
    "CHMIN",
    "CHMAX",
    "PRP",
    "ERP",
]
data_set = pd.read_csv(
    'machine.data',
    names=namesDict,
)
data_set.head()

Unnamed: 0,Vendor Name,Model Name,MYCT,MMIN,MMAX,CACH,CHMIN,CHMAX,PRP,ERP
0,adviser,32/60,125,256,6000,256,16,128,198,199
1,amdahl,470v/7,29,8000,32000,32,8,32,269,253
2,amdahl,470v/7a,29,8000,32000,32,8,32,220,253
3,amdahl,470v/7b,29,8000,32000,32,8,32,172,253
4,amdahl,470v/7c,29,8000,16000,32,8,16,132,132


In [57]:
# Keep the six numeric predictors plus PRP and drop the vendor/model labels and ERP.
# Selecting by name (rather than slicing data_set positionally twice) yields the same
# seven columns on the first run and makes the cell safe to re-run: the old version
# removed two more columns every time it was executed again.
model_columns = ["MYCT", "MMIN", "MMAX", "CACH", "CHMIN", "CHMAX", "PRP"]
data_set = data_set.loc[:, model_columns]
data_set.head()

Unnamed: 0,MYCT,MMIN,MMAX,CACH,CHMIN,CHMAX,PRP
0,125,256,6000,256,16,128,198
1,29,8000,32000,32,8,32,269
2,29,8000,32000,32,8,32,220
3,29,8000,32000,32,8,32,172
4,29,8000,16000,32,8,16,132


In [58]:
# Predictors: the first six columns of data_set.
x_set = data_set.iloc[:, :6]
# Target: everything from column 6 onwards, flattened to a 1-D array.
y_set = np.ravel(data_set.iloc[:, 6:])

In [59]:
# Empty summary table; the model cells below add one row per (model, search strategy).
results = pd.DataFrame(
    columns=[
        'Model',
        'Search_strategy',
        'train_neg_mean_absolute_error',
        'train_neg_mean_squared_error',
        'train_neg_median_absolute_error',
        'test_neg_mean_absolute_error',
        'test_neg_mean_squared_error',
        'test_neg_median_absolute_error',
        'fit_time',
        'score_time',
    ]
)


In [60]:
# Display the (still empty) summary table to confirm its column layout.
results.head()

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time


In [61]:
from sklearn import linear_model

## Ridge Regression


In [62]:
# Candidate regularisation strengths tried by both search strategies.
parameters = {'alpha': [0.1, 0.5, 1, 2, 3, 5, 8, 13, 1000, 5000]}

res_GS, res_RS = error_report(linear_model.Ridge(), parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = make_line('Ridge Regression', 'GridSearchCV', res_GS)
res_RS = make_line('Ridge Regression', 'RandomSearchCV', res_RS)

# Remove rows left behind by an earlier run of this cell so re-running it does not
# duplicate them, then add the fresh rows. pd.concat is used instead of
# DataFrame.append, which recent pandas releases no longer provide.
results = pd.concat(
    [results[results['Model'] != 'Ridge Regression'], pd.DataFrame([res_GS, res_RS])],
    ignore_index=True,
    sort=False,
)

## Bayesian Regression

In [64]:
# 3 * 6**4 = 3888 combinations for the exhaustive grid search; the randomized
# search samples only 4 of them (see error_report).
# NOTE(review): newer scikit-learn releases appear to name BayesianRidge's
# iteration limit `max_iter` instead of `n_iter` — confirm against the installed version.
parameters = {
    'n_iter': [100, 300, 500],
    'alpha_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'alpha_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_1':[1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_2':[1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0]
}
res_GS, res_RS = error_report(linear_model.BayesianRidge(), parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = make_line('Bayesian Regression', 'GridSearchCV', res_GS)
res_RS = make_line('Bayesian Regression', 'RandomSearchCV', res_RS)

# Remove rows left behind by an earlier run of this cell so re-running it does not
# duplicate them, then add the fresh rows. pd.concat is used instead of
# DataFrame.append, which recent pandas releases no longer provide.
results = pd.concat(
    [results[results['Model'] != 'Bayesian Regression'], pd.DataFrame([res_GS, res_RS])],
    ignore_index=True,
    sort=False,
)

## Logistic Regression

In [65]:
# NOTE(review): LogisticRegression is a classifier, so each distinct value of the
# target passed by error_report is presumably treated as its own class label.
# Confirm this is intended in a comparison of regression models.
parameters = {
    'solver': ['lbfgs'],
    'C': [0.1, 1.0, 2, 5, 13, 21],
    'max_iter': [100, 300, 500]
}
res_GS, res_RS = error_report(linear_model.LogisticRegression(), parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = make_line('Logistic Regression', 'GridSearchCV', res_GS)
res_RS = make_line('Logistic Regression', 'RandomSearchCV', res_RS)

# Remove rows left behind by an earlier run of this cell so re-running it does not
# duplicate them, then add the fresh rows. pd.concat is used instead of
# DataFrame.append, which recent pandas releases no longer provide.
results = pd.concat(
    [results[results['Model'] != 'Logistic Regression'], pd.DataFrame([res_GS, res_RS])],
    ignore_index=True,
    sort=False,
)















## Perceptron


In [66]:
# NOTE(review): Perceptron is a classifier, so the numeric target is presumably
# treated as a set of class labels — confirm this is intended. The recorded
# grid-search and random-search rows for this model are identical.
parameters = {
    'alpha': [1e-5, 1e-3, 0.5, 1, 8, 21],
    'max_iter': [200, 500, 1000]
}
res_GS, res_RS = error_report(linear_model.Perceptron(tol=0.21), parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = make_line('Perceptron', 'GridSearchCV', res_GS)
res_RS = make_line('Perceptron', 'RandomSearchCV', res_RS)

# Remove rows left behind by an earlier run of this cell so re-running it does not
# duplicate them, then add the fresh rows. pd.concat is used instead of
# DataFrame.append, which recent pandas releases no longer provide.
results = pd.concat(
    [results[results['Model'] != 'Perceptron'], pd.DataFrame([res_GS, res_RS])],
    ignore_index=True,
    sort=False,
)



## SGDRegressor

In [67]:
# NOTE(review): the errors recorded for this model are orders of magnitude larger
# than for every other model. x_set is passed in unscaled, which likely destabilises
# SGD; consider standardising the features (e.g. a scaler in a Pipeline) before fitting.
# NOTE(review): newer scikit-learn releases appear to spell the 'squared_loss'
# option 'squared_error' — confirm against the installed version.
parameters = {
    'loss': ['squared_loss', 'huber', 'squared_epsilon_insensitive'],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [1e-8, 1e-3, 0.5, 3, 8],
    'max_iter': [200, 300, 800]
}
res_GS, res_RS = error_report(linear_model.SGDRegressor(tol=1e-3), parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = make_line('SGD Regressor', 'GridSearchCV', res_GS)
res_RS = make_line('SGD Regressor', 'RandomSearchCV', res_RS)

# Remove rows left behind by an earlier run of this cell so re-running it does not
# duplicate them, then add the fresh rows. pd.concat is used instead of
# DataFrame.append, which recent pandas releases no longer provide.
results = pd.concat(
    [results[results['Model'] != 'SGD Regressor'], pd.DataFrame([res_GS, res_RS])],
    ignore_index=True,
    sort=False,
)

In [68]:
# Show every collected row (head(100) caps the display at 100 rows).
results.head(100)


Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-36.72983,-3270.6882,-25.67606,-42.01974,-6091.45291,-26.62268,3.52303,0.0
1,Ridge Regression,RandomSearchCV,-36.68094,-3257.25251,-25.5062,-41.9096,-6107.48904,-26.77969,0.03747,0.00313
2,Bayesian Regression,GridSearchCV,-38.16985,-3832.71593,-25.17923,-42.24818,-6030.99658,-27.53795,40.77733,0.00299
3,Bayesian Regression,RandomSearchCV,-38.18553,-3813.15218,-25.47964,-41.65659,-5967.31271,-27.7338,0.08632,0.00498
4,Logistic Regression,GridSearchCV,-14.04097,-1007.41555,-3.1,-29.31528,-5813.72028,-14.4,61.05661,0.00559
5,Logistic Regression,RandomSearchCV,-14.08769,-983.11719,-2.9,-27.74694,-5703.70861,-14.0,15.89632,0.00418
6,Perceptron,GridSearchCV,-133.45814,-44895.37628,-110.5,-105.34472,-32151.16417,-52.6,0.52559,0.0004
7,Perceptron,RandomSearchCV,-133.45814,-44895.37628,-110.5,-105.34472,-32151.16417,-52.6,0.18711,0.0018
8,SGD Regressor,GridSearchCV,-3120.41046,-38212992.31923,-2075.74839,-3560.94295,-56766758.57099,-2288.57323,0.75303,0.0008
9,SGD Regressor,RandomSearchCV,-9.341227339456483e+16,-4.407283794787204e+34,-6.083124868915539e+16,-7.327653268617904e+16,-2.7890454656775296e+34,-4.881661126966064e+16,0.0466,0.0018


In [75]:
# Render floats with five decimals. The replacement field must be '{:.5f}':
# the previous '{5:.5f}' referred to positional argument 5 and raised
# IndexError as soon as pandas tried to format a float with it.
pd.options.display.float_format = '{:.5f}'.format

In [89]:
# Replace every value from the third column onwards with its absolute value, so the
# "neg_*" scores become positive errors. The column names keep their neg_ prefix.
results.iloc[:, 2:] = results.iloc[:, 2:].abs()

In [77]:
def myhighlight(x):
    """Build one CSS declaration per element of ``x`` for use with ``Styler.apply``.

    Elements equal to the maximum of ``x`` get a red background, elements
    equal to the minimum get a green background, and all others get an empty
    string. The maximum is checked first, so when every element is equal
    they are all marked red.

    Parameters
    ----------
    x : iterable exposing ``.max()`` and ``.min()``
        The values to style (a column of the styled frame in this notebook).

    Returns
    -------
    list of str
        CSS strings in the same order as the elements of ``x``.
    """
    # Compute the extremes once instead of re-evaluating them for every element.
    highest = x.max()
    lowest = x.min()

    styles = []
    for v in x:
        if v == highest:
            styles.append('background-color: red')
        elif v == lowest:
            styles.append('background-color: green')
        else:
            styles.append('')
    return styles

In [78]:
# Style the summary table: myhighlight is applied to each listed numeric column,
# leaving the 'Model' and 'Search_strategy' label columns unstyled.
final_results = results.style.apply(
    myhighlight,
    subset=[
        'train_neg_mean_absolute_error',
        'train_neg_mean_squared_error',
        'train_neg_median_absolute_error',
        'test_neg_mean_absolute_error',
        'test_neg_mean_squared_error',
        'test_neg_median_absolute_error',
        'fit_time',
        'score_time',
    ],
)

In [79]:
# Render the styled summary table.
final_results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,36.7298,3270.69,25.6761,42.0197,6091.45,26.6227,3.52303,0.0
1,Ridge Regression,RandomSearchCV,36.6809,3257.25,25.5062,41.9096,6107.49,26.7797,0.037467,0.00312982
2,Bayesian Regression,GridSearchCV,38.1698,3832.72,25.1792,42.2482,6031.0,27.5379,40.7773,0.00299339
3,Bayesian Regression,RandomSearchCV,38.1855,3813.15,25.4796,41.6566,5967.31,27.7338,0.0863237,0.00498185
4,Logistic Regression,GridSearchCV,14.041,1007.42,3.1,29.3153,5813.72,14.4,61.0566,0.00558553
5,Logistic Regression,RandomSearchCV,14.0877,983.117,2.9,27.7469,5703.71,14.0,15.8963,0.00417638
6,Perceptron,GridSearchCV,133.458,44895.4,110.5,105.345,32151.2,52.6,0.525586,0.000398731
7,Perceptron,RandomSearchCV,133.458,44895.4,110.5,105.345,32151.2,52.6,0.187106,0.0017952
8,SGD Regressor,GridSearchCV,3120.41,38213000.0,2075.75,3560.94,56766800.0,2288.57,0.753033,0.000797939
9,SGD Regressor,RandomSearchCV,9.34123e+16,4.40728e+34,6.08312e+16,7.32765e+16,2.78905e+34,4.88166e+16,0.0465974,0.0018023
