In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,cross_val_score,KFold
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_validate
from sklearn import linear_model
from sklearn import preprocessing
import warnings
# Blanket filter: every warning category is silenced for the whole session.
warnings.filterwarnings(action='ignore')
# 5-fold splitter (no shuffling) used as the outer CV by scores_report.
cvk = KFold(n_splits=5)

In [2]:
%%HTML
<style type="text/css">
/* Draw a solid black border and black text on every rendered DataFrame cell. */
table.dataframe td,
table.dataframe th {
    border: 1px black solid !important;
    color: black !important;
}
</style>

In [3]:
def scores_report(model, parameters, random_state=None):
    """Cross-validate ``model`` tuned by grid search and by randomized search.

    Both search objects tune ``parameters`` with an inner 3-fold CV scored by
    ``neg_mean_squared_error``. Each search object is then passed to
    ``cross_validate`` together with the notebook-level ``X``, ``y`` and the
    ``cvk`` splitter, collecting train and test values of three error metrics.

    Parameters
    ----------
    model : estimator
        Estimator handed to both search objects.
    parameters : dict
        Hyper-parameter candidates; used as ``param_grid`` for the grid search
        and as ``param_distributions`` for the randomized search (5 draws).
    random_state : int or None, optional
        Seed forwarded to ``RandomizedSearchCV``. The default ``None`` keeps
        the previous, unseeded behaviour.

    Returns
    -------
    tuple
        ``(scores_GS, scores_RS)``: the ``cross_validate`` results for the
        grid search and the randomized search, in that order.
    """
    import inspect

    search_options = dict(
        scoring='neg_mean_squared_error',
        cv=3,
        return_train_score=True,
        n_jobs=-1,
    )
    # `iid` was deprecated in scikit-learn 0.22 and removed in 0.24, where
    # passing it raises TypeError. Only forward it when the installed version
    # still accepts it, so older releases keep the explicit iid=False.
    if 'iid' in inspect.signature(GridSearchCV).parameters:
        search_options['iid'] = False

    report_metrics = (
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_median_absolute_error',
    )

    model_GS = GridSearchCV(model, param_grid=parameters, **search_options)
    model_RS = RandomizedSearchCV(
        estimator=model,
        param_distributions=parameters,
        n_iter=5,
        random_state=random_state,
        **search_options
    )

    # The search objects themselves are cross-validated on the outer splitter.
    scores_GS = cross_validate(model_GS, X, y, return_train_score=True, scoring=report_metrics, cv=cvk)
    scores_RS = cross_validate(model_RS, X, y, return_train_score=True, scoring=report_metrics, cv=cvk)
    return scores_GS, scores_RS

In [4]:
def result_line(model:str, search_strategy:str, results):
    """Collapse per-fold score arrays into a single summary row.

    Parameters
    ----------
    model : str
        Label stored under the ``'Model'`` key.
    search_strategy : str
        Label stored under the ``'Search_strategy'`` key.
    results : dict
        Mapping from metric name to an object exposing ``.mean()`` (for
        example the arrays returned by ``cross_validate``).

    Returns
    -------
    dict
        A new dict holding the mean of every entry of ``results`` plus the
        two label keys. The input mapping is left untouched, so calling this
        twice on the same scores no longer fails on the string entries.
    """
    row = {metric: fold_values.mean() for metric, fold_values in results.items()}
    row['Model'] = model
    row['Search_strategy'] = search_strategy
    return row

<h1>MACHINE DB</h1>

In [5]:
# The file is read without a header row; column labels are supplied here.
namesCol = [
    "Vendor Name", "Model Name",
    "MYCT", "MMIN", "MMAX", "CACH", "CHMIN", "CHMAX",
    "PRP", "ERP",
]
df = pd.read_csv("machine.data", names=namesCol)

# Features: the six columns MYCT..CHMAX (label slices include both ends).
X = df.loc[:, "MYCT":"CHMAX"]
# Target: the PRP column as a flat 1-D array.
y = df["PRP"].values


In [6]:
# Empty comparison table; later cells add one row per (model, search strategy).
results = pd.DataFrame(
    columns=[
        'Model',
        'Search_strategy',
        'train_neg_mean_absolute_error',
        'train_neg_mean_squared_error',
        'train_neg_median_absolute_error',
        'test_neg_mean_absolute_error',
        'test_neg_mean_squared_error',
        'test_neg_median_absolute_error',
        'fit_time',
        'score_time',
    ]
)
# Show floats with thousands separators and five decimals.
pd.set_option('display.float_format', '{:2,.5f}'.format)
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time


<h2 style="color:purple;"><i>Logistic Regression</i></h2>

In [9]:
# Default-parameter model fitted on the full X/y; fit(...) is the cell's last
# expression, so the estimator repr is displayed as the cell output.
# NOTE(review): LogisticRegression is a classification estimator, while y is
# taken from the numeric PRP column and the reports use regression error
# metrics — confirm this model belongs in the comparison.
model1=linear_model.LogisticRegression()
model1.fit(X,y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [10]:
# Hyper-parameter candidates explored by both search strategies.
parameters = {
    'solver': ['lbfgs'],
    'C': [0.1, 1.0, 2, 3, 5, 13, 21],
    'max_iter': [100, 200, 300, 500],
}
res_GS, res_RS = scores_report(model1, parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = result_line('Logistic Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Logistic Regression', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0; add both rows with one concat.
# sort=False keeps the column order of the existing `results` table.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [11]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.29677,-958.82546,-3.2,-58.53833,-17219.19547,-19.4,87.5096,0.0028
1,Logistic Regression,RandomSearchCV,-14.2836,-999.73923,-2.6,-61.64309,-17748.60499,-20.8,19.08596,0.00319


<h2 style="color:darkgreen;"><i>Ridge Regression</i></h2>

In [12]:
# Ridge with default hyper-parameters, fitted on the full X/y; fit(...) is the
# cell's last expression, so the estimator repr is displayed as the output.
model2=linear_model.Ridge()
model2.fit(X,y)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [13]:
# Regularisation strengths explored by both search strategies.
parameters = {
    'alpha': [0.1, 0.5, 1, 2, 3, 6, 8, 11, 1000, 3000],
}

res_GS, res_RS = scores_report(model2, parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = result_line('Ridge Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Ridge Regression', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0; add both rows with one concat.
# sort=False keeps the column order of the existing `results` table.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [14]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.29677,-958.82546,-3.2,-58.53833,-17219.19547,-19.4,87.5096,0.0028
1,Logistic Regression,RandomSearchCV,-14.2836,-999.73923,-2.6,-61.64309,-17748.60499,-20.8,19.08596,0.00319
2,Ridge Regression,GridSearchCV,-36.66271,-3248.74162,-25.40493,-43.03567,-6302.03602,-27.2379,0.14581,0.00299
3,Ridge Regression,RandomSearchCV,-36.67376,-3246.06384,-25.44607,-43.20924,-6341.13025,-27.13516,0.06782,0.00319


<h2 style="color:darkblue;"><i>Bayesian Regression</i></h2>


In [15]:
# BayesianRidge with default hyper-parameters, fitted on the full X/y; fit(...)
# is the cell's last expression, so the estimator repr is displayed.
model3=linear_model.BayesianRidge()
model3.fit(X,y)

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [16]:
# Candidates explored by both search strategies. The full grid has
# 3 * 6**4 = 3888 combinations, so the grid search is the expensive part.
parameters = {
    'n_iter': [100, 300, 500],
    'alpha_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'alpha_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
}
res_GS, res_RS = scores_report(model3, parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = result_line('Bayesian Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Bayesian Regression', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0; add both rows with one concat.
# sort=False keeps the column order of the existing `results` table.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [17]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.29677,-958.82546,-3.2,-58.53833,-17219.19547,-19.4,87.5096,0.0028
1,Logistic Regression,RandomSearchCV,-14.2836,-999.73923,-2.6,-61.64309,-17748.60499,-20.8,19.08596,0.00319
2,Ridge Regression,GridSearchCV,-36.66271,-3248.74162,-25.40493,-43.03567,-6302.03602,-27.2379,0.14581,0.00299
3,Ridge Regression,RandomSearchCV,-36.67376,-3246.06384,-25.44607,-43.20924,-6341.13025,-27.13516,0.06782,0.00319
4,Bayesian Regression,GridSearchCV,-36.65246,-3335.91773,-25.85399,-42.10105,-6057.04935,-27.34335,25.49982,0.00279
5,Bayesian Regression,RandomSearchCV,-36.65599,-3270.44452,-25.39773,-42.0119,-6109.86332,-27.4424,0.073,0.00279


<h2 style="color:darkorange;"><i>SGD Regressor</i></h2>

In [18]:
# SGDRegressor fitted on the full X/y; fit(...) is the cell's last expression,
# so the estimator repr is displayed as the cell output.
# NOTE(review): eta0 is set to a very small initial learning rate, presumably
# because the features are not scaled before fitting — TODO confirm.
model4=linear_model.SGDRegressor(eta0=0.000001)
model4.fit(X,y)

SGDRegressor(alpha=0.0001, average=False, early_stopping=False, epsilon=0.1,
       eta0=1e-06, fit_intercept=True, l1_ratio=0.15,
       learning_rate='invscaling', loss='squared_loss', max_iter=None,
       n_iter=None, n_iter_no_change=5, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, tol=None, validation_fraction=0.1,
       verbose=0, warm_start=False)

In [19]:
# Loss, penalty, regularisation strength and iteration budget explored by
# both search strategies.
parameters = {
    'loss': ['squared_loss', 'huber', 'squared_epsilon_insensitive'],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [1e-8, 1e-3, 0.5, 3, 8],
    'max_iter': [200, 300, 800],
}
res_GS, res_RS = scores_report(model4, parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = result_line('SGD Regressor', 'GridSearchCV', res_GS)
res_RS = result_line('SGD Regressor', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0; add both rows with one concat.
# sort=False keeps the column order of the existing `results` table.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [20]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.29677,-958.82546,-3.2,-58.53833,-17219.19547,-19.4,87.5096,0.0028
1,Logistic Regression,RandomSearchCV,-14.2836,-999.73923,-2.6,-61.64309,-17748.60499,-20.8,19.08596,0.00319
2,Ridge Regression,GridSearchCV,-36.66271,-3248.74162,-25.40493,-43.03567,-6302.03602,-27.2379,0.14581,0.00299
3,Ridge Regression,RandomSearchCV,-36.67376,-3246.06384,-25.44607,-43.20924,-6341.13025,-27.13516,0.06782,0.00319
4,Bayesian Regression,GridSearchCV,-36.65246,-3335.91773,-25.85399,-42.10105,-6057.04935,-27.34335,25.49982,0.00279
5,Bayesian Regression,RandomSearchCV,-36.65599,-3270.44452,-25.39773,-42.0119,-6109.86332,-27.4424,0.073,0.00279
6,SGD Regressor,GridSearchCV,-38.17227,-7561.836,-15.93617,-39.52775,-7870.9606,-18.57828,5.9455,0.00319
7,SGD Regressor,RandomSearchCV,-37.9237,-7287.67893,-16.40216,-39.3031,-7735.41228,-18.45389,0.25771,0.00379


<h2 style="color:darkviolet;"><i>Perceptron</i></h2>

In [21]:
# Perceptron with tol=0.5 fitted on the full X/y; fit(...) is the cell's last
# expression, so the estimator repr is displayed as the cell output.
# NOTE(review): Perceptron is a classification estimator, while y is taken
# from the numeric PRP column and the reports use regression error metrics —
# confirm this model belongs in the comparison.
model5=linear_model.Perceptron(tol=0.5)
model5.fit(X,y)

Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=0, shuffle=True, tol=0.5,
      validation_fraction=0.1, verbose=0, warm_start=False)

In [22]:
# Regularisation strengths and iteration budgets explored by both strategies.
parameters = {
    'alpha': [1e-5, 1e-3, 0.5, 1, 8, 21],
    'max_iter': [200, 500, 800],
}
res_GS, res_RS = scores_report(model5, parameters=parameters)

# Average the per-fold scores into one labelled row per search strategy.
res_GS = result_line('Perceptron', 'GridSearchCV', res_GS)
res_RS = result_line('Perceptron', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0; add both rows with one concat.
# sort=False keeps the column order of the existing `results` table.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [23]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.29677,-958.82546,-3.2,-58.53833,-17219.19547,-19.4,87.5096,0.0028
1,Logistic Regression,RandomSearchCV,-14.2836,-999.73923,-2.6,-61.64309,-17748.60499,-20.8,19.08596,0.00319
2,Ridge Regression,GridSearchCV,-36.66271,-3248.74162,-25.40493,-43.03567,-6302.03602,-27.2379,0.14581,0.00299
3,Ridge Regression,RandomSearchCV,-36.67376,-3246.06384,-25.44607,-43.20924,-6341.13025,-27.13516,0.06782,0.00319
4,Bayesian Regression,GridSearchCV,-36.65246,-3335.91773,-25.85399,-42.10105,-6057.04935,-27.34335,25.49982,0.00279
5,Bayesian Regression,RandomSearchCV,-36.65599,-3270.44452,-25.39773,-42.0119,-6109.86332,-27.4424,0.073,0.00279
6,SGD Regressor,GridSearchCV,-38.17227,-7561.836,-15.93617,-39.52775,-7870.9606,-18.57828,5.9455,0.00319
7,SGD Regressor,RandomSearchCV,-37.9237,-7287.67893,-16.40216,-39.3031,-7735.41228,-18.45389,0.25771,0.00379
8,Perceptron,GridSearchCV,-125.96056,-40854.35105,-92.8,-123.92427,-41200.61022,-95.0,0.74521,0.00638
9,Perceptron,RandomSearchCV,-125.96056,-40854.35105,-92.8,-123.92427,-41200.61022,-95.0,0.41649,0.00419


In [38]:
# Replace every value after the two label columns with its absolute value, so
# the negated scorer outputs read as positive error magnitudes.
# Column names keep their "neg_" prefix after this step.
results.iloc[:, 2:] = results.iloc[:, 2:].abs()
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,14.29677,958.82546,3.2,58.53833,17219.19547,19.4,87.5096,0.0028
1,Logistic Regression,RandomSearchCV,14.2836,999.73923,2.6,61.64309,17748.60499,20.8,19.08596,0.00319
2,Ridge Regression,GridSearchCV,36.66271,3248.74162,25.40493,43.03567,6302.03602,27.2379,0.14581,0.00299
3,Ridge Regression,RandomSearchCV,36.67376,3246.06384,25.44607,43.20924,6341.13025,27.13516,0.06782,0.00319
4,Bayesian Regression,GridSearchCV,36.65246,3335.91773,25.85399,42.10105,6057.04935,27.34335,25.49982,0.00279
5,Bayesian Regression,RandomSearchCV,36.65599,3270.44452,25.39773,42.0119,6109.86332,27.4424,0.073,0.00279
6,SGD Regressor,GridSearchCV,38.17227,7561.836,15.93617,39.52775,7870.9606,18.57828,5.9455,0.00319
7,SGD Regressor,RandomSearchCV,37.9237,7287.67893,16.40216,39.3031,7735.41228,18.45389,0.25771,0.00379
8,Perceptron,GridSearchCV,125.96056,40854.35105,92.8,123.92427,41200.61022,95.0,0.74521,0.00638
9,Perceptron,RandomSearchCV,125.96056,40854.35105,92.8,123.92427,41200.61022,95.0,0.41649,0.00419


In [39]:
def color(grid):
    """Build per-cell CSS that highlights the extremes of one column.

    Parameters
    ----------
    grid : pandas.Series
        The values of a single column, as passed by ``Styler.apply``.

    Returns
    -------
    list of str
        One CSS string per value: red background for entries equal to the
        maximum, green background for entries equal to the minimum, and an
        empty string otherwise. When maximum and minimum coincide, red wins.
    """
    # Compute the extremes once instead of once per element.
    highest = grid.max()
    lowest = grid.min()

    styles = []
    for value in grid:
        if value == highest:
            styles.append('background-color: red')
        elif value == lowest:
            styles.append('background-color: green')
        else:
            styles.append('')
    return styles

In [40]:
# Apply the min/max highlighting column by column to every numeric column.
results_highlighted = results.style.apply(
    color,
    subset=[
        'train_neg_mean_absolute_error',
        'train_neg_mean_squared_error',
        'train_neg_median_absolute_error',
        'test_neg_mean_absolute_error',
        'test_neg_mean_squared_error',
        'test_neg_median_absolute_error',
        'fit_time',
        'score_time',
    ],
)

In [41]:
# Render the styled table (red = column maximum, green = column minimum).
results_highlighted

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,14.2968,958.825,3.2,58.5383,17219.2,19.4,87.5096,0.00279512
1,Logistic Regression,RandomSearchCV,14.2836,999.739,2.6,61.6431,17748.6,20.8,19.086,0.003192
2,Ridge Regression,GridSearchCV,36.6627,3248.74,25.4049,43.0357,6302.04,27.2379,0.14581,0.00299177
3,Ridge Regression,RandomSearchCV,36.6738,3246.06,25.4461,43.2092,6341.13,27.1352,0.0678193,0.00319085
4,Bayesian Regression,GridSearchCV,36.6525,3335.92,25.854,42.101,6057.05,27.3434,25.4998,0.00279098
5,Bayesian Regression,RandomSearchCV,36.656,3270.44,25.3977,42.0119,6109.86,27.4424,0.0730045,0.00279279
6,SGD Regressor,GridSearchCV,38.1723,7561.84,15.9362,39.5278,7870.96,18.5783,5.9455,0.00319266
7,SGD Regressor,RandomSearchCV,37.9237,7287.68,16.4022,39.3031,7735.41,18.4539,0.25771,0.00379033
8,Perceptron,GridSearchCV,125.961,40854.4,92.8,123.924,41200.6,95.0,0.745207,0.00638356
9,Perceptron,RandomSearchCV,125.961,40854.4,92.8,123.924,41200.6,95.0,0.416486,0.00418859
