In [20]:
import inspect
import warnings

import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn import metrics
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split,cross_val_score,KFold
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Outer 5-fold splitter; scores_report() passes it to cross_validate().
cvk = KFold(n_splits=5)

In [2]:
%%HTML
<!-- Draw a solid 1px black border and black text on every cell of tables with class "dataframe". -->
<style type="text/css">
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [3]:
def _iid_kwargs(search_cls):
    """Return ``{'iid': False}`` if ``search_cls`` still accepts ``iid``, else ``{}``."""
    accepted = inspect.signature(search_cls.__init__).parameters
    return {'iid': False} if 'iid' in accepted else {}


def scores_report(model, parameters, random_state=None):
    """Cross-validate a grid search and a randomized search over the same grid.

    Both searches wrap ``model`` with an inner 3-fold CV that optimises
    ``neg_mean_squared_error``. Each search object is then evaluated with
    ``cross_validate`` on the module-level ``X`` and ``y`` using the
    module-level splitter ``cvk``.

    Parameters
    ----------
    model : estimator
        Estimator handed to both search classes.
    parameters : dict
        Used as ``param_grid`` for ``GridSearchCV`` and as
        ``param_distributions`` for ``RandomizedSearchCV`` (5 sampled candidates).
    random_state : int or None, optional
        Forwarded to ``RandomizedSearchCV`` so the sampled candidates can be
        made repeatable. The default ``None`` keeps the previous unseeded
        behaviour.

    Returns
    -------
    tuple
        ``(scores_GS, scores_RS)``, the two ``cross_validate`` results.
    """
    inner = dict(scoring='neg_mean_squared_error', cv=3,
                 return_train_score=True, n_jobs=-1)

    # ``iid`` no longer exists in recent scikit-learn releases and passing it
    # raises TypeError there, so it is only supplied when still accepted.
    model_GS = GridSearchCV(
        model, param_grid=parameters,
        **dict(inner, **_iid_kwargs(GridSearchCV)))
    model_RS = RandomizedSearchCV(
        estimator=model, param_distributions=parameters, n_iter=5,
        random_state=random_state,
        **dict(inner, **_iid_kwargs(RandomizedSearchCV)))

    # Metrics reported for the outer folds (identical for both strategies).
    outer_scoring = ('neg_mean_absolute_error', 'neg_mean_squared_error',
                     'neg_median_absolute_error')
    scores_GS = cross_validate(model_GS, X, y, return_train_score=True,
                               scoring=outer_scoring, cv=cvk)
    scores_RS = cross_validate(model_RS, X, y, return_train_score=True,
                               scoring=outer_scoring, cv=cvk)
    return scores_GS, scores_RS

In [4]:
def result_line(model: str, search_strategy: str, results: dict) -> dict:
    """Collapse per-fold scores into one summary row.

    Parameters
    ----------
    model : str
        Label stored under the ``'Model'`` key.
    search_strategy : str
        Label stored under the ``'Search_strategy'`` key.
    results : dict
        Mapping of score name to per-fold values (e.g. a ``cross_validate``
        result). It is read only; the caller's dict is left untouched.

    Returns
    -------
    dict
        A new dict holding the mean of every entry in ``results`` followed by
        the ``'Model'`` and ``'Search_strategy'`` labels.
    """
    # Build a fresh dict instead of overwriting the input, so the raw per-fold
    # arrays stay available to the caller and the call can be repeated safely.
    row = {key: np.mean(value) for key, value in results.items()}
    row['Model'] = model
    row['Search_strategy'] = search_strategy
    return row

<h1>HOUSING DB</h1>

In [21]:
# Column names for the header-less data file; the last one is the target.
namesCol = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# Raw string: '\s' is not a valid escape sequence in a normal string literal.
df = pd.read_csv('housing.data', delimiter=r'\s+', names=namesCol)

# Every column except the last is a feature; the last column is the target.
X = df[namesCol[:-1]].values
y = df[namesCol[-1]].values

# NOTE(review): LabelEncoder replaces each distinct target value with its
# index among the sorted unique values, so every error metric computed later
# is measured on that index rather than on the original MEDV scale.
# Presumably this is done so the classifier cells (LogisticRegression,
# Perceptron) accept the target -- confirm this is intended.
lab_enc = preprocessing.LabelEncoder()
y = lab_enc.fit_transform(y)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3)#,stratify=y)


In [6]:
# Empty comparison table: two label columns, the train/test variants of each
# error metric, then the timing columns.
results = pd.DataFrame(columns=(
    ['Model', 'Search_strategy']
    + [split + '_' + metric
       for split in ('train', 'test')
       for metric in ('neg_mean_absolute_error',
                      'neg_mean_squared_error',
                      'neg_median_absolute_error')]
    + ['fit_time', 'score_time']
))
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time


<h2 style="color:purple;"><i>Logistic Regression</i></h2>

In [23]:
# Fit a default LogisticRegression on the full data; fit() returns the
# estimator itself, and the trailing expression echoes its settings.
model1 = linear_model.LogisticRegression().fit(X, y)
model1

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [24]:
# Candidate values explored by both search strategies.
parameters = {
    'solver': ['lbfgs'],
    'C': [0.1, 1.0, 2, 3, 5, 13, 21],
    'max_iter': [100, 200, 300, 500],
}
res_GS, res_RS = scores_report(model1, parameters=parameters)

res_GS = result_line('Logistic Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Logistic Regression', 'RandomSearchCV', res_RS)

# DataFrame.append no longer exists in pandas 2.0+; add both rows with a
# single concat instead. sort=False keeps the existing column order.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS])],
    ignore_index=True, sort=False,
)

In [25]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.597285,-681.207123,-6.4,-42.435061,-3592.456067,-29.5,389.994207,0.003466
1,Logistic Regression,RandomSearchCV,-14.308196,-667.355696,-5.8,-41.348961,-3307.09767,-29.5,101.226728,0.001796


<h2 style="color:darkgreen;"><i>Ridge Regression</i></h2>

In [26]:
# Fit a default Ridge model on the full data; fit() returns the estimator
# itself, and the trailing expression echoes its settings.
model2 = linear_model.Ridge().fit(X, y)
model2

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [27]:
# Candidate regularisation strengths explored by both search strategies.
parameters = {
    'alpha': [0.1, 0.5, 1, 2, 3, 6, 8, 11, 1000, 3000],
}

res_GS, res_RS = scores_report(model2, parameters=parameters)

res_GS = result_line('Ridge Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Ridge Regression', 'RandomSearchCV', res_RS)

# DataFrame.append no longer exists in pandas 2.0+; add both rows with a
# single concat instead. sort=False keeps the existing column order.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS])],
    ignore_index=True, sort=False,
)

In [28]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.597285,-681.207123,-6.4,-42.435061,-3592.456067,-29.5,389.994207,0.003466
1,Logistic Regression,RandomSearchCV,-14.308196,-667.355696,-5.8,-41.348961,-3307.09767,-29.5,101.226728,0.001796
2,Ridge Regression,GridSearchCV,-20.912789,-770.506284,-16.893021,-26.395409,-1183.448821,-21.522174,0.071808,0.000998
3,Ridge Regression,RandomSearchCV,-19.798138,-709.701378,-15.551839,-23.945347,-1026.559697,-20.20611,0.040691,0.001197


<h2 style="color:darkblue;"><i>Bayesian Regression</i></h2>


In [29]:
# Fit a default BayesianRidge model on the full data; fit() returns the
# estimator itself, and the trailing expression echoes its settings.
model3 = linear_model.BayesianRidge().fit(X, y)
model3

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [30]:
# Candidate values explored by both search strategies.
# NOTE(review): newer scikit-learn releases appear to name the iteration
# limit `max_iter` rather than `n_iter` -- confirm against the installed version.
parameters = {
    'n_iter': [100, 300, 500],
    'alpha_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'alpha_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
}
res_GS, res_RS = scores_report(model3, parameters=parameters)

res_GS = result_line('Bayesian Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Bayesian Regression', 'RandomSearchCV', res_RS)

# DataFrame.append no longer exists in pandas 2.0+; add both rows with a
# single concat instead. sort=False keeps the existing column order.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS])],
    ignore_index=True, sort=False,
)

In [31]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.597285,-681.207123,-6.4,-42.435061,-3592.456067,-29.5,389.994207,0.003466
1,Logistic Regression,RandomSearchCV,-14.308196,-667.355696,-5.8,-41.348961,-3307.09767,-29.5,101.226728,0.001796
2,Ridge Regression,GridSearchCV,-20.912789,-770.506284,-16.893021,-26.395409,-1183.448821,-21.522174,0.071808,0.000998
3,Ridge Regression,RandomSearchCV,-19.798138,-709.701378,-15.551839,-23.945347,-1026.559697,-20.20611,0.040691,0.001197
4,Bayesian Regression,GridSearchCV,-19.910525,-709.995997,-15.820315,-24.363661,-1049.78642,-19.598822,18.223869,0.001596
5,Bayesian Regression,RandomSearchCV,-19.759475,-703.355729,-15.506893,-23.942809,-1030.779749,-19.264853,0.051263,0.000999


<h2 style="color:darkorange;"><i>SGD Regressor</i></h2>

In [32]:
# Fit an SGDRegressor with initial learning rate eta0 = 1e-6 on the full
# data; fit() returns the estimator itself, and the trailing expression
# echoes its settings.
model4 = linear_model.SGDRegressor(eta0=1e-6).fit(X, y)
model4

SGDRegressor(alpha=0.0001, average=False, early_stopping=False, epsilon=0.1,
       eta0=1e-06, fit_intercept=True, l1_ratio=0.15,
       learning_rate='invscaling', loss='squared_loss', max_iter=None,
       n_iter=None, n_iter_no_change=5, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, tol=None, validation_fraction=0.1,
       verbose=0, warm_start=False)

In [33]:
# Candidate values explored by both search strategies.
# NOTE(review): newer scikit-learn releases appear to spell the first loss
# 'squared_error' instead of 'squared_loss' -- confirm against the installed version.
parameters = {
    'loss': ['squared_loss', 'huber', 'squared_epsilon_insensitive'],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [1e-8, 1e-3, 0.5, 3, 8],
    'max_iter': [200, 300, 800],
}
res_GS, res_RS = scores_report(model4, parameters=parameters)

res_GS = result_line('SGD Regressor', 'GridSearchCV', res_GS)
res_RS = result_line('SGD Regressor', 'RandomSearchCV', res_RS)

# DataFrame.append no longer exists in pandas 2.0+; add both rows with a
# single concat instead. sort=False keeps the existing column order.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS])],
    ignore_index=True, sort=False,
)

In [34]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.597285,-681.207123,-6.4,-42.435061,-3592.456067,-29.5,389.994207,0.003466
1,Logistic Regression,RandomSearchCV,-14.308196,-667.355696,-5.8,-41.348961,-3307.09767,-29.5,101.226728,0.001796
2,Ridge Regression,GridSearchCV,-20.912789,-770.506284,-16.893021,-26.395409,-1183.448821,-21.522174,0.071808,0.000998
3,Ridge Regression,RandomSearchCV,-19.798138,-709.701378,-15.551839,-23.945347,-1026.559697,-20.20611,0.040691,0.001197
4,Bayesian Regression,GridSearchCV,-19.910525,-709.995997,-15.820315,-24.363661,-1049.78642,-19.598822,18.223869,0.001596
5,Bayesian Regression,RandomSearchCV,-19.759475,-703.355729,-15.506893,-23.942809,-1030.779749,-19.264853,0.051263,0.000999
6,SGD Regressor,GridSearchCV,-31.960363,-1581.488168,-27.551934,-40.666508,-2693.6967,-32.067472,4.377893,0.000997
7,SGD Regressor,RandomSearchCV,-33.314163,-1741.073343,-28.265883,-42.747908,-2960.719077,-34.563756,0.260103,0.000799


<h2 style="color:darkviolet;"><i>Perceptron</i></h2>

In [35]:
# Fit a Perceptron with learning rate eta0 = 1e-6 on the full data; fit()
# returns the estimator itself, and the trailing expression echoes its
# settings.
model5 = linear_model.Perceptron(eta0=1e-6).fit(X, y)
model5

Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1e-06,
      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=0, shuffle=True, tol=None,
      validation_fraction=0.1, verbose=0, warm_start=False)

In [36]:
# Candidate values explored by both search strategies.
# NOTE(review): model5 is created without a `penalty`, and `alpha` only
# scales the regularisation term when a penalty is used -- verify that the
# alpha grid has any effect here.
parameters = {
    'alpha': [1e-5, 1e-3, 0.5, 1, 8, 21],
    'max_iter': [200, 500, 800],
}
res_GS, res_RS = scores_report(model5, parameters=parameters)

res_GS = result_line('Perceptron', 'GridSearchCV', res_GS)
res_RS = result_line('Perceptron', 'RandomSearchCV', res_RS)

# DataFrame.append no longer exists in pandas 2.0+; add both rows with a
# single concat instead. sort=False keeps the existing column order.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS])],
    ignore_index=True, sort=False,
)

In [37]:
# Display the comparison table accumulated so far.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,-14.597285,-681.207123,-6.4,-42.435061,-3592.456067,-29.5,389.994207,0.003466
1,Logistic Regression,RandomSearchCV,-14.308196,-667.355696,-5.8,-41.348961,-3307.09767,-29.5,101.226728,0.001796
2,Ridge Regression,GridSearchCV,-20.912789,-770.506284,-16.893021,-26.395409,-1183.448821,-21.522174,0.071808,0.000998
3,Ridge Regression,RandomSearchCV,-19.798138,-709.701378,-15.551839,-23.945347,-1026.559697,-20.20611,0.040691,0.001197
4,Bayesian Regression,GridSearchCV,-19.910525,-709.995997,-15.820315,-24.363661,-1049.78642,-19.598822,18.223869,0.001596
5,Bayesian Regression,RandomSearchCV,-19.759475,-703.355729,-15.506893,-23.942809,-1030.779749,-19.264853,0.051263,0.000999
6,SGD Regressor,GridSearchCV,-31.960363,-1581.488168,-27.551934,-40.666508,-2693.6967,-32.067472,4.377893,0.000997
7,SGD Regressor,RandomSearchCV,-33.314163,-1741.073343,-28.265883,-42.747908,-2960.719077,-34.563756,0.260103,0.000799
8,Perceptron,GridSearchCV,-47.608839,-4046.851758,-39.4,-64.641739,-6654.036847,-61.8,57.56228,0.003593
9,Perceptron,RandomSearchCV,-47.355999,-4036.677931,-39.0,-64.334809,-6672.957639,-59.8,17.984905,0.01137


In [38]:
# Replace every numeric column (everything after the two label columns) by
# its absolute value, so the negated "neg_*" scores read as positive errors.
# The column names keep their "neg_" prefix.
results.iloc[:, 2:] = results.iloc[:, 2:].abs()

In [39]:
def color(grid):
    """Return one CSS style string per element of ``grid``.

    Elements equal to the maximum get ``'background-color: red'``, elements
    equal to the minimum get ``'background-color: green'``, and everything
    else gets an empty string. When the maximum and minimum coincide, the
    maximum rule wins and every element is marked red.

    Parameters
    ----------
    grid : sequence exposing ``max()`` and ``min()``
        For example the pandas Series that ``Styler.apply`` passes in.

    Returns
    -------
    list of str
        Style strings in the same order as ``grid``.
    """
    if len(grid) == 0:
        # Nothing to style; also avoids calling max()/min() on empty input.
        return []

    # Compute the extremes once instead of once per element.
    highest = grid.max()
    lowest = grid.min()

    styles = []
    for value in grid:
        if value == highest:
            styles.append('background-color: red')
        elif value == lowest:
            styles.append('background-color: green')
        else:
            styles.append('')
    return styles

# Apply color() to each listed column: the largest value in a column is
# shaded red and the smallest green.
results_highlighted = results.style.apply(
    color,
    subset=[
        'train_neg_mean_absolute_error',
        'train_neg_mean_squared_error',
        'train_neg_median_absolute_error',
        'test_neg_mean_absolute_error',
        'test_neg_mean_squared_error',
        'test_neg_median_absolute_error',
        'fit_time',
        'score_time',
    ],
)

results_highlighted

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Logistic Regression,GridSearchCV,14.5973,681.207,6.4,42.4351,3592.46,29.5,389.994,0.00346584
1,Logistic Regression,RandomSearchCV,14.3082,667.356,5.8,41.349,3307.1,29.5,101.227,0.00179567
2,Ridge Regression,GridSearchCV,20.9128,770.506,16.893,26.3954,1183.45,21.5222,0.0718083,0.000997877
3,Ridge Regression,RandomSearchCV,19.7981,709.701,15.5518,23.9453,1026.56,20.2061,0.0406912,0.00119691
4,Bayesian Regression,GridSearchCV,19.9105,709.996,15.8203,24.3637,1049.79,19.5988,18.2239,0.00159621
5,Bayesian Regression,RandomSearchCV,19.7595,703.356,15.5069,23.9428,1030.78,19.2649,0.0512627,0.00099926
6,SGD Regressor,GridSearchCV,31.9604,1581.49,27.5519,40.6665,2693.7,32.0675,4.37789,0.000997353
7,SGD Regressor,RandomSearchCV,33.3142,1741.07,28.2659,42.7479,2960.72,34.5638,0.260103,0.000798512
8,Perceptron,GridSearchCV,47.6088,4046.85,39.4,64.6417,6654.04,61.8,57.5623,0.00359325
9,Perceptron,RandomSearchCV,47.356,4036.68,39.0,64.3348,6672.96,59.8,17.9849,0.0113701
