In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate

In [15]:
%%HTML
<style type="text/css">
/* Draw a solid 1px black border around every header and data cell of tables
   carrying the "dataframe" class, and force their text to black.
   !important is used so these declarations win over competing rules. */
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [16]:
def error_report(model, parameters, X=None, y=None, random_state=None):
    """Compare grid search and randomized search for one estimator via nested CV.

    Both search objects tune ``model`` over ``parameters`` with an inner 3-fold
    CV; each search object is then itself evaluated with an outer 5-fold
    ``cross_validate`` on three negated error metrics.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn estimator to tune.
    parameters : dict
        Parameter grid; used as ``param_grid`` for the grid search and as
        ``param_distributions`` (4 sampled candidates) for the randomized search.
    X, y : array-like, optional
        Features and targets. When omitted, the notebook-level ``x_set`` /
        ``y_set`` are used, which is what existing callers rely on.
    random_state : int or None, optional
        Seed for the randomized search's candidate sampling. ``None`` (default)
        keeps the previous, unseeded behaviour.

    Returns
    -------
    (scores_GS, scores_RS) : tuple of dict
        The ``cross_validate`` result dicts for the grid search and the
        randomized search, in that order.
    """
    # Fall back to the notebook globals so existing two-argument calls still work.
    if X is None:
        X = x_set
    if y is None:
        y = y_set

    scoring = ('neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error')

    # `iid` is deliberately not passed: the parameter was deprecated in
    # scikit-learn 0.22 (whose default already matches iid=False) and removed in
    # 0.24, where passing it raises TypeError.
    model_GS = GridSearchCV(model, param_grid=parameters, cv=3,
                            return_train_score=True, n_jobs=-1)
    model_RS = RandomizedSearchCV(estimator=model, param_distributions=parameters,
                                  cv=3, n_iter=4, n_jobs=-1,
                                  random_state=random_state)

    scores_GS = cross_validate(model_GS, X, y, return_train_score=True,
                               scoring=scoring, cv=5)
    scores_RS = cross_validate(model_RS, X, y, return_train_score=True,
                               scoring=scoring, cv=5)
    return scores_GS, scores_RS

In [17]:
def make_line(model:str, search_strategy:str, results) -> dict:
    """Collapse per-fold score arrays into one labelled summary row.

    Parameters
    ----------
    model : str
        Label stored under the ``'Model'`` key.
    search_strategy : str
        Label stored under the ``'Search_strategy'`` key.
    results : dict
        Mapping of names to objects exposing ``.mean()`` (e.g. the arrays
        in a ``cross_validate`` result).

    Returns
    -------
    dict
        A new dict with every value of ``results`` replaced by its mean,
        followed by the ``'Model'`` and ``'Search_strategy'`` entries.
    """
    # Build a fresh dict rather than overwriting the caller's entries, so the
    # raw per-fold scores survive and the function can be called again on the
    # same input.
    line = {key: value.mean() for key, value in results.items()}
    line['Model'] = model
    line['Search_strategy'] = search_strategy
    return line

In [43]:
# Column labels for the header-less data file, in file order.
namesDict = ["ID", "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion",
             "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses", "Class"]

# Load and clean in one non-mutating chain. Assigning a column on the filtered
# frame (as a separate statement) triggers pandas' SettingWithCopyWarning;
# chaining .astype avoids writing into a slice altogether.
data_set = (
    pd.read_csv('breast-cancer-wisconsin.data', names=namesDict)
    .iloc[:, 1:]                                    # drop the leading "ID" column
    .loc[lambda frame: frame["Bare Nuclei"] != '?']  # drop rows whose value is the '?' placeholder
    .astype({"Bare Nuclei": int})                   # remaining entries are cast to integers
)

In [44]:
# Feature matrix: the first nine columns of the cleaned frame.
x_set = data_set.iloc[:, :9]
# Target: the column at position 9, flattened to a 1-D NumPy array.
y_set = np.ravel(data_set.iloc[:, 9])

In [45]:
# Empty summary table: two label columns, then train/test variants of each
# error metric, then the timing columns.
results = pd.DataFrame(
    columns=(
        ['Model', 'Search_strategy']
        + [split + '_' + metric
           for split in ('train', 'test')
           for metric in ('neg_mean_absolute_error',
                          'neg_mean_squared_error',
                          'neg_median_absolute_error')]
        + ['fit_time', 'score_time']
    )
)

In [13]:
from sklearn import linear_model

## Ridge Regression

In [47]:
# Regularisation strengths to search over for Ridge.
parameters = {'alpha': [0.1, 0.5, 1, 2, 3, 5, 8, 13, 1000, 5000]}

res_GS, res_RS = error_report(linear_model.Ridge(), parameters=parameters)

res_GS = make_line('Ridge Regression', 'GridSearchCV', res_GS)
res_RS = make_line('Ridge Regression', 'RandomSearchCV', res_RS)
# DataFrame.append was removed in pandas 2.0; add both summary rows with a
# single concat instead. sort=False keeps the existing column order of `results`.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])],
                    ignore_index=True, sort=False)

## Bayesian Regression

In [48]:
# BayesianRidge's iteration-cap parameter was renamed from `n_iter` to
# `max_iter` in scikit-learn 1.3 (old name later removed); use whichever name
# the installed estimator actually exposes so the grid stays valid.
iteration_param = 'max_iter' if 'max_iter' in linear_model.BayesianRidge().get_params() else 'n_iter'

parameters = {
    iteration_param: [100, 300, 500],
    'alpha_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'alpha_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0]
}
res_GS, res_RS = error_report(linear_model.BayesianRidge(), parameters=parameters)

res_GS = make_line('Bayesian Regression', 'GridSearchCV', res_GS)
res_RS = make_line('Bayesian Regression', 'RandomSearchCV', res_RS)
# DataFrame.append was removed in pandas 2.0; add both summary rows with a
# single concat instead. sort=False keeps the existing column order of `results`.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])],
                    ignore_index=True, sort=False)

## Logistic Regression

In [58]:
# Search space for LogisticRegression: fixed solver, varying inverse
# regularisation strength C and iteration cap.
parameters = {
    'solver': ['lbfgs'],
    'C': [0.1, 1.0, 2, 5, 13, 21],
    'max_iter': [100, 300, 500]
}
res_GS, res_RS = error_report(linear_model.LogisticRegression(), parameters=parameters)

res_GS = make_line('Logistic Regression', 'GridSearchCV', res_GS)
res_RS = make_line('Logistic Regression', 'RandomSearchCV', res_RS)
# DataFrame.append was removed in pandas 2.0; add both summary rows with a
# single concat instead. sort=False keeps the existing column order of `results`.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])],
                    ignore_index=True, sort=False)

## Perceptron

In [50]:
# NOTE(review): Perceptron is constructed without a `penalty`, and `alpha` only
# scales the regularisation term when one is used, so the `alpha` values below
# presumably do not change the fitted model — consider adding 'penalty' to the
# grid. Confirm against the scikit-learn Perceptron documentation.
parameters = {
    'alpha': [1e-5, 1e-3, 0.5, 1, 8, 21],
    'max_iter': [200, 500, 1000]
}
res_GS, res_RS = error_report(linear_model.Perceptron(tol=0.21), parameters=parameters)

res_GS = make_line('Perceptron', 'GridSearchCV', res_GS)
res_RS = make_line('Perceptron', 'RandomSearchCV', res_RS)
# DataFrame.append was removed in pandas 2.0; add both summary rows with a
# single concat instead. sort=False keeps the existing column order of `results`.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])],
                    ignore_index=True, sort=False)

## SGDRegressor

In [51]:
# The squared-loss identifier differs between scikit-learn releases
# ('squared_loss' in older ones, 'squared_error' in newer ones). It is the
# estimator's default loss, so read the spelling valid for the installed
# version from a default-constructed SGDRegressor.
squared_loss_name = linear_model.SGDRegressor().loss

parameters = {
    'loss': [squared_loss_name, 'huber', 'squared_epsilon_insensitive'],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [1e-8, 1e-3, 0.5, 3, 8],
    'max_iter': [200, 300, 800]
}
res_GS, res_RS = error_report(linear_model.SGDRegressor(tol=1e-3), parameters=parameters)
res_GS = make_line('SGD Regressor', 'GridSearchCV', res_GS)
res_RS = make_line('SGD Regressor', 'RandomSearchCV', res_RS)
# DataFrame.append was removed in pandas 2.0; add both summary rows with a
# single concat instead. sort=False keeps the existing column order of `results`.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])],
                    ignore_index=True, sort=False)

In [52]:
# Display up to the first ten rows of the collected summary table.
results.head(10)

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-0.246551,-0.142371,-0.1449,-0.264596,-0.160251,-0.166859,9.842327,0.0
1,Ridge Regression,RandomSearchCV,-0.245547,-0.141438,-0.141478,-0.265214,-0.161179,-0.164832,0.065603,0.003125
2,Bayesian Regression,GridSearchCV,-0.244429,-0.14067,-0.141635,-0.265565,-0.160477,-0.160422,35.313071,0.006114
3,Bayesian Regression,RandomSearchCV,-0.244281,-0.140504,-0.142905,-0.266988,-0.161768,-0.162295,0.081397,0.000798
4,Logistic Regression,GridSearchCV,-0.056357,-0.112714,0.0,-0.070116,-0.140232,0.0,0.51612,0.008495
5,Logistic Regression,RandomSearchCV,-0.056357,-0.112714,0.0,-0.064277,-0.128554,0.0,0.118692,0.003125
6,Perceptron,GridSearchCV,-0.214919,-0.429839,0.0,-0.217918,-0.435837,0.0,0.271258,0.002593
7,Perceptron,RandomSearchCV,-0.214919,-0.429839,0.0,-0.217918,-0.435837,0.0,0.072604,0.00399
8,SGD Regressor,GridSearchCV,-0.26082,-0.160984,-0.143188,-0.281495,-0.179569,-0.171607,1.367118,0.000798
9,SGD Regressor,RandomSearchCV,-0.290716,-0.171694,-0.20158,-0.314938,-0.200308,-0.216301,0.08123,0.0


In [53]:
# Show floats with five decimals. The replacement field must be the implicit
# first argument: '{5:.5f}' would look up positional argument 5 and raise
# IndexError as soon as pandas formats a float with this callable.
pd.options.display.float_format = '{:.5f}'.format

In [54]:
# Replace every column from position 2 onward (everything after the two label
# columns) by its absolute value, so the negated scores become magnitudes.
results.iloc[:, 2:] = results.iloc[:, 2:].abs()

In [55]:
def myhighlight(x):
    """Return one CSS string per element of ``x`` marking its extremes.

    Elements equal to the maximum get a red background, elements equal to the
    minimum get a green background, and all others get an empty style. The
    maximum is checked first, so when max and min coincide the cell is red.
    """
    largest = x.max()
    smallest = x.min()

    styles = []
    for v in x:
        if v == largest:
            styles.append('background-color: red')
        elif v == smallest:
            styles.append('background-color: green')
        else:
            styles.append('')
    return styles

In [56]:
# Apply the min/max highlighting to each numeric column of the summary table;
# the two label columns are left out of the subset.
final_results = results.style.apply(
    myhighlight,
    subset=[
        'train_neg_mean_absolute_error',
        'train_neg_mean_squared_error',
        'train_neg_median_absolute_error',
        'test_neg_mean_absolute_error',
        'test_neg_mean_squared_error',
        'test_neg_median_absolute_error',
        'fit_time',
        'score_time',
    ],
)

In [57]:
# Render the styled summary table as the cell output.
final_results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,0.246551,0.142371,0.1449,0.264596,0.160251,0.166859,9.84233,0.0
1,Ridge Regression,RandomSearchCV,0.245547,0.141438,0.141478,0.265214,0.161179,0.164832,0.065603,0.00312452
2,Bayesian Regression,GridSearchCV,0.244429,0.14067,0.141635,0.265565,0.160477,0.160422,35.3131,0.00611401
3,Bayesian Regression,RandomSearchCV,0.244281,0.140504,0.142905,0.266988,0.161768,0.162295,0.081397,0.000797987
4,Logistic Regression,GridSearchCV,0.0563568,0.112714,0.0,0.0701162,0.140232,0.0,0.51612,0.00849504
5,Logistic Regression,RandomSearchCV,0.0563568,0.112714,0.0,0.0642768,0.128554,0.0,0.118692,0.00312457
6,Perceptron,GridSearchCV,0.214919,0.429839,0.0,0.217918,0.435837,0.0,0.271258,0.00259333
7,Perceptron,RandomSearchCV,0.214919,0.429839,0.0,0.217918,0.435837,0.0,0.0726043,0.00399027
8,SGD Regressor,GridSearchCV,0.26082,0.160984,0.143188,0.281495,0.179569,0.171607,1.36712,0.000798368
9,SGD Regressor,RandomSearchCV,0.290716,0.171694,0.20158,0.314938,0.200308,0.216301,0.0812304,0.0
