In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,cross_val_score,KFold
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_validate
from sklearn import linear_model
from sklearn import preprocessing
import warnings
# Silence every warning for the rest of the session. Note that this also hides
# deprecation notices emitted by pandas / scikit-learn.
warnings.filterwarnings('ignore')
# 5-fold splitter (no shuffling requested) used as the `cv` argument of the
# cross_validate calls in scores_report.
cvk=KFold(n_splits=5)

In [2]:
%%HTML
<style type="text/css">
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [12]:
def scores_report(model, parameters):
    """Cross-validate a grid search and a randomized search over `model`.

    Both searches tune `model` over `parameters` with an inner 3-fold CV that
    optimises 'neg_mean_squared_error'. Each search object is then evaluated
    with `cross_validate` on the notebook-level globals `X` and `y`, using the
    notebook-level splitter `cvk` as the outer CV.

    Parameters
    ----------
    model : estimator
        The estimator handed to GridSearchCV / RandomizedSearchCV.
    parameters : dict
        Parameter grid; used as `param_grid` for the grid search and as
        `param_distributions` (5 sampled candidates) for the randomized one.

    Returns
    -------
    (scores_GS, scores_RS) : tuple of dict
        The `cross_validate` result for the grid search and for the
        randomized search, in that order, including train scores.
    """
    # Local import keeps this cell self-contained.
    import inspect

    # Metrics reported by the outer cross-validation (shared by both searches).
    report_scoring = ('neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error')

    def _search_kwargs(search_cls):
        # Keyword arguments common to both search strategies.
        kwargs = dict(scoring='neg_mean_squared_error', cv=3, return_train_score=True, n_jobs=-1)
        # `iid` was deprecated in scikit-learn 0.22 and removed in 0.24, where
        # passing it raises TypeError. Only forward it to versions that still
        # accept it, so older releases keep the original iid=False behaviour.
        if 'iid' in inspect.signature(search_cls).parameters:
            kwargs['iid'] = False
        return kwargs

    model_GS = GridSearchCV(model, param_grid=parameters, **_search_kwargs(GridSearchCV))
    model_RS = RandomizedSearchCV(estimator=model, param_distributions=parameters, n_iter=5,
                                  **_search_kwargs(RandomizedSearchCV))

    scores_GS = cross_validate(model_GS, X, y, return_train_score=True, scoring=report_scoring, cv=cvk)
    scores_RS = cross_validate(model_RS, X, y, return_train_score=True, scoring=report_scoring, cv=cvk)
    return scores_GS, scores_RS

In [13]:
def result_line(model:str, search_strategy:str, results):
    """Collapse per-fold scores into one summary row.

    Parameters
    ----------
    model : str
        Label stored under the 'Model' key.
    search_strategy : str
        Label stored under the 'Search_strategy' key.
    results : dict
        Mapping of metric name -> per-fold values (e.g. the dict returned by
        `cross_validate`).

    Returns
    -------
    dict
        A new dict holding the mean of every entry of `results` (same key
        order), followed by the 'Model' and 'Search_strategy' labels.
        The input mapping is left untouched, so the per-fold scores remain
        available to the caller.
    """
    # Build a fresh dict instead of overwriting the caller's score arrays.
    row = {metric: np.mean(fold_values) for metric, fold_values in results.items()}
    row['Model'] = model
    row['Search_strategy'] = search_strategy
    return row

<h1>BREAST CANCER DB</h1>

In [9]:
# Column names supplied to read_csv for the data file.
namesDict = ["ID", "Clump Thickness", "Uniformity of Cell Size", "Uniformity of Cell Shape", "Marginal Adhesion",
             "Single Epithelial Cell Size", "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli", "Mitoses", "Class"]
df = pd.read_csv('breast-cancer-wisconsin.data', names=namesDict)
# Drop the leading "ID" column; it is not used as a feature.
df = df.iloc[:, 1:]
# Discard rows whose "Bare Nuclei" entry is the '?' placeholder. The explicit
# copy makes the cast below operate on an independent frame rather than on a
# slice of the original (avoids pandas' SettingWithCopyWarning).
df = df[df["Bare Nuclei"] != '?'].copy()
df['Bare Nuclei'] = df['Bare Nuclei'].astype(int)
# First nine remaining columns are the features, the last one ("Class") is the target.
X = df.iloc[:, :9].values
y = np.ravel(df.iloc[:, 9:])
# Re-encode the class labels as consecutive integers starting at 0.
lab_enc = preprocessing.LabelEncoder()
y = lab_enc.fit_transform(y)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3)#,stratify=y)


In [10]:
# Empty summary table: one row per (model, search strategy) pair is added by
# the model sections of the notebook.
results = pd.DataFrame(
    columns=[
        'Model',
        'Search_strategy',
        'train_neg_mean_absolute_error',
        'train_neg_mean_squared_error',
        'train_neg_median_absolute_error',
        'test_neg_mean_absolute_error',
        'test_neg_mean_squared_error',
        'test_neg_median_absolute_error',
        'fit_time',
        'score_time',
    ]
)
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time


<h2 style="color:purple;"><i>Logistic Regression</i></h2>

In [25]:
# Logistic regression with default settings, fitted on the full data set;
# the fitted estimator is shown as the cell output.
model1 = linear_model.LogisticRegression().fit(X, y)
model1

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [26]:
# Hyper-parameter grid explored for logistic regression.
parameters = {
    'solver': ['lbfgs'],
    'C': [0.1, 1.0, 2, 3, 5, 13, 21],
    'max_iter': [100, 200, 300, 500],
}
res_GS, res_RS = scores_report(model1, parameters=parameters)

res_GS = result_line('Logistic Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Logistic Regression', 'RandomSearchCV', res_RS)
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# adds both summary rows in one step. sort=False keeps the existing column order.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [27]:
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-0.123275,-0.035593,-0.072449,-0.132303,-0.040067,-0.083426,0.967014,0.000998
1,Ridge Regression,RandomSearchCV,-0.12265,-0.03535,-0.07313,-0.13359,-0.04044,-0.082315,0.04109,0.000997
2,Bayesian Regression,GridSearchCV,-0.122312,-0.035215,-0.07044,-0.132458,-0.039967,-0.081495,12.181628,0.002195
3,Bayesian Regression,RandomSearchCV,-0.122255,-0.035177,-0.070466,-0.132817,-0.040095,-0.080897,0.054255,0.002194
4,SGD Regressor,GridSearchCV,-0.2166,-0.058834,-0.20214,-0.21964,-0.060376,-0.202614,9.52114,0.001198
5,SGD Regressor,RandomSearchCV,-0.229927,-0.068554,-0.191941,-0.229188,-0.067923,-0.195576,0.518213,0.001198
6,Perceptron,GridSearchCV,-0.047576,-0.047576,0.0,-0.043871,-0.043871,0.0,1.849854,0.000997
7,Perceptron,RandomSearchCV,-0.047576,-0.047576,0.0,-0.043871,-0.043871,0.0,0.059042,0.005983
8,Logistic Regression,GridSearchCV,-0.027815,-0.027815,0.0,-0.035069,-0.035069,0.0,0.674596,0.000599
9,Logistic Regression,RandomSearchCV,-0.027815,-0.027815,0.0,-0.033609,-0.033609,0.0,0.226196,0.000797


<h2 style="color:darkgreen;"><i>Ridge Regression</i></h2>

In [11]:
# Ridge regression with default settings, fitted on the full data set;
# the fitted estimator is shown as the cell output.
model2 = linear_model.Ridge().fit(X, y)
model2

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [14]:
# Regularisation strengths explored for ridge regression.
parameters = {'alpha': [0.1, 0.5, 1, 2, 3, 6, 8, 11, 1000, 3000]}

res_GS, res_RS = scores_report(model2, parameters=parameters)

res_GS = result_line('Ridge Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Ridge Regression', 'RandomSearchCV', res_RS)
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# adds both summary rows in one step. sort=False keeps the existing column order.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [15]:
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-0.123275,-0.035593,-0.072449,-0.132303,-0.040067,-0.083426,0.967014,0.000998
1,Ridge Regression,RandomSearchCV,-0.12265,-0.03535,-0.07313,-0.13359,-0.04044,-0.082315,0.04109,0.000997


<h2 style="color:darkblue;"><i>Bayesian Regression</i></h2>


In [16]:
# Bayesian ridge regression with default settings, fitted on the full data set;
# the fitted estimator is shown as the cell output.
model3 = linear_model.BayesianRidge().fit(X, y)
model3

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [17]:
# Hyper-parameter grid explored for Bayesian ridge regression.
# NOTE(review): recent scikit-learn releases rename BayesianRidge's `n_iter`
# to `max_iter`; adjust the key if the notebook is run on such a version.
parameters = {
    'n_iter': [100, 300, 500],
    'alpha_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'alpha_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
}
res_GS, res_RS = scores_report(model3, parameters=parameters)

res_GS = result_line('Bayesian Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Bayesian Regression', 'RandomSearchCV', res_RS)
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# adds both summary rows in one step. sort=False keeps the existing column order.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [18]:
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-0.123275,-0.035593,-0.072449,-0.132303,-0.040067,-0.083426,0.967014,0.000998
1,Ridge Regression,RandomSearchCV,-0.12265,-0.03535,-0.07313,-0.13359,-0.04044,-0.082315,0.04109,0.000997
2,Bayesian Regression,GridSearchCV,-0.122312,-0.035215,-0.07044,-0.132458,-0.039967,-0.081495,12.181628,0.002195
3,Bayesian Regression,RandomSearchCV,-0.122255,-0.035177,-0.070466,-0.132817,-0.040095,-0.080897,0.054255,0.002194


<h2 style="color:darkorange;"><i>SGD Regressor</i></h2>

In [19]:
# SGD regressor with a small initial learning rate (eta0 = 1e-6), fitted on
# the full data set; the fitted estimator is shown as the cell output.
model4 = linear_model.SGDRegressor(eta0=0.000001).fit(X, y)
model4

SGDRegressor(alpha=0.0001, average=False, early_stopping=False, epsilon=0.1,
       eta0=1e-06, fit_intercept=True, l1_ratio=0.15,
       learning_rate='invscaling', loss='squared_loss', max_iter=None,
       n_iter=None, n_iter_no_change=5, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, tol=None, validation_fraction=0.1,
       verbose=0, warm_start=False)

In [20]:
# Hyper-parameter grid explored for the SGD regressor.
# NOTE(review): recent scikit-learn releases rename the 'squared_loss' loss to
# 'squared_error'; adjust the value if the notebook is run on such a version.
parameters = {
    'loss': ['squared_loss', 'huber', 'squared_epsilon_insensitive'],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [1e-8, 1e-3, 0.5, 3, 8],
    'max_iter': [200, 300, 800],
}
res_GS, res_RS = scores_report(model4, parameters=parameters)

res_GS = result_line('SGD Regressor', 'GridSearchCV', res_GS)
res_RS = result_line('SGD Regressor', 'RandomSearchCV', res_RS)
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# adds both summary rows in one step. sort=False keeps the existing column order.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [21]:
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-0.123275,-0.035593,-0.072449,-0.132303,-0.040067,-0.083426,0.967014,0.000998
1,Ridge Regression,RandomSearchCV,-0.12265,-0.03535,-0.07313,-0.13359,-0.04044,-0.082315,0.04109,0.000997
2,Bayesian Regression,GridSearchCV,-0.122312,-0.035215,-0.07044,-0.132458,-0.039967,-0.081495,12.181628,0.002195
3,Bayesian Regression,RandomSearchCV,-0.122255,-0.035177,-0.070466,-0.132817,-0.040095,-0.080897,0.054255,0.002194
4,SGD Regressor,GridSearchCV,-0.2166,-0.058834,-0.20214,-0.21964,-0.060376,-0.202614,9.52114,0.001198
5,SGD Regressor,RandomSearchCV,-0.229927,-0.068554,-0.191941,-0.229188,-0.067923,-0.195576,0.518213,0.001198


<h2 style="color:darkviolet;"><i>Perceptron</i></h2>

In [22]:
# Perceptron with a stopping tolerance of 0.5, fitted on the full data set;
# the fitted estimator is shown as the cell output.
model5 = linear_model.Perceptron(tol=0.5).fit(X, y)
model5

Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=0, shuffle=True, tol=0.5,
      validation_fraction=0.1, verbose=0, warm_start=False)

In [23]:
# Hyper-parameter grid explored for the perceptron.
parameters = {
    'alpha': [1e-5, 1e-3, 0.5, 1, 8, 21],
    'max_iter': [200, 500, 800],
}
res_GS, res_RS = scores_report(model5, parameters=parameters)

res_GS = result_line('Perceptron', 'GridSearchCV', res_GS)
res_RS = result_line('Perceptron', 'RandomSearchCV', res_RS)
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# adds both summary rows in one step. sort=False keeps the existing column order.
results = pd.concat([results, pd.DataFrame([res_GS, res_RS])], ignore_index=True, sort=False)

In [24]:
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-0.123275,-0.035593,-0.072449,-0.132303,-0.040067,-0.083426,0.967014,0.000998
1,Ridge Regression,RandomSearchCV,-0.12265,-0.03535,-0.07313,-0.13359,-0.04044,-0.082315,0.04109,0.000997
2,Bayesian Regression,GridSearchCV,-0.122312,-0.035215,-0.07044,-0.132458,-0.039967,-0.081495,12.181628,0.002195
3,Bayesian Regression,RandomSearchCV,-0.122255,-0.035177,-0.070466,-0.132817,-0.040095,-0.080897,0.054255,0.002194
4,SGD Regressor,GridSearchCV,-0.2166,-0.058834,-0.20214,-0.21964,-0.060376,-0.202614,9.52114,0.001198
5,SGD Regressor,RandomSearchCV,-0.229927,-0.068554,-0.191941,-0.229188,-0.067923,-0.195576,0.518213,0.001198
6,Perceptron,GridSearchCV,-0.047576,-0.047576,0.0,-0.043871,-0.043871,0.0,1.849854,0.000997
7,Perceptron,RandomSearchCV,-0.047576,-0.047576,0.0,-0.043871,-0.043871,0.0,0.059042,0.005983


In [28]:
# Replace every column after 'Model' and 'Search_strategy' by its absolute
# value, so the negated error scores are shown as positive numbers.
results.iloc[:, 2:] = results.iloc[:, 2:].abs()

In [29]:
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,0.123275,0.035593,0.072449,0.132303,0.040067,0.083426,0.967014,0.000998
1,Ridge Regression,RandomSearchCV,0.12265,0.03535,0.07313,0.13359,0.04044,0.082315,0.04109,0.000997
2,Bayesian Regression,GridSearchCV,0.122312,0.035215,0.07044,0.132458,0.039967,0.081495,12.181628,0.002195
3,Bayesian Regression,RandomSearchCV,0.122255,0.035177,0.070466,0.132817,0.040095,0.080897,0.054255,0.002194
4,SGD Regressor,GridSearchCV,0.2166,0.058834,0.20214,0.21964,0.060376,0.202614,9.52114,0.001198
5,SGD Regressor,RandomSearchCV,0.229927,0.068554,0.191941,0.229188,0.067923,0.195576,0.518213,0.001198
6,Perceptron,GridSearchCV,0.047576,0.047576,0.0,0.043871,0.043871,0.0,1.849854,0.000997
7,Perceptron,RandomSearchCV,0.047576,0.047576,0.0,0.043871,0.043871,0.0,0.059042,0.005983
8,Logistic Regression,GridSearchCV,0.027815,0.027815,0.0,0.035069,0.035069,0.0,0.674596,0.000599
9,Logistic Regression,RandomSearchCV,0.027815,0.027815,0.0,0.033609,0.033609,0.0,0.226196,0.000797


In [30]:
def color(grid):
    """Return one CSS style string per element of `grid`.

    Elements equal to the maximum of `grid` get a red background, elements
    equal to the minimum get a green background, and all others get an empty
    style. When an element equals both (all values identical) red wins.

    Parameters
    ----------
    grid : object with `.max()` / `.min()` that can be iterated
        (e.g. a pandas Series, as passed by `Styler.apply`).

    Returns
    -------
    list of str
        Style strings in the same order as the elements of `grid`.
    """
    # Compute the extremes once instead of once per element.
    highest = grid.max()
    lowest = grid.min()

    styles = []
    for value in grid:
        if value == highest:
            styles.append('background-color: red')
        elif value == lowest:
            styles.append('background-color: green')
        else:
            styles.append('')
    return styles

# Apply `color` to each listed score / timing column; the 'Model' and
# 'Search_strategy' label columns are left unstyled.
results_highlighted = results.style.apply(
    color,
    subset=[
        'train_neg_mean_absolute_error',
        'train_neg_mean_squared_error',
        'train_neg_median_absolute_error',
        'test_neg_mean_absolute_error',
        'test_neg_mean_squared_error',
        'test_neg_median_absolute_error',
        'fit_time',
        'score_time',
    ],
)

results_highlighted

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,0.123275,0.0355927,0.0724494,0.132303,0.0400671,0.0834264,0.967014,0.000997925
1,Ridge Regression,RandomSearchCV,0.12265,0.0353495,0.0731298,0.13359,0.0404396,0.0823155,0.0410902,0.000997114
2,Bayesian Regression,GridSearchCV,0.122312,0.0352153,0.07044,0.132458,0.0399672,0.0814949,12.1816,0.00219541
3,Bayesian Regression,RandomSearchCV,0.122255,0.0351765,0.0704663,0.132817,0.0400954,0.0808971,0.0542552,0.00219393
4,SGD Regressor,GridSearchCV,0.2166,0.0588341,0.20214,0.21964,0.0603757,0.202614,9.52114,0.00119772
5,SGD Regressor,RandomSearchCV,0.229927,0.0685537,0.191941,0.229188,0.0679231,0.195576,0.518213,0.00119753
6,Perceptron,GridSearchCV,0.0475755,0.0475755,0.0,0.0438708,0.0438708,0.0,1.84985,0.000996876
7,Perceptron,RandomSearchCV,0.0475755,0.0475755,0.0,0.0438708,0.0438708,0.0,0.0590415,0.00598283
8,Logistic Regression,GridSearchCV,0.0278154,0.0278154,0.0,0.0350687,0.0350687,0.0,0.674596,0.000598669
9,Logistic Regression,RandomSearchCV,0.0278154,0.0278154,0.0,0.0336088,0.0336088,0.0,0.226196,0.000796795
