In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,cross_val_score,KFold
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_validate
from sklearn import linear_model
from sklearn import preprocessing
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation and convergence warnings raised during the searches.
warnings.filterwarnings('ignore')
# 5-fold splitter shared by the outer cross-validation calls in scores_report.
cvk=KFold(n_splits=5)

In [2]:
%%HTML
<style type="text/css">
/* Give the cells of rendered DataFrame tables a solid black border and black text. */
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [14]:
def scores_report(model, parameters, random_state=None):
    """Cross-validate a grid search and a randomized search built around ``model``.

    Both searches tune ``model`` over ``parameters`` with an inner 3-fold
    cross-validation and select by negative mean squared error. Each search
    object is then evaluated as a whole by ``cross_validate`` on the
    module-level ``X`` and ``y`` with the module-level splitter ``cvk``.

    Parameters
    ----------
    model : estimator
        Scikit-learn estimator to tune.
    parameters : dict
        Maps parameter names to lists of candidate values. Used as the grid
        for ``GridSearchCV`` and as the sampling space for
        ``RandomizedSearchCV`` (5 sampled candidates).
    random_state : int, RandomState instance or None, default=None
        Seed for the candidate sampling of the randomized search. The default
        ``None`` keeps the previous, unseeded behaviour; pass an integer to
        make the randomized-search results reproducible.

    Returns
    -------
    tuple of dict
        ``(scores_GS, scores_RS)``: the ``cross_validate`` result dictionaries
        for the grid search and the randomized search respectively.
    """
    # Metrics reported by the outer cross-validation (train and test).
    outer_scoring = (
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_median_absolute_error',
    )
    # Options shared by both inner searches.
    # NOTE(review): `iid` is only accepted by older scikit-learn releases;
    # drop it when upgrading the environment.
    search_options = dict(
        scoring='neg_mean_squared_error',
        cv=3,
        iid=False,
        return_train_score=True,
        n_jobs=-1,
    )

    model_GS = GridSearchCV(model, param_grid=parameters, **search_options)
    model_RS = RandomizedSearchCV(
        estimator=model,
        param_distributions=parameters,
        n_iter=5,
        random_state=random_state,
        **search_options
    )

    scores_GS = cross_validate(model_GS, X, y, return_train_score=True, scoring=outer_scoring, cv=cvk)
    scores_RS = cross_validate(model_RS, X, y, return_train_score=True, scoring=outer_scoring, cv=cvk)
    return scores_GS, scores_RS

In [15]:
def result_line(model:str, search_strategy:str, results):
    """Collapse per-fold scores into a single summary row.

    Parameters
    ----------
    model : str
        Label stored under the ``'Model'`` key of the row.
    search_strategy : str
        Label stored under the ``'Search_strategy'`` key of the row.
    results : dict
        Maps a score name to an object exposing ``.mean()`` (for example the
        per-fold arrays returned by ``cross_validate``).

    Returns
    -------
    dict
        A new dictionary holding the mean of every entry of ``results`` plus
        the two label keys. ``results`` itself is left untouched, so the
        function can safely be called again on the same input.
    """
    # Build a fresh dict instead of overwriting the caller's arrays in place.
    summary = {key: value.mean() for key, value in results.items()}
    summary['Model'] = model
    summary['Search_strategy'] = search_strategy
    return summary

<h1>CRIME COMMUNITIES DB</h1>

In [24]:
# Read the header-less data file and discard its first five columns.
df = pd.read_csv('communities.data', header=None).iloc[:, 5:]

# Keep only the rows in which no cell holds the '?' placeholder.
rows_without_placeholder = df.ne('?').all(axis=1)
df = df.loc[rows_without_placeholder]

# Every column except the last is a feature; the last column is the target.
X = df.iloc[:, :-1]

# Encode the flattened target column as integer labels.
lab_enc = preprocessing.LabelEncoder()
y = lab_enc.fit_transform(np.ravel(df.iloc[:, -1]))

In [11]:
# Empty table that collects one summary row per (model, search strategy) pair.
metric_names = ['neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error']
result_columns = (
    ['Model', 'Search_strategy']
    + ['train_' + name for name in metric_names]
    + ['test_' + name for name in metric_names]
    + ['fit_time', 'score_time']
)
results = pd.DataFrame(columns=result_columns)
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time


<h2 style="color:purple;"><i>Logistic Regression</i></h2>

In [25]:
# Baseline fit with default hyper-parameters; the fitted estimator's repr is the cell output.
# NOTE(review): LogisticRegression is a classifier, so every label-encoded target value is
# treated as a separate class — confirm this is intended for this target.
model1=linear_model.LogisticRegression()
model1.fit(X,y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [26]:
# Candidate values explored by both search strategies.
parameters = {
    'solver': ['lbfgs'],
    'C': [0.1, 1.0, 2, 3, 5, 13, 21],
    'max_iter': [100, 200, 300, 500],
}
res_GS, res_RS = scores_report(model1, parameters=parameters)

res_GS = result_line('Logistic Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Logistic Regression', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0, so add both rows with pd.concat.
# Building the new rows on the existing columns keeps the column order unchanged.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS], columns=results.columns)],
    ignore_index=True,
)

In [27]:
# Show the accumulated comparison table.
# NOTE(review): the Ridge rows come first because, per the execution counts,
# the Ridge cells were run before this section.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-10.70313,-187.543571,-8.602842,-11.826378,-227.255578,-9.974054,1.756902,0.005186
1,Ridge Regression,RandomSearchCV,-10.656224,-185.797295,-8.596707,-11.817273,-228.543652,-9.89959,0.119879,0.004787
2,Logistic Regression,GridSearchCV,-2.598827,-74.249838,0.0,-15.551736,-427.193998,-11.8,25.206599,0.008178
3,Logistic Regression,RandomSearchCV,-2.382356,-66.511798,0.0,-15.339236,-419.525248,-11.9,11.870457,0.011569


<h2 style="color:darkgreen;"><i>Ridge Regression</i></h2>

In [16]:
# Baseline fit with default hyper-parameters; the fitted estimator's repr is the cell output.
model2=linear_model.Ridge()
model2.fit(X,y)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [17]:
# Candidate regularisation strengths explored by both search strategies.
parameters = {
    'alpha': [0.1, 0.5, 1, 2, 3, 6, 8, 11, 1000, 3000],
}

res_GS, res_RS = scores_report(model2, parameters=parameters)

res_GS = result_line('Ridge Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Ridge Regression', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0, so add both rows with pd.concat.
# Building the new rows on the existing columns keeps the column order unchanged.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS], columns=results.columns)],
    ignore_index=True,
)

In [21]:
# Show the accumulated comparison table.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-10.70313,-187.543571,-8.602842,-11.826378,-227.255578,-9.974054,1.756902,0.005186
1,Ridge Regression,RandomSearchCV,-10.656224,-185.797295,-8.596707,-11.817273,-228.543652,-9.89959,0.119879,0.004787


<h2 style="color:darkblue;"><i>Bayesian Regression</i></h2>


In [28]:
# Baseline fit with default hyper-parameters; the fitted estimator's repr is the cell output.
model3=linear_model.BayesianRidge()
model3.fit(X,y)

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [29]:
# Candidate values explored by both search strategies.
# The full grid has 3 * 6**4 = 3888 combinations, so the grid search is far
# slower than the 5-candidate randomized search.
parameters = {
    'n_iter': [100, 300, 500],
    'alpha_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'alpha_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
}
res_GS, res_RS = scores_report(model3, parameters=parameters)

res_GS = result_line('Bayesian Regression', 'GridSearchCV', res_GS)
res_RS = result_line('Bayesian Regression', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0, so add both rows with pd.concat.
# Building the new rows on the existing columns keeps the column order unchanged.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS], columns=results.columns)],
    ignore_index=True,
)

In [30]:
# Show the accumulated comparison table.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-10.70313,-187.543571,-8.602842,-11.826378,-227.255578,-9.974054,1.756902,0.005186
1,Ridge Regression,RandomSearchCV,-10.656224,-185.797295,-8.596707,-11.817273,-228.543652,-9.89959,0.119879,0.004787
2,Logistic Regression,GridSearchCV,-2.598827,-74.249838,0.0,-15.551736,-427.193998,-11.8,25.206599,0.008178
3,Logistic Regression,RandomSearchCV,-2.382356,-66.511798,0.0,-15.339236,-419.525248,-11.9,11.870457,0.011569
4,Bayesian Regression,GridSearchCV,-10.648329,-185.798567,-8.539542,-11.818543,-227.458023,-9.994391,101.338025,0.005583
5,Bayesian Regression,RandomSearchCV,-10.644641,-185.68975,-8.575282,-11.815765,-227.266346,-9.98549,0.177127,0.004788


<h2 style="color:darkorange;"><i>SGD Regressor</i></h2>

In [31]:
# Baseline fit; only eta0 is overridden (set to 1e-6), every other hyper-parameter is the default.
# The fitted estimator's repr is the cell output.
model4=linear_model.SGDRegressor(eta0=0.000001)
model4.fit(X,y)

SGDRegressor(alpha=0.0001, average=False, early_stopping=False, epsilon=0.1,
       eta0=1e-06, fit_intercept=True, l1_ratio=0.15,
       learning_rate='invscaling', loss='squared_loss', max_iter=None,
       n_iter=None, n_iter_no_change=5, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, tol=None, validation_fraction=0.1,
       verbose=0, warm_start=False)

In [32]:
# Candidate values explored by both search strategies.
parameters = {
    'loss': ['squared_loss', 'huber', 'squared_epsilon_insensitive'],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [1e-8, 1e-3, 0.5, 3, 8],
    'max_iter': [200, 300, 800],
}
res_GS, res_RS = scores_report(model4, parameters=parameters)

res_GS = result_line('SGD Regressor', 'GridSearchCV', res_GS)
res_RS = result_line('SGD Regressor', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0, so add both rows with pd.concat.
# Building the new rows on the existing columns keeps the column order unchanged.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS], columns=results.columns)],
    ignore_index=True,
)

In [33]:
# Show the accumulated comparison table.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-10.70313,-187.543571,-8.602842,-11.826378,-227.255578,-9.974054,1.756902,0.005186
1,Ridge Regression,RandomSearchCV,-10.656224,-185.797295,-8.596707,-11.817273,-228.543652,-9.89959,0.119879,0.004787
2,Logistic Regression,GridSearchCV,-2.598827,-74.249838,0.0,-15.551736,-427.193998,-11.8,25.206599,0.008178
3,Logistic Regression,RandomSearchCV,-2.382356,-66.511798,0.0,-15.339236,-419.525248,-11.9,11.870457,0.011569
4,Bayesian Regression,GridSearchCV,-10.648329,-185.798567,-8.539542,-11.818543,-227.458023,-9.994391,101.338025,0.005583
5,Bayesian Regression,RandomSearchCV,-10.644641,-185.68975,-8.575282,-11.815765,-227.266346,-9.98549,0.177127,0.004788
6,SGD Regressor,GridSearchCV,-25.640898,-1129.135515,-17.545922,-25.70281,-1133.427969,-19.248381,12.775831,0.005386
7,SGD Regressor,RandomSearchCV,-30.202627,-1490.681438,-23.736422,-30.15237,-1462.561389,-24.830488,1.055977,0.011568


<h2 style="color:darkviolet;"><i>Perceptron</i></h2>

In [34]:
# Baseline fit; only tol is overridden (set to 0.5), every other hyper-parameter is the default.
# The fitted estimator's repr is the cell output.
# NOTE(review): Perceptron is a classifier, so every label-encoded target value is
# treated as a separate class — confirm this is intended for this target.
model5=linear_model.Perceptron(tol=0.5)
model5.fit(X,y)

Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
      fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty=None, random_state=0, shuffle=True, tol=0.5,
      validation_fraction=0.1, verbose=0, warm_start=False)

In [35]:
# Candidate values explored by both search strategies.
# NOTE(review): model5 is created without a penalty, in which case `alpha`
# presumably has no effect — verify, since both searches report identical scores.
parameters = {
    'alpha': [1e-5, 1e-3, 0.5, 1, 8, 21],
    'max_iter': [200, 500, 800],
}
res_GS, res_RS = scores_report(model5, parameters=parameters)

res_GS = result_line('Perceptron', 'GridSearchCV', res_GS)
res_RS = result_line('Perceptron', 'RandomSearchCV', res_RS)

# DataFrame.append was removed in pandas 2.0, so add both rows with pd.concat.
# Building the new rows on the existing columns keeps the column order unchanged.
results = pd.concat(
    [results, pd.DataFrame([res_GS, res_RS], columns=results.columns)],
    ignore_index=True,
)

In [36]:
# Show the accumulated comparison table.
results

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-10.70313,-187.543571,-8.602842,-11.826378,-227.255578,-9.974054,1.756902,0.005186
1,Ridge Regression,RandomSearchCV,-10.656224,-185.797295,-8.596707,-11.817273,-228.543652,-9.89959,0.119879,0.004787
2,Logistic Regression,GridSearchCV,-2.598827,-74.249838,0.0,-15.551736,-427.193998,-11.8,25.206599,0.008178
3,Logistic Regression,RandomSearchCV,-2.382356,-66.511798,0.0,-15.339236,-419.525248,-11.9,11.870457,0.011569
4,Bayesian Regression,GridSearchCV,-10.648329,-185.798567,-8.539542,-11.818543,-227.458023,-9.994391,101.338025,0.005583
5,Bayesian Regression,RandomSearchCV,-10.644641,-185.68975,-8.575282,-11.815765,-227.266346,-9.98549,0.177127,0.004788
6,SGD Regressor,GridSearchCV,-25.640898,-1129.135515,-17.545922,-25.70281,-1133.427969,-19.248381,12.775831,0.005386
7,SGD Regressor,RandomSearchCV,-30.202627,-1490.681438,-23.736422,-30.15237,-1462.561389,-24.830488,1.055977,0.011568
8,Perceptron,GridSearchCV,-16.592099,-536.596535,-12.4,-19.053522,-576.668899,-15.8,1.634635,0.011792
9,Perceptron,RandomSearchCV,-16.592099,-536.596535,-12.4,-19.053522,-576.668899,-15.8,0.705127,0.008364


In [37]:
# Replace every value from the third column onward with its absolute value,
# so the negated error scores read as positive magnitudes.
numeric_part = results.iloc[:, 2:]
results.iloc[:, 2:] = numeric_part.abs()

In [38]:
def color(grid):
    """Return one CSS background rule per value of ``grid``.

    Parameters
    ----------
    grid : pandas.Series
        The values of a single column, as passed by ``Styler.apply``.

    Returns
    -------
    list of str
        ``'background-color: red'`` for every value equal to the maximum,
        ``'background-color: green'`` for every value equal to the minimum,
        and ``''`` otherwise. The maximum check comes first, so when all
        values are equal they are all marked red.
    """
    # Compute the extremes once instead of once per element.
    highest = grid.max()
    lowest = grid.min()

    styles = []
    for value in grid:
        if value == highest:
            styles.append('background-color: red')
        elif value == lowest:
            styles.append('background-color: green')
        else:
            styles.append('')
    return styles

# Columns whose largest / smallest entries get highlighted by `color`.
highlighted_columns = [
    prefix + metric
    for prefix in ('train_', 'test_')
    for metric in ('neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error')
] + ['fit_time', 'score_time']

results_highlighted = results.style.apply(color, subset=highlighted_columns)

results_highlighted

Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,10.7031,187.544,8.60284,11.8264,227.256,9.97405,1.7569,0.00518613
1,Ridge Regression,RandomSearchCV,10.6562,185.797,8.59671,11.8173,228.544,9.89959,0.119879,0.00478745
2,Logistic Regression,GridSearchCV,2.59883,74.2498,0.0,15.5517,427.194,11.8,25.2066,0.00817828
3,Logistic Regression,RandomSearchCV,2.38236,66.5118,0.0,15.3392,419.525,11.9,11.8705,0.0115688
4,Bayesian Regression,GridSearchCV,10.6483,185.799,8.53954,11.8185,227.458,9.99439,101.338,0.00558319
5,Bayesian Regression,RandomSearchCV,10.6446,185.69,8.57528,11.8158,227.266,9.98549,0.177127,0.00478759
6,SGD Regressor,GridSearchCV,25.6409,1129.14,17.5459,25.7028,1133.43,19.2484,12.7758,0.0053865
7,SGD Regressor,RandomSearchCV,30.2026,1490.68,23.7364,30.1524,1462.56,24.8305,1.05598,0.0115678
8,Perceptron,GridSearchCV,16.5921,536.597,12.4,19.0535,576.669,15.8,1.63464,0.0117925
9,Perceptron,RandomSearchCV,16.5921,536.597,12.4,19.0535,576.669,15.8,0.705127,0.00836372
