In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate

In [2]:
%%HTML
<style type="text/css">
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [5]:
namesDict = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]
data_set = pd.read_csv('housing.data',delimiter=r'\s+' ,names=namesDict)

In [7]:
data_set.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [15]:
x_set = data_set.iloc[:, :13]
y_set = data_set.iloc[:, 13]
y_set = np.ravel(y_set)

In [16]:
def error_report(model, parameters):
    model_GS = GridSearchCV(model, param_grid=parameters, cv = 3, iid = False, return_train_score=True, n_jobs=-1)
    model_RS = RandomizedSearchCV(estimator = model, param_distributions = parameters, cv=3, n_iter=4, iid=False, n_jobs=-1)
    scores_GS = cross_validate(model_GS, x_set, y_set, return_train_score=True, scoring=('neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error'),cv=5)
    scores_RS = cross_validate(model_RS, x_set, y_set, return_train_score=True, scoring=('neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error'),cv=5)
    return scores_GS, scores_RS

In [17]:
def make_line(model:str, search_strategy:str, results):
    for key, value in results.items():
        results[key] = results[key].mean()
    results['Model'] = model
    results['Search_strategy'] = search_strategy
    return results
    

In [18]:
results = pd.DataFrame(columns = ['Model','Search_strategy','train_neg_mean_absolute_error', 'train_neg_mean_squared_error', 'train_neg_median_absolute_error', 'test_neg_mean_absolute_error', 'test_neg_mean_squared_error', 'test_neg_median_absolute_error', 'fit_time', 'score_time'])


In [19]:
from sklearn import linear_model

## Ridge Regression

In [20]:
parameters = {'alpha':[0.1, 0.5, 1, 2, 3, 5, 8, 13, 1000, 5000]
             }

res_GS, res_RS = error_report(linear_model.Ridge(), parameters=parameters)

res_GS = make_line('Ridge Regression', 'GridSearchCV', res_GS)
res_RS = make_line('Ridge Regression', 'RandomSearchCV', res_RS)
results = results.append(res_GS, ignore_index=True)
results = results.append(res_RS, ignore_index=True)

## Bayesian Regression


In [22]:
parameters = {
    'n_iter': [100, 300, 500],
    'alpha_1': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'alpha_2': [1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_1':[1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0],
    'lambda_2':[1e-10, 1e-5, 0.001, 1.0, 5.0, 8.0]
} 
res_GS, res_RS = error_report(linear_model.BayesianRidge(), parameters=parameters)

res_GS = make_line('Bayesian Regression', 'GridSearchCV', res_GS)
res_RS = make_line('Bayesian Regression', 'RandomSearchCV', res_RS)
results = results.append(res_GS, ignore_index=True)
results = results.append(res_RS, ignore_index=True)

## Logistic Regression

In [23]:
parameters={
    'solver': ['lbfgs'],
    'C':[0.1, 1.0, 2, 5, 13, 21],
    'max_iter':[100, 300, 500]
}
res_GS, res_RS = error_report(linear_model.LogisticRegression(), parameters=parameters)

res_GS = make_line('Logistic Regression', 'GridSearchCV', res_GS)
res_RS = make_line('Logistic Regression', 'RandomSearchCV', res_RS)
results = results.append(res_GS, ignore_index=True)
results = results.append(res_RS, ignore_index=True)



ValueError: Unknown label type: 'continuous'

## Perceptron


In [24]:
parameters= {
    'alpha':[1e-5, 1e-3, 0.5, 1, 8, 21],
    'max_iter': [200, 500, 1000]
}
res_GS, res_RS = error_report(linear_model.Perceptron(tol=0.21), parameters=parameters)

res_GS = make_line('Perceptron', 'GridSearchCV', res_GS)
res_RS = make_line('Perceptron', 'RandomSearchCV', res_RS)
results = results.append(res_GS, ignore_index=True)
results = results.append(res_RS, ignore_index=True)

ValueError: Unknown label type: (array([ 5. ,  5.6,  6.3,  7. ,  7.2,  7.4,  7.5,  8.1,  8.3,  8.4,  8.5,
        8.7,  8.8,  9.5,  9.6,  9.7, 10.2, 10.4, 10.5, 10.8, 10.9, 11. ,
       11.3, 11.5, 11.7, 11.8, 11.9, 12. , 12.1, 12.3, 12.5, 12.6, 12.7,
       12.8, 13. , 13.1, 13.3, 13.4, 13.5, 13.6, 13.8, 13.9, 14.1, 14.2,
       14.3, 14.5, 14.6, 14.9, 15. , 15.1, 15.2, 15.4, 16.1, 16.2, 16.3,
       16.4, 16.5, 16.7, 16.8, 17.1, 17.2, 17.5, 17.6, 17.7, 17.8, 17.9,
       18.2, 18.3, 18.4, 18.5, 18.6, 18.7, 19. , 19.1, 19.3, 19.4, 19.5,
       19.6, 19.7, 19.8, 19.9, 20. , 20.1, 20.2, 20.3, 20.4, 20.5, 20.6,
       20.7, 20.8, 20.9, 21. , 21.1, 21.2, 21.4, 21.6, 21.7, 21.8, 21.9,
       22. , 22.1, 22.2, 22.3, 22.4, 22.5, 22.6, 22.7, 22.8, 22.9, 23. ,
       23.1, 23.2, 23.3, 23.7, 23.8, 23.9, 24.1, 24.3, 24.4, 24.5, 24.6,
       24.8, 25. , 25.2, 26.2, 26.4, 26.6, 27.1, 27.5, 27.9, 28.2, 28.4,
       28.5, 28.6, 29. , 29.1, 29.6, 29.8, 30.1, 30.7, 31. , 31.2, 31.5,
       32. , 32.2, 32.4, 32.7, 33.1, 33.2, 33.4, 33.8, 35.1, 35.2, 35.4,
       36. , 36.1, 36.5, 37.3, 42.8, 43.1, 43.5, 44. , 45.4, 46. , 48.8,
       50. ]),)

## SGDRegressor

In [25]:
parameters={
    'loss':['squared_loss', 'huber', 'squared_epsilon_insensitive'],
    'penalty':['l1', 'l2', 'elasticnet'],
    'alpha':[1e-8, 1e-3, 0.5, 3, 8],
    'max_iter':[200, 300, 800]
    
}
res_GS, res_RS = error_report(linear_model.SGDRegressor(tol=1e-3), parameters=parameters)
res_GS = make_line('SGD Regressor', 'GridSearchCV', res_GS)
res_RS = make_line('SGD Regressor', 'RandomSearchCV', res_RS)
results = results.append(res_GS, ignore_index=True)
results = results.append(res_RS, ignore_index=True)

In [26]:
results.head(100)


Unnamed: 0,Model,Search_strategy,train_neg_mean_absolute_error,train_neg_mean_squared_error,train_neg_median_absolute_error,test_neg_mean_absolute_error,test_neg_mean_squared_error,test_neg_median_absolute_error,fit_time,score_time
0,Ridge Regression,GridSearchCV,-3.734888,-28.98078,-2.697141,-4.164936,-34.17595,-3.260269,9.664654,0.004993
1,Ridge Regression,RandomSearchCV,-3.426658,-24.79366,-2.47574,-4.280242,-36.94329,-3.304348,0.070122,0.001396
2,Bayesian Regression,GridSearchCV,-3.315837,-22.59252,-2.389198,-4.138437,-37.02705,-2.916293,38.260155,0.003645
3,Bayesian Regression,RandomSearchCV,-3.295242,-22.20761,-2.399684,-4.085823,-36.77346,-2.865376,0.139021,0.005783
4,SGD Regressor,GridSearchCV,-12.91168,-249.702,-12.32984,-14.60777,-304.8777,-13.95634,2.270629,0.001596
5,SGD Regressor,RandomSearchCV,-73409700000000.0,-2.107477e+28,-70856520000000.0,-57781700000000.0,-1.28654e+28,-59640710000000.0,0.118362,0.003729
