In [81]:
from IPython.core.display import display, HTML

In [82]:
display(HTML("""<a href="https://www.kaggle.com/nsrose7224/stochastic-gradient-descent-regressor">Kaggle Original Notebook Link</a>"""))

In [83]:
import numpy as np
import pandas as pd

In [84]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.linear_model import SGDRegressor

In [85]:
df = pd.read_csv('data/data.csv')
df.head()

Unnamed: 0,number_people,date,timestamp,day_of_week,is_weekend,is_holiday,temperature,is_start_of_semester,is_during_semester,month,hour
0,37,2015-08-14 17:00:11-07:00,61211,4,0,0,71.76,0,0,8,17
1,45,2015-08-14 17:20:14-07:00,62414,4,0,0,71.76,0,0,8,17
2,40,2015-08-14 17:30:15-07:00,63015,4,0,0,71.76,0,0,8,17
3,44,2015-08-14 17:40:16-07:00,63616,4,0,0,71.76,0,0,8,17
4,45,2015-08-14 17:50:17-07:00,64217,4,0,0,71.76,0,0,8,17


In [86]:
data = df.values
print(type(data))
print(data.shape)

<class 'numpy.ndarray'>
(62184, 11)


In [87]:
D_TRAIN = data[:, 2:]
D_TRAIN[:5]

array([[61211, 4, 0, 0, 71.76, 0, 0, 8, 17],
       [62414, 4, 0, 0, 71.76, 0, 0, 8, 17],
       [63015, 4, 0, 0, 71.76, 0, 0, 8, 17],
       [63616, 4, 0, 0, 71.76, 0, 0, 8, 17],
       [64217, 4, 0, 0, 71.76, 0, 0, 8, 17]], dtype=object)

In [88]:
D_Y = data[:, 0]
D_Y[:5]

array([37, 45, 40, 44, 45], dtype=object)

In [89]:
D_Train, d_test, Y_Train, y_test = train_test_split(D_TRAIN, D_Y, test_size = 0.25, random_state = 42)

In [90]:
scaler = StandardScaler()
scaler.fit(D_Train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [91]:
d_train_trfm = scaler.transform(D_Train)

In [92]:
d_test_trfm = scaler.transform(d_test)

In [93]:
model = SGDRegressor()

In [94]:
alphas = np.power(10.00, -np.arange(1, 10))
alphas

array([1.e-01, 1.e-02, 1.e-03, 1.e-04, 1.e-05, 1.e-06, 1.e-07, 1.e-08,
       1.e-09])

In [95]:
loss_fun = ['squared_loss', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive']
loss_fun

['squared_loss', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive']

In [96]:
penalty = ['l2', 'l1', 'elasticnet']
penalty

['l2', 'l1', 'elasticnet']

In [97]:
lr = ['constant', 'optimal', 'invscaling', 'adaptive']
lr

['constant', 'optimal', 'invscaling', 'adaptive']

In [98]:
params_grid = {
   'alpha' : alphas,
   'loss' : loss_fun,
   'penalty': penalty,
   'learning_rate': lr
}

In [100]:
clf = GridSearchCV(model, params_grid, n_jobs = -1, return_train_score = True, verbose = 2)
clf.fit(d_train_trfm, Y_Train)

Fitting 5 folds for each of 432 candidates, totalling 2160 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   45.0s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:  3.6min
[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed:  8.0min
[Parallel(n_jobs=-1)]: Done 2160 out of 2160 | elapsed:  9.5min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SGDRegressor(alpha=0.0001, average=False,
                                    early_stopping=False, epsilon=0.1,
                                    eta0=0.01, fit_intercept=True,
                                    l1_ratio=0.15, learning_rate='invscaling',
                                    loss='squared_loss', max_iter=1000,
                                    n_iter_no_change=5, penalty='l2',
                                    power_t=0.25, random_state=None,
                                    shuffle=True, tol=0.001,
                                    validation_fraction=0.1, verbose=0,
                                    warm_s...
             param_grid={'alpha': array([1.e-01, 1.e-02, 1.e-03, 1.e-04, 1.e-05, 1.e-06, 1.e-07, 1.e-08,
       1.e-09]),
                         'learning_rate': ['constant', 'optimal', 'invscaling',
                                           'adaptive'],
                         'loss'

In [101]:
clf.cv_results_

{'mean_fit_time': array([ 0.07300158,  0.12555733,  0.10500746,  0.11614676,  0.14943156,
         0.15160761,  0.11338186,  0.14819474,  0.15292935,  0.08131089,
         0.10938988,  0.10515914,  5.8419414 , 12.92962966,  6.40584269,
         0.35550957,  0.48997622,  0.4961565 ,  0.09701166,  0.2742475 ,
         0.12933812, 10.55264583, 13.53474956, 13.79784727,  0.18856068,
         0.49830885,  0.33244114,  0.82815409,  1.06349163,  1.07866731,
         0.15059581,  0.29291677,  0.22675815,  0.35386424,  0.41525025,
         0.30343609,  0.57038693,  4.07357922,  0.69684582,  0.41603284,
         0.62095766,  0.59453125,  0.52949214,  0.59867368,  0.66849399,
         0.73262353,  2.55229421,  0.80927882,  0.07419639,  0.08223844,
         0.08345938,  0.09315376,  0.14366708,  0.13892522,  0.10349793,
         0.14065433,  0.14994965,  0.07220597,  0.09811044,  0.08981056,
         9.2902185 , 12.34900956, 13.11089087,  0.09203057,  0.1472971 ,
         0.12041936,  0.11177087, 

In [102]:
clf.best_estimator_

SGDRegressor(alpha=1e-05, average=False, early_stopping=False, epsilon=0.1,
             eta0=0.01, fit_intercept=True, l1_ratio=0.15,
             learning_rate='adaptive', loss='squared_epsilon_insensitive',
             max_iter=1000, n_iter_no_change=5, penalty='elasticnet',
             power_t=0.25, random_state=None, shuffle=True, tol=0.001,
             validation_fraction=0.1, verbose=0, warm_start=False)

In [103]:
clf.best_params_

{'alpha': 1e-05,
 'learning_rate': 'adaptive',
 'loss': 'squared_epsilon_insensitive',
 'penalty': 'elasticnet'}

In [104]:
clf.best_score_

0.5066340445727153

In [105]:
clf.best_index_

239