In [3]:
# Imported so that LaTeX renders properly in the notebook
from IPython.display import display, Math, Latex

In [4]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

np.random.seed(306)
plt.style.use('seaborn')

# Practice Assignment

In [1]:
from sklearn.metrics import explained_variance_score

def compute_explained_variance(y_true, y_pred):
    """Return the explained-variance score of ``y_pred`` against ``y_true``.

    Thin wrapper that forwards both arguments, unchanged and in the same
    order, to ``explained_variance_score`` with its default options.
    """
    score = explained_variance_score(y_true, y_pred)
    return score

In [5]:
# Worked example: the residuals y_true - y_pred are [-1, -3, -3, -1]
# (variance 1.0) and y_true has variance 4.5, so the score is 1 - 1/4.5.
y_true = np.array([7,4,9,4])
y_pred = np.array([8,7,12,5])
compute_explained_variance(y_true, y_pred)

0.7777777777777778

In [13]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Load the California housing features/target as pandas objects.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)

# Hold out 30% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [14]:
# Size of the held-out test split as (rows, columns).
X_test.shape

(6192, 8)

In [15]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor

# Two-step pipeline: standardize the features, then fit a linear model by
# SGD with an L1 penalty (alpha=0.01) and a fixed random seed.
sgd_pipe = Pipeline(steps=[
    ('std_scaler', StandardScaler()),
    ('sgd', SGDRegressor(penalty='l1', alpha=0.01, random_state=1)),
])

In [22]:
def compute_score(X_train, y_train, X_test, y_test, model=None):
    """Fit an estimator on the training split and score it on the test split.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed to ``model.fit``.
    X_test, y_test
        Held-out features and targets, passed to ``model.score``.
    model : optional
        Any object exposing ``fit(X, y)`` and ``score(X, y)``. When omitted,
        the notebook-level ``sgd_pipe`` is looked up at call time, which is
        the original behaviour. Passing the estimator explicitly avoids
        depending on whichever cell last rebound ``sgd_pipe``.

    Returns
    -------
    The value returned by ``model.score(X_test, y_test)``.

    Notes
    -----
    The estimator is fitted in place; no copy is made.
    """
    if model is None:
        # Backward-compatible default: fall back to the global pipeline.
        model = sgd_pipe
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)

# Fit on the training split and report the pipeline's score on the test split.
compute_score(X_train, y_train, X_test, y_test)

0.5951040704728554

# Graded Assignment

In [25]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import GridSearchCV

import sklearn

# Pipeline to tune: standardize the features, then fit an SGD regressor.
# NOTE: this rebinds the notebook-level name `sgd_pipe`.
sgd_pipe = Pipeline([
    ('std_scaler', StandardScaler()),
    ('sgd', SGDRegressor(random_state=1))
])

# scikit-learn 1.0 renamed the squared loss from 'squared_loss' to
# 'squared_error' and 1.2 removed the old spelling. Pick the name the
# installed release accepts so the grid runs on both old and new versions.
sklearn_major = int(sklearn.__version__.split('.')[0])
squared_loss_name = 'squared_error' if sklearn_major >= 1 else 'squared_loss'

# Keys use the '<step name>__<parameter>' form to target the 'sgd' step.
param_grid = {
    'sgd__loss': [squared_loss_name, 'huber'],
    'sgd__penalty': ['l1', 'l2'],
    'sgd__alpha': [0.1, 0.01, 0.001]
}

# Exhaustive search over the grid with 4-fold cross-validation.
sgd_gs_cv = GridSearchCV(
    sgd_pipe,
    param_grid=param_grid,
    cv=4
)

sgd_gs_cv.fit(X_train, y_train)

GridSearchCV(cv=4,
             estimator=Pipeline(steps=[('std_scaler', StandardScaler()),
                                       ('sgd', SGDRegressor(random_state=1))]),
             param_grid={'sgd__alpha': [0.1, 0.01, 0.001],
                         'sgd__loss': ['squared_loss', 'huber'],
                         'sgd__penalty': ['l1', 'l2']})

In [26]:
# Take the best pipeline found by the SGD grid search and score it on the
# held-out test split.
sgd_best = sgd_gs_cv.best_estimator_
sgd_best.score(X_test, y_test)

0.5951040704728554

In [27]:
# Hyper-parameter combination selected by the SGD grid search.
sgd_gs_cv.best_params_

{'sgd__alpha': 0.01, 'sgd__loss': 'squared_loss', 'sgd__penalty': 'l1'}

In [28]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

# Ridge regression on standardized features.
ridge_pipe = Pipeline(steps=[
    ('std_scaler', StandardScaler()),
    ('ridge', Ridge()),
])

# Candidate settings for the 'ridge' step ('<step name>__<parameter>' keys).
param_grid = dict(
    ridge__alpha=[0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
    ridge__fit_intercept=[True, False],
)

# Exhaustive search over the grid with 4-fold cross-validation.
ridge_gs_cv = GridSearchCV(estimator=ridge_pipe, param_grid=param_grid, cv=4)

ridge_gs_cv.fit(X_train, y_train)

GridSearchCV(cv=4,
             estimator=Pipeline(steps=[('std_scaler', StandardScaler()),
                                       ('ridge', Ridge())]),
             param_grid={'ridge__alpha': [0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
                         'ridge__fit_intercept': [True, False]})

In [29]:
# Take the best pipeline found by the Ridge grid search and score it on the
# held-out test split.
ridge_best = ridge_gs_cv.best_estimator_
ridge_best.score(X_test, y_test)

0.597145061224877

In [30]:
# Hyper-parameter combination selected by the Ridge grid search.
ridge_gs_cv.best_params_

{'ridge__alpha': 0.5, 'ridge__fit_intercept': True}

In [31]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

# Lasso regression on standardized features.
lasso_pipe = Pipeline(steps=[
    ('std_scaler', StandardScaler()),
    ('lasso', Lasso()),
])

# Candidate settings for the 'lasso' step ('<step name>__<parameter>' keys).
param_grid = dict(
    lasso__alpha=[0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
    lasso__fit_intercept=[True, False],
)

# Exhaustive search over the grid; this search uses 6 folds.
lasso_gs_cv = GridSearchCV(estimator=lasso_pipe, param_grid=param_grid, cv=6)

lasso_gs_cv.fit(X_train, y_train)

GridSearchCV(cv=6,
             estimator=Pipeline(steps=[('std_scaler', StandardScaler()),
                                       ('lasso', Lasso())]),
             param_grid={'lasso__alpha': [0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
                         'lasso__fit_intercept': [True, False]})

In [32]:
# Take the best pipeline found by the Lasso grid search and score it on the
# held-out test split.
lasso_best = lasso_gs_cv.best_estimator_
lasso_best.score(X_test, y_test)

0.5971275080716549

In [33]:
# Hyper-parameter combination selected by the Lasso grid search.
lasso_gs_cv.best_params_

{'lasso__alpha': 0.001, 'lasso__fit_intercept': True}