# Lab - Regularization

## Week 4 Tuesday 11th June

### TASK: Regularized regression
### FUNCTIONS: Ridge, RidgeCV, Lasso, LassoCV
### DOCUMENTATION: http://scikit-learn.org/stable/modules/linear_model.html
### DATA: Crime (n=319 non-null, p=122, type=regression)
### DATA DICTIONARY: http://archive.ics.uci.edu/ml/datasets/Communities+and+Crime

This data set contains data on violent crimes within a community.

In [68]:
########## Prepare data ##########
# read in data, remove categorical features, remove rows with missing values
import pandas as pd
crime = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data', header=None, na_values=['?'])
crime = crime.iloc[:, 5:]
crime.dropna(inplace=True)
crime.head()

# define X and y
X = crime.iloc[:, :-1]
y = crime.iloc[:, -1]

# split into train/test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)


In [69]:
# How many columns are in X?
X.shape

(319, 122)

In [70]:
########## Linear Regression Model Without Regularization ##########
# linear regression
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
lm.fit(X_train, y_train)
lm.coef_
# What are these numbers?

array([ -3.66188167e+00,   6.98124465e-01,  -2.61955467e-01,
        -2.85270027e-01,  -1.64740837e-01,   2.46972333e-01,
        -1.09290051e+00,  -5.96857796e-01,   1.11200239e+00,
        -7.21968931e-01,   4.27346598e+00,  -2.28040268e-01,
         8.04875769e-01,  -2.57934732e-01,  -2.63458023e-01,
        -1.04616958e+00,   6.07784197e-01,   7.73552561e-01,
         5.96468029e-02,   6.90215922e-01,   2.16759430e-02,
        -4.87802949e-01,  -5.18858404e-01,   1.39478815e-01,
        -1.24417942e-01,   3.15003821e-01,  -1.52633736e-01,
        -9.65003927e-01,   1.17142163e+00,  -3.08546690e-02,
        -9.29085548e-01,   1.24654586e-01,   1.98104506e-01,
         7.30804821e-01,  -1.77337294e-01,   8.32927588e-02,
         3.46045601e-01,   5.01837338e-01,   1.57062958e+00,
        -4.13478807e-01,   1.39350802e+00,  -3.49428114e+00,
         7.09577818e-01,  -8.32141352e-01,  -1.39984927e+00,
         1.02482840e+00,   2.13855006e-01,  -6.18937325e-01,
         5.28954490e-01,

In [71]:
# make predictions and evaluate
import numpy as np
from sklearn import metrics
preds = lm.predict(X_test)
print('RMSE (no regularization) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))

RMSE (no regularization) = 0.233813676495


In [72]:
########## Ridge Regression Model ##########
# ridge regression (alpha must be positive, larger means more regularization)
from sklearn.linear_model import Ridge
rreg = Ridge(alpha=0.1, normalize=True)
rreg.fit(X_train, y_train)
rreg.coef_
#preds = rreg.predict(X_test)
#print('RMSE (Ridge reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))
# Is this model better? Why?

array([ -4.00298418e-03,   3.51647445e-02,   6.03535935e-02,
        -7.68532502e-02,  -1.76099849e-02,   4.53791433e-02,
         8.81586468e-03,  -2.88885814e-02,  -1.92143587e-02,
         3.36122201e-02,   5.71590736e-04,  -4.85438136e-02,
         5.55725157e-02,  -1.15934270e-01,  -1.11880845e-01,
        -3.32742094e-01,  -1.12302031e-02,   9.63833243e-02,
        -8.92057732e-02,   8.42691702e-02,  -1.67246717e-02,
         7.42520308e-03,  -1.21294025e-01,  -6.70155789e-02,
        -1.74250249e-03,   1.69446833e-01,   3.18217654e-02,
        -1.00209834e-01,   3.97535644e-02,  -1.19173054e-01,
        -1.04445267e-01,  -5.14946676e-03,   1.10071013e-01,
        -3.22958955e-02,  -1.40601627e-01,   7.72658029e-02,
         9.07962536e-02,  -3.78878862e-03,   4.61941793e-02,
         6.30299731e-02,  -3.09236932e-02,   1.02883578e-02,
         9.70425568e-02,  -1.28936944e-01,  -1.38268907e-01,
        -6.37169778e-02,  -8.80160419e-02,  -4.01991014e-02,
         8.11064596e-02,

In [73]:
preds = rreg.predict(X_test)
print('RMSE (Ridge reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))
# Is this model better? Why?

RMSE (Ridge reg.) = 0.164279068049


In [74]:
# use RidgeCV to select best alpha
from sklearn.linear_model import RidgeCV
alpha_range = 10.**np.arange(-2, 3)
rregcv = RidgeCV(normalize=True, scoring='neg_mean_squared_error', alphas=alpha_range)
rregcv.fit(X_train, y_train)

# Print the optimal value of Alpha for Ridge Regression
print('Optimal Alpha Value: ', rregcv.alpha_)

# Print the RMSE for the ridge regression model
preds = rregcv.predict(X_test)
print ('RMSE (Ridge CV reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))
# What is the range of alpha values we are searching over?

Optimal Alpha Value:  1.0
RMSE (Ridge CV reg.) = 0.163129782343


In [78]:
rregcv.coef_

array([ -1.35479199e-03,   3.66493623e-03,   5.67246153e-02,
        -6.65596102e-02,   7.50154730e-03,   3.73570277e-03,
         1.48458510e-02,  -8.26212596e-03,  -9.14751985e-04,
         8.17782143e-03,  -9.58221848e-04,   3.99333039e-03,
        -2.04774531e-02,  -3.80310378e-02,  -7.06311041e-02,
        -9.07995340e-02,   3.92365601e-03,   3.68491166e-02,
        -2.35269424e-02,  -1.36618143e-02,  -9.83437557e-03,
         1.30086791e-02,  -3.43297706e-02,  -5.04638755e-02,
        -9.82883411e-04,   7.47392898e-02,   2.63572032e-02,
        -1.07987605e-02,   3.16035521e-02,  -2.17283831e-02,
        -4.45588182e-03,  -1.06490401e-02,   4.42829964e-02,
        -3.72944143e-02,  -6.18713730e-02,   3.20124805e-02,
         5.85549588e-03,  -1.23569409e-02,   6.53560040e-02,
         3.46461301e-02,   6.00524147e-02,   6.39805254e-02,
         2.58651194e-02,  -6.73126020e-02,  -7.02669216e-02,
        -5.05555985e-02,  -6.41318316e-02,   8.24959798e-03,
         9.27945661e-03,

In [79]:
# use RidgeCV to select best alpha
from sklearn.linear_model import RidgeCV
alpha_range = 10.**np.arange(-2, 3)
rregcv = RidgeCV(normalize=True,  alphas=alpha_range)
rregcv.fit(X_train, y_train)

# Print the optimal value of Alpha for Ridge Regression
print('Optimal Alpha Value: ', rregcv.alpha_)

# Print the RMSE for the ridge regression model
preds = rregcv.predict(X_test)
print ('RMSE (Ridge CV reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))
# What is the range of alpha values we are searching over?

Optimal Alpha Value:  1.0
RMSE (Ridge CV reg.) = 0.163129782343


In [80]:
########## Lasso Regression Model ##########
# lasso (alpha must be positive, larger means more regularization)
from sklearn.linear_model import Lasso
las = Lasso(alpha=0.01, normalize=True)
las.fit(X_train, y_train)
las.coef_
#preds = las.predict(X_test)
#print('RMSE (Lasso reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))

array([ 0.        ,  0.        ,  0.        , -0.03974695,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        , -0.        , -0.        , -0.        ,
       -0.        , -0.        ,  0.        , -0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -0.        ,  0.        , -0.        , -0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.        , -0.27503063,
       -0.        , -0.        , -0.        , -0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.  

In [81]:
preds = las.predict(X_test)
print('RMSE (Lasso reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))

RMSE (Lasso reg.) = 0.198165225429


In [82]:
# try a smaller alpha
las = Lasso(alpha=0.0001, normalize=True)
las.fit(X_train, y_train)
las.coef_
preds = las.predict(X_test)
print('RMSE (Lasso reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))


RMSE (Lasso reg.) = 0.164502413721


In [83]:
# use LassoCV to select best alpha (tries 100 alphas by default)
from sklearn.linear_model import LassoCV
alpha_range = 10.**np.arange(-10, 10)
lascv = LassoCV(normalize=True, alphas=alpha_range)
lascv.fit(X_train, y_train)
print('Optimal Alpha Value: ',lascv.alpha_)
lascv.coef_
preds = lascv.predict(X_test)
print('RMSE (Lasso CV reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))



Optimal Alpha Value:  0.001
RMSE (Lasso CV reg.) = 0.160039024044


### Task 1: Carry out Elastic net regularised regression

### Lookup [Elastic Net](http://scikit-learn.org/stable/modules/linear_model.html#elastic-net) and complete the following.



1. What is elastic net?
2. How does it work?
3. Run elastic net on the above dataset

In [114]:
# Setup the elastic net model
from sklearn.linear_model import ElasticNetCV
alpha_range = 10.**np.arange(-2, 3)
enetcv = ElasticNetCV(alphas=alpha_range, l1_ratio=0.7)

enetcv.fit(X_train, y_train)
enetcv.coef_
#preds = enet.predict(X_test)
#print('RMSE (ENET CV reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))

array([ 0.        , -0.        ,  0.08672231, -0.22257432,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.        , -0.        , -0.        ,
       -0.        ,  0.        ,  0.05102937, -0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
        0.        , -0.        ,  0.        ,  0.0700438 ,  0.        ,
        0.        , -0.        ,  0.        , -0.        , -0.        ,
        0.        ,  0.        , -0.        ,  0.03387024,  0.        ,
        0.        ,  0.        ,  0.        , -0.04775612, -0.14505711,
       -0.03623363, -0.        , -0.        , -0.        ,  0.        ,
        0.15742425,  0.        ,  0.        , -0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.        ,  0.        ,  0.        ,  0.        , -0.        ,
       -0.        ,  0.        , -0.        ,  0.        ,  0.  

In [105]:
print (X_train)

       5     6     7     8     9     10    11    12    13    14   ...    117  \
185   0.28  0.33  0.66  0.45  0.09  0.02  0.51  0.60  0.48  0.37  ...   0.05   
637   0.13  0.53  0.25  0.70  0.10  0.12  0.42  0.53  0.35  0.38  ...   0.08   
447   0.08  0.49  1.00  0.00  0.13  0.19  0.45  0.55  0.35  0.28  ...   0.11   
1859  0.12  0.44  0.36  0.64  0.03  0.15  0.39  0.48  0.28  0.41  ...   0.26   
1050  0.29  0.34  0.08  0.86  0.24  0.04  0.63  0.78  0.71  0.27  ...   0.44   
1872  0.11  0.42  0.81  0.35  0.05  0.01  0.57  0.65  0.55  0.41  ...   0.03   
81    0.09  0.43  0.05  0.81  0.11  0.22  0.47  0.54  0.38  0.47  ...   0.48   
674   0.14  0.28  0.47  0.61  0.04  0.01  0.32  0.44  0.28  0.55  ...   0.22   
1595  0.14  0.56  0.85  0.29  0.09  0.03  0.76  0.83  0.77  0.37  ...   0.17   
1835  0.21  0.73  0.70  0.10  0.09  0.76  0.52  0.60  0.41  0.29  ...   0.18   
205   0.16  0.32  0.02  0.90  0.21  0.05  0.56  0.61  0.53  0.39  ...   0.24   
1313  1.00  0.44  0.10  0.72  0.10  0.37

In [106]:
print (X_test)

       5     6     7     8     9     10    11    12    13    14   ...    117  \
693   0.06  0.29  1.00  0.13  0.04  0.01  0.38  0.49  0.33  0.62  ...   0.12   
630   0.17  0.54  1.00  0.00  0.01  0.10  0.55  0.50  0.33  0.37  ...   0.21   
464   0.27  0.29  0.04  0.90  0.13  0.04  0.39  0.47  0.32  0.52  ...   0.35   
738   0.09  0.24  0.02  0.95  0.10  0.01  0.32  0.57  0.38  0.46  ...   0.09   
739   0.19  0.57  0.08  0.83  0.25  0.11  0.44  0.48  0.26  0.06  ...   0.07   
1536  0.30  0.42  0.87  0.30  0.03  0.02  0.45  0.47  0.30  0.43  ...   0.34   
1345  0.10  0.49  0.09  0.80  0.41  0.08  0.40  0.40  0.22  0.22  ...   0.19   
496   0.11  0.39  0.30  0.69  0.10  0.17  0.26  0.46  0.26  0.40  ...   0.02   
1008  0.03  0.37  0.16  0.83  0.09  0.05  0.50  0.65  0.52  0.41  ...   0.44   
1711  0.20  0.28  0.06  0.79  0.30  0.20  0.34  0.52  0.36  0.38  ...   0.44   
525   0.60  0.36  0.06  0.67  0.10  0.64  0.39  0.51  0.32  0.35  ...   0.32   
1420  0.08  0.39  0.14  0.88  0.02  0.01

In [109]:
preds = enetcv.fit(X_train, y_train).predict(X_test)
print('Optimal Alpha Value: ', enetcv.alpha_)
print('RMSE (ENET CV reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))


Optimal Alpha Value:  0.01
RMSE (ENET CV reg.) = 0.161381503338


In [110]:
# use r2_score
# why score<0?
from sklearn.metrics import r2_score
preds = enet.fit(X_train, y_train).predict(X_test)
print('R^2 (ENET reg.) =', r2_score(y_test, preds))

R^2 (ENET reg.) = -0.0136555307053


In [115]:
from sklearn.linear_model import ElasticNet
enet = ElasticNet(alpha=0.01, l1_ratio=0.7)

enet.fit(X_train, y_train)
enet.coef_

array([ 0.        , -0.        ,  0.08672231, -0.22257432,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.        , -0.        , -0.        ,
       -0.        ,  0.        ,  0.05102937, -0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
        0.        , -0.        ,  0.        ,  0.0700438 ,  0.        ,
        0.        , -0.        ,  0.        , -0.        , -0.        ,
        0.        ,  0.        , -0.        ,  0.03387024,  0.        ,
        0.        ,  0.        ,  0.        , -0.04775612, -0.14505711,
       -0.03623363, -0.        , -0.        , -0.        ,  0.        ,
        0.15742425,  0.        ,  0.        , -0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.        ,  0.        ,  0.        ,  0.        , -0.        ,
       -0.        ,  0.        , -0.        ,  0.        ,  0.  

### Task 2: Carry out Regularised Regression

1. Run all three forms of reularised regression on the Boston Housing DataSet
2. What do the coefficients mean?
3. What would you advise someone living in Boston to try and raise the value of their home?

In [11]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston
  
boston = load_boston()
scaler = StandardScaler()
X = scaler.fit_transform(boston["data"])
Y = boston["target"]
names = boston["feature_names"]
  
lasso = Lasso(alpha=.3)
lasso.fit(X, Y)

#A helper method for pretty-printing linear models
def pretty_print_linear(coefs, names = None, sort = False):
    if names == None:
        names = ["X%s" % x for x in range(len(coefs))]
    lst = zip(coefs, names)
    if sort:
        lst = sorted(lst,  key = lambda x:-np.abs(x[0]))
    return " + ".join("%s * %s" % (round(coef, 3), name)
                                   for coef, name in lst)
  
print("Lasso model: ", pretty_print_linear(lasso.coef_, names, sort = True))

Lasso model:  -3.707 * LSTAT + 2.992 * RM + -1.757 * PTRATIO + -1.081 * DIS + -0.7 * NOX + 0.631 * B + 0.54 * CHAS + -0.236 * CRIM + 0.081 * ZN + -0.0 * INDUS + -0.0 * AGE + 0.0 * RAD + -0.0 * TAX




In [89]:
X.head()

Unnamed: 0,5,6,7,8,9,10,11,12,13,14,...,117,118,119,120,121,122,123,124,125,126
0,0.19,0.33,0.02,0.9,0.12,0.17,0.34,0.47,0.29,0.32,...,0.29,0.12,0.26,0.2,0.06,0.04,0.9,0.5,0.32,0.14
16,0.15,0.31,0.4,0.63,0.14,0.06,0.58,0.72,0.65,0.47,...,0.22,0.06,0.39,0.84,0.06,0.06,0.91,0.5,0.88,0.26
20,0.25,0.54,0.05,0.71,0.48,0.3,0.42,0.48,0.28,0.32,...,0.36,0.09,0.46,0.05,0.09,0.05,0.88,0.5,0.76,0.13
21,1.0,0.42,0.47,0.59,0.12,0.05,0.41,0.53,0.34,0.33,...,1.0,1.0,0.07,0.15,1.0,0.35,0.73,0.0,0.31,0.21
23,0.11,0.43,0.04,0.89,0.09,0.06,0.45,0.48,0.31,0.46,...,0.29,0.16,0.12,0.07,0.04,0.01,0.81,1.0,0.56,0.09


In [90]:
y.head()

0     0.20
16    0.49
20    0.34
21    0.69
23    0.63
Name: 127, dtype: float64

In [92]:
print (boston.data.shape)

(506, 13)


In [48]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston
  
boston = load_boston()
scaler = StandardScaler()
X = scaler.fit_transform(boston["data"])
Y = boston["target"]
names = boston["feature_names"]
  
alpha_range = 10.**np.arange(-2, 3)
rregcv = RidgeCV(alphas=alpha_range, normalize=True)
rregcv.fit(X_train, y_train)

preds = rregcv.predict(X_test)
print('Optimal Alpha Value: ',rregcv.alpha_)
print ('RMSE (Ridge CV reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))


Optimal Alpha Value:  1.0
RMSE (Ridge CV reg.) = 0.163129782343


In [49]:

from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston
  
boston = load_boston()
scaler = StandardScaler()
X = scaler.fit_transform(boston["data"])
Y = boston["target"]
names = boston["feature_names"]
  
alpha_range = 10.**np.arange(-2, 3)
lascv = LassoCV(alphas=alpha_range, normalize=True)
lascv.fit(X_train, y_train)

preds = lascv.predict(X_test)
print('Optimal Alpha Value: ',lascv.alpha_)
print ('RMSE (Lasso CV reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))

Optimal Alpha Value:  0.01
RMSE (Lasso CV reg.) = 0.198165225429


In [47]:

from sklearn.linear_model import ElasticNetCV
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston
  
boston = load_boston()
scaler = StandardScaler()
X = scaler.fit_transform(boston["data"])
Y = boston["target"]
names = boston["feature_names"]
  
alpha_range = 10.**np.arange(-5, 6)
enetcv = ElasticNetCV(alphas=alpha_range, l1_ratio=0.7) 
enetcv.fit(X_train, y_train)

preds = enet.predict(X_test)
print('Optimal Alpha Value: ',enetcv.alpha_)
print ('RMSE (ENET reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))



Optimal Alpha Value:  0.01
RMSE (ENET reg.) = 0.160945198724


In [51]:
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston
  
boston = load_boston()
scaler = StandardScaler()
X = scaler.fit_transform(boston["data"])
Y = boston["target"]
names = boston["feature_names"]
  
enet = ElasticNet(alpha=.01)
enet.fit(X, Y)

#A helper method for pretty-printing linear models
def pretty_print_linear(coefs, names = None, sort = False):
    if names == None:
        names = ["X%s" % x for x in range(len(coefs))]
    lst = zip(coefs, names)
    if sort:
        lst = sorted(lst,  key = lambda x:-np.abs(x[0]))
    return " + ".join("%s * %s" % (round(coef, 3), name)
                                   for coef, name in lst)
  
print("ENET model: ", pretty_print_linear(enet.coef_, names, sort = True))

ENET model:  -3.711 * LSTAT + -3.014 * DIS + 2.697 * RM + 2.404 * RAD + -2.031 * PTRATIO + -1.956 * NOX + -1.835 * TAX + 1.023 * ZN + -0.887 * CRIM + 0.85 * B + 0.691 * CHAS + 0.042 * INDUS + -0.0 * AGE


