# Lab 7b - Regularisation

### Regularised regression with Ridge & LASSO

- FUNCTIONS: Ridge, RidgeCV, Lasso, LassoCV
- DOCUMENTATION: http://scikit-learn.org/stable/modules/linear_model.html
- DATA: 
  - Dataset 'Crime' (n=319 non-null, p=122, type=regression)
    - This data set contains data on violent crimes within a community.
    - Data Dictionary: http://archive.ics.uci.edu/ml/datasets/Communities+and+Crime
  - Dataset 'boston' 
    - This data set contains Boston house prices and candidate predictors.

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as st

In [2]:
# read data, remove categorical features, remove rows with missing values
crime = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data', 
                    header=None, na_values=['?'])
crime = crime.iloc[:, 5:]
crime.dropna(inplace=True)
crime.head()

Unnamed: 0,5,6,7,8,9,10,11,12,13,14,...,118,119,120,121,122,123,124,125,126,127
0,0.19,0.33,0.02,0.9,0.12,0.17,0.34,0.47,0.29,0.32,...,0.12,0.26,0.2,0.06,0.04,0.9,0.5,0.32,0.14,0.2
16,0.15,0.31,0.4,0.63,0.14,0.06,0.58,0.72,0.65,0.47,...,0.06,0.39,0.84,0.06,0.06,0.91,0.5,0.88,0.26,0.49
20,0.25,0.54,0.05,0.71,0.48,0.3,0.42,0.48,0.28,0.32,...,0.09,0.46,0.05,0.09,0.05,0.88,0.5,0.76,0.13,0.34
21,1.0,0.42,0.47,0.59,0.12,0.05,0.41,0.53,0.34,0.33,...,1.0,0.07,0.15,1.0,0.35,0.73,0.0,0.31,0.21,0.69
23,0.11,0.43,0.04,0.89,0.09,0.06,0.45,0.48,0.31,0.46,...,0.16,0.12,0.07,0.04,0.01,0.81,1.0,0.56,0.09,0.63


In [5]:
# optional: read column names:
crimenames = pd.read_csv('communities.data.names', header=None)
crimenames = crimenames.iloc[5:, :]
crimenames.tail()

Unnamed: 0,0
123,LemasPctPolicOnPatr
124,LemasGangUnitDeploy
125,LemasPctOfficDrugUn
126,PolicBudgPerPop
127,ViolentCrimesPerPop


In [31]:
# define X and y
X = crime.iloc[:, :-1]
y = crime.iloc[:, -1]

# split into train/test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [7]:
# How many columns are in X?
X.shape

(319, 122)

### Linear Regression Model Without Regularisation 

In [8]:
# linear regression
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
lm.fit(X_train, y_train)
lm.intercept_
lm.coef_
# What are these numbers? coefficients of all the features in X_train

array([-3.66188167e+00,  6.98124465e-01, -2.61955467e-01, -2.85270027e-01,
       -1.64740837e-01,  2.46972333e-01, -1.09290051e+00, -5.96857796e-01,
        1.11200239e+00, -7.21968931e-01,  4.27346598e+00, -2.28040268e-01,
        8.04875769e-01, -2.57934732e-01, -2.63458023e-01, -1.04616958e+00,
        6.07784197e-01,  7.73552561e-01,  5.96468029e-02,  6.90215922e-01,
        2.16759430e-02, -4.87802949e-01, -5.18858404e-01,  1.39478815e-01,
       -1.24417942e-01,  3.15003821e-01, -1.52633736e-01, -9.65003927e-01,
        1.17142163e+00, -3.08546690e-02, -9.29085548e-01,  1.24654586e-01,
        1.98104506e-01,  7.30804821e-01, -1.77337294e-01,  8.32927588e-02,
        3.46045601e-01,  5.01837338e-01,  1.57062958e+00, -4.13478807e-01,
        1.39350802e+00, -3.49428114e+00,  7.09577818e-01, -8.32141352e-01,
       -1.39984927e+00,  1.02482840e+00,  2.13855006e-01, -6.18937325e-01,
        5.28954490e-01,  7.98294890e-02,  5.93688560e-02, -1.68582667e-01,
        7.31264051e-01, -

In [9]:
st.describe(lm.coef_)

DescribeResult(nobs=122, minmax=(-36.794120528700006, 36.715295684780365), mean=-0.008324631712139551, variance=23.294819823700394, skewness=-0.021465688332930867, kurtosis=53.06238369462364)

In [10]:
# make predictions and evaluate
import numpy as np
from sklearn import metrics
preds = lm.predict(X_test)
print('RMSE (no regularisation) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))

RMSE (no regularisation) = 0.2338136764948684


### Ridge Regression Model 

In [11]:
# ridge regression (alpha must be positive, larger means more regularisation)
from sklearn.linear_model import Ridge
rreg = Ridge(alpha=0.1, normalize=True)
rreg.fit(X_train, y_train)
rreg.coef_
preds = rreg.predict(X_test)
print('RMSE (Ridge reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))
# Is this model better? Why? Yes, the ridge regression model has lower RMSE 

RMSE (Ridge reg.) = 0.16427906804924083


#### Ridge Regression with Cross-Validation 

In [12]:
# use RidgeCV to select best alpha:
from sklearn.linear_model import RidgeCV
alpha_range = 10.**np.arange(0, 3)
rregcv = RidgeCV(normalize=True, scoring='neg_mean_squared_error', alphas=alpha_range)
rregcv.fit(X_train, y_train)

# Print the optimal value of Alpha for Ridge Regression
print('Optimal Alpha Value: ', rregcv.alpha_)

# Print the RMSE for the ridge regression model
preds = rregcv.predict(X_test)
print ('RMSE (Ridge CV reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))
# What is the range of alpha values we are searching over?

Optimal Alpha Value:  1.0
RMSE (Ridge CV reg.) = 0.16312978234269107


### LASSO Regression Model 

In [28]:
# lasso (alpha must be positive, larger means more regularisation)
from sklearn.linear_model import Lasso
las = Lasso(alpha=0.01, normalize=True)
las.fit(X_train, y_train)
las.coef_
preds = las.predict(X_test)
print('RMSE (Lasso reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))

RMSE (Lasso reg.) = 0.19816522542866313


In [29]:
# try a smaller alpha
las = Lasso(alpha=0.001, normalize=True)
las.fit(X_train, y_train)
las.coef_
preds = las.predict(X_test)
print('RMSE (Lasso reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))

RMSE (Lasso reg.) = 0.16003902404387874


In [15]:
alpha_range

array([  1.,  10., 100.])

In [32]:
# use LassoCV to select best alpha (tries 100 alphas by default)
from sklearn.linear_model import LassoCV
alpha_range = 10.**np.arange(-5, 5)
print(alpha_range)
lascv = LassoCV(normalize=True, alphas=alpha_range, max_iter=10000)
lascv.fit(X_train, y_train)
print('Optimal Alpha Value: ',lascv.alpha_)
lascv.coef_
preds = lascv.predict(X_test)
print('RMSE (Lasso CV reg.) =', np.sqrt(metrics.mean_squared_error(y_test, preds)))

[1.e-05 1.e-04 1.e-03 1.e-02 1.e-01 1.e+00 1.e+01 1.e+02 1.e+03 1.e+04]




Optimal Alpha Value:  0.001
RMSE (Lasso CV reg.) = 0.16003902404387874


In [15]:
? LassoCV

### Task 1: Elastic Net Regularised Regression

#### Look up [Elastic Net](http://scikit-learn.org/stable/modules/linear_model.html#elastic-net) and complete the following.


(1) What is elastic net?
It is another linear regression model with combined L1 and L2 from the Lasso regression model as a regularizer.

(2) How does it work?


(3) Run elastic net on the above dataset.

In [35]:
l1 = np.linspace(0, 1, 5)
l1

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [37]:
# Set up and run the elastic net model
from sklearn.linear_model import ElasticNetCV

enet = ElasticNetCV(alphas=alpha_range, max_iter=1000)
enet.fit(X_train, y_train)
print('Optimal Alpha Value: ', enet.alpha_)
enet.coef_
preds = enet.predict(X_test)
print('RMSE (Elastic Net CV reg.)= ', np.sqrt(metrics.mean_squared_error(y_test, preds)))

Optimal Alpha Value:  0.01
RMSE (Elastic Net CV reg.)=  0.16039978886360107




### Task 2: Carry out Regularised Regression

(1) Run all three forms of regularised regression on the Boston Housing dataset.

(2) What do the coefficients mean?

(3) What would you advise someone living in Boston to try and raise the value of their home?


In [38]:
# load libraries and data:
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston

# Nb. the sklearn.datasets.load_*() functions return a 'bunch' object:
# (ref = http://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_mldata.html)
boston = load_boston()

# standardise variables:
scaler = StandardScaler()
X = scaler.fit_transform(boston["data"])

Y = boston["target"]
names = boston["feature_names"]

# Split into test and training data

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# Define an alpha range

alpha_range = 10**np.linspace(-5, 5)

In [40]:
# Build Lasso models over a range of alpha:

lascv = LassoCV(alphas=alpha_range, max_iter=10000, normalize=True)
lascv.fit(X_train, y_train)
print('Optimal Alpha Value: ', lascv.alpha_)
print(lascv.coef_)
preds = lascv.predict(X_test)
print('RMSE (Lasso CV Reg.): ', np.sqrt(metrics.mean_squared_error(y_test, preds)))



Optimal Alpha Value:  0.001
[ 0.          0.          0.00891952 -0.27423369  0.          0.
  0.         -0.         -0.          0.          0.          0.
 -0.         -0.         -0.         -0.19414627  0.          0.
 -0.         -0.         -0.         -0.         -0.         -0.
 -0.          0.          0.          0.          0.04335664 -0.
  0.         -0.          0.03491474 -0.         -0.06685424  0.
  0.         -0.          0.10575313  0.          0.          0.00890807
  0.         -0.1378172  -0.30954312 -0.         -0.         -0.
 -0.          0.          0.          0.          0.         -0.
  0.          0.          0.          0.          0.          0.
 -0.          0.          0.          0.         -0.          0.
 -0.         -0.          0.          0.05257892 -0.          0.
 -0.         -0.          0.          0.          0.          0.
  0.         -0.         -0.         -0.         -0.         -0.
 -0.         -0.          0.         -0.         -0.  

In [42]:
# Build cross-validated Ridge regression model

ridcv = RidgeCV(alphas=alpha_range, scoring='neg_mean_squared_error', normalize=True)
ridcv.fit(X_train, y_train)
print('Optimal Alpha Value: ', ridcv.alpha_)
print(ridcv.coef_)
preds = ridcv.predict(X_test)
print('RMSE (Ridge CV Reg.): ', np.sqrt(metrics.mean_squared_error(y_test, preds)))

Optimal Alpha Value:  1.0
[-1.35479199e-03  3.66493623e-03  5.67246153e-02 -6.65596102e-02
  7.50154730e-03  3.73570277e-03  1.48458510e-02 -8.26212596e-03
 -9.14751985e-04  8.17782143e-03 -9.58221848e-04  3.99333039e-03
 -2.04774531e-02 -3.80310378e-02 -7.06311041e-02 -9.07995340e-02
  3.92365601e-03  3.68491166e-02 -2.35269424e-02 -1.36618143e-02
 -9.83437557e-03  1.30086791e-02 -3.43297706e-02 -5.04638755e-02
 -9.82883411e-04  7.47392898e-02  2.63572032e-02 -1.07987605e-02
  3.16035521e-02 -2.17283831e-02 -4.45588182e-03 -1.06490401e-02
  4.42829964e-02 -3.72944143e-02 -6.18713730e-02  3.20124805e-02
  5.85549588e-03 -1.23569409e-02  6.53560040e-02  3.46461301e-02
  6.00524147e-02  6.39805254e-02  2.58651194e-02 -6.73126020e-02
 -7.02669216e-02 -5.05555985e-02 -6.41318316e-02  8.24959798e-03
  9.27945661e-03  2.77399795e-03  5.26650167e-02 -3.83854430e-03
 -6.04984296e-03 -1.53114959e-02  1.72393078e-02  2.11864055e-02
  4.40697120e-04  3.32044620e-03  7.16243927e-03  6.66440446e-03

In [None]:
# Build cross-validated Elastic Net Model



In [None]:
# Print dataset description



### *Interpret Results*



In [23]:
l = ['/']


SyntaxError: EOL while scanning string literal (<ipython-input-23-ff2df81d750b>, line 2)