In [1]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
# Load the Boston housing data and hold out a test split.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run as written.
SPLIT_SEED = 0    # Fixed seed so the train/test split is the same on every run
boston_obj = load_boston()
data_train, data_test, price_train, price_test = train_test_split(
    boston_obj.data, boston_obj.target, random_state=SPLIT_SEED
)

# Remove feature columns 2 and 6 from both splits so they stay aligned.
# Presumably these are INDUS and AGE -- confirm against boston_obj.feature_names.
DROPPED_COLUMNS = [2, 6]
data_train = np.delete(data_train, DROPPED_COLUMNS, axis=1)
data_test = np.delete(data_test, DROPPED_COLUMNS, axis=1)

# Preview the first five training rows
data_train[:5, :]

array([[1.39140e-01, 0.00000e+00, 0.00000e+00, 5.10000e-01, 5.57200e+00,
        2.59610e+00, 5.00000e+00, 2.96000e+02, 1.66000e+01, 3.96900e+02,
        1.46900e+01],
       [1.44383e+01, 0.00000e+00, 0.00000e+00, 5.97000e-01, 6.85200e+00,
        1.46550e+00, 2.40000e+01, 6.66000e+02, 2.02000e+01, 1.79360e+02,
        1.97800e+01],
       [3.04900e-02, 5.50000e+01, 0.00000e+00, 4.84000e-01, 6.87400e+00,
        6.46540e+00, 5.00000e+00, 3.70000e+02, 1.76000e+01, 3.87970e+02,
        4.61000e+00],
       [3.56868e+00, 0.00000e+00, 0.00000e+00, 5.80000e-01, 6.43700e+00,
        2.89650e+00, 2.40000e+01, 6.66000e+02, 2.02000e+01, 3.93370e+02,
        1.43600e+01],
       [1.22472e+01, 0.00000e+00, 0.00000e+00, 5.84000e-01, 5.83700e+00,
        1.99760e+00, 2.40000e+01, 6.66000e+02, 2.02000e+01, 2.46500e+01,
        1.56900e+01]])

In [3]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

In [4]:
# Ridge regression on the training split; alpha is the hyperparameter that
# controls how strongly the coefficients are regularized.
ridge1 = Ridge(alpha=1).fit(data_train, price_train)

# Example prediction for a single hand-written observation. The values are
# given in the same column order as data_train (one row, eleven features).
ridge1.predict([[
    1,      # per capita crime rate
    25,     # proportion of land zoned for large homes
    1,      # 1 if the tract bounds the Charles River
    0.3,    # NOX concentration
    10,     # average number of rooms per dwelling
    10,     # weighted distance to employment centers
    3,      # highway accessibility index
    400,    # tax rate
    15,     # pupil/teacher ratio
    200,    # index based on the Black population share
    5,      # % lower status of population
]])

array([37.41907809])

In [5]:
# In-sample check: mean squared error of ridge1 on the data it was fitted to
predprice = ridge1.predict(X=data_train)
mean_squared_error(y_true=price_train, y_pred=predprice)

23.25118269704296

In [6]:
import pandas as pd
from pandas import DataFrame

In [7]:
# Candidate regularization strengths to compare
alpha = [.125, .25, .5, 1, 2, 4, 8, 16, 32, 64, 128]

# For each candidate, collect the 10 per-fold scores from cross-validation on
# the training split. Scores are negated MSE, so values closer to 0 are better.
res = {}
for a in alpha:
    ridge2 = Ridge(alpha=a)
    res[a] = cross_val_score(
        estimator=ridge2,
        X=data_train,
        y=price_train,
        scoring='neg_mean_squared_error',
        cv=10,
    )

# One column per alpha, one row per fold
res_df = DataFrame(data=res)

res_df

Unnamed: 0,0.125,0.25,0.5,1.0,2.0,4.0,8.0,16.0,32.0,64.0,128.0
0,-14.635625,-14.701867,-14.835239,-15.068587,-15.403057,-15.81129,-16.301969,-16.956458,-17.894646,-19.201561,-20.828613
1,-23.629367,-23.552984,-23.469379,-23.429403,-23.49686,-23.71578,-24.129884,-24.818943,-25.888827,-27.413378,-29.345565
2,-14.682757,-14.680942,-14.694838,-14.744045,-14.836133,-14.95301,-15.073976,-15.204143,-15.389505,-15.736204,-16.376293
3,-47.824613,-47.735442,-47.606465,-47.444942,-47.249708,-46.975299,-46.537342,-45.867964,-45.000076,-44.150658,-43.655323
4,-35.714631,-35.687942,-35.658405,-35.635763,-35.610698,-35.527135,-35.284424,-34.758607,-33.875681,-32.791982,-31.960505
5,-20.01543,-20.047495,-20.133153,-20.309311,-20.578301,-20.901986,-21.27264,-21.766121,-22.50982,-23.567716,-24.775806
6,-26.315807,-26.473824,-26.717082,-27.019327,-27.273953,-27.322252,-27.03883,-26.396549,-25.502856,-24.622878,-24.136236
7,-15.33091,-15.291269,-15.24934,-15.235652,-15.29144,-15.451015,-15.755023,-16.275577,-17.107523,-18.317694,-19.871385
8,-13.783461,-13.672063,-13.52548,-13.380647,-13.28615,-13.264056,-13.327443,-13.53981,-14.037248,-14.970317,-16.386652
9,-39.304959,-39.461071,-39.702806,-40.005212,-40.262367,-40.318636,-40.063976,-39.498378,-38.738838,-38.016259,-37.626885


In [8]:
# Average the fold scores column-wise: one mean negated MSE per candidate alpha
res_df.mean(axis=0)

0.125     -25.123756
0.250     -25.130490
0.500     -25.159219
1.000     -25.227289
2.000     -25.328867
4.000     -25.424046
8.000     -25.478551
16.000    -25.508255
32.000    -25.594502
64.000    -25.878865
128.000   -26.496326
dtype: float64

In [9]:
# Choose the alpha with the best mean cross-validation score instead of
# hard-coding a value copied from one run's output. The scores in res_df are
# negated MSE, so the best candidate is the column with the largest mean.
best_alpha = float(res_df.mean().idxmax())

# Refit on the full training split with the selected alpha
ridge3 = Ridge(alpha=best_alpha)
ridge3.fit(data_train, price_train)

# Final evaluation: mean squared error on the held-out test split
testpredprice = ridge3.predict(data_test)
mean_squared_error(price_test, testpredprice)

18.753019843734407