In [13]:
from sklearn.cross_validation import KFold
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet, Lars
from sklearn.metrics import mean_squared_error, r2_score
import math
import numpy as np
import pylab as pl
from sklearn.datasets import load_boston

In [14]:
# Load the Boston housing dataset via scikit-learn's bundled loader; the
# returned object's `.data` and `.target` fields are consumed further down.
# NOTE(review): load_boston is believed to have been deprecated in
# scikit-learn 1.0 and removed in 1.2 — confirm the pinned sklearn version.
boston = load_boston()

In [15]:
# Design matrix: every feature row from the dataset with a trailing constant 1
# appended (an explicit bias term).
# NOTE(review): the estimators fitted later are built with fit_intercept=True,
# so this extra column looks redundant — confirm before removing it.
bias_column = np.ones(len(boston.data))
x = np.column_stack((boston.data, bias_column))

# Regression target, one value per row of `x`.
y = boston.target

In [16]:
# Compare several linear regressors on the same data.
#
# For each estimator two numbers are reported:
#   * RMSE on training  - fit on all rows, score on those same rows.
#   * RMSE on 10-fold CV - fit on 9 folds, predict the held-out fold, pool the
#     squared errors of all held-out predictions and take the root of their
#     mean over the full sample count.
#
# NOTE(review): KFold(n, n_folds=...) is the legacy sklearn.cross_validation
# signature. On scikit-learn releases where that module no longer exists the
# equivalent is presumably
# `sklearn.model_selection.KFold(n_splits=10).split(x)` — confirm against the
# installed version before upgrading.

# Regularisation strength passed as `alpha` to lasso, ridge and elastic-net.
a = 0.8

for name, met in [
        ('linear regression', LinearRegression()),
        ('lasso', Lasso(fit_intercept=True, alpha=a)),
        ('ridge', Ridge(fit_intercept=True, alpha=a)),
        ('elastic-net', ElasticNet(fit_intercept=True, alpha=a)),
        ('lars', Lars(fit_intercept=True, n_nonzero_coefs=1)),
        ]:
    # --- In-sample error -------------------------------------------------
    met.fit(x, y)
    train_residuals = met.predict(x) - y
    rmse_train = np.sqrt(np.dot(train_residuals, train_residuals) / len(y))

    # --- Out-of-fold error -----------------------------------------------
    # The same estimator object is refitted on each training split, so after
    # this loop `met` holds the fit from the last fold, not the full-data fit.
    kf = KFold(len(x), n_folds=10)
    err = 0.0  # running sum of squared held-out residuals across folds
    for train, test in kf:
        met.fit(x[train], y[train])
        fold_residuals = met.predict(x[test]) - y[test]
        err += np.dot(fold_residuals, fold_residuals)

    # Every row is held out exactly once, hence the division by len(x).
    rmse_10cv = np.sqrt(err / len(x))

    print('Method: %s' % name)
    print('RMSE on training: %.4f' % rmse_train)
    print('RMSE on 10-fold CV: %.4f' % rmse_10cv)
    print("\n")

Method: linear regression
RMSE on training: 4.6795
RMSE on 10-fold CV: 5.8819


Method: lasso
RMSE on training: 5.0526
RMSE on 10-fold CV: 5.7724


Method: ridge
RMSE on training: 4.6916
RMSE on 10-fold CV: 5.8325


Method: elastic-net
RMSE on training: 5.0873
RMSE on 10-fold CV: 5.5340


Method: lars
RMSE on training: 8.4729
RMSE on 10-fold CV: 8.7822


