In [None]:
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import KFold

In [None]:
# Synthetic regression data: a small random design matrix and target vector.
# A fixed seed keeps every run of the notebook reproducible.
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)

# The target is drawn before the features so the random stream (and therefore
# the generated values) is the same as before. The later cells refer to the
# target as lowercase `y`, so that is the name bound here.
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)

# Backward-compatible alias for code that still uses the uppercase name.
Y = y

# Lasso 
`alpha` : float, default=1.0

    Constant that multiplies the L1 term. Defaults to 1.0. alpha = 0 is equivalent to ordinary least squares, solved by the LinearRegression object. For numerical reasons, using alpha = 0 with the Lasso object is not advised; use the LinearRegression object instead.


In [None]:
# Fit an L1-penalised linear model on the full data set.
# Raise alpha to regularise more strongly.
clf_lasso = linear_model.Lasso(alpha=0.1).fit(X, y)

# Learned coefficients first, then the intercept.
print(clf_lasso.coef_, clf_lasso.intercept_)

# Ridge
`alpha` : {float, ndarray of shape (n_targets,)}, default=1.0

    Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to 1 / (2C) in other linear models such as LogisticRegression or LinearSVC. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number.


In [None]:
# Fit a ridge regression model on the full data set.
# alpha is the regularisation strength; larger values regularise more strongly.
clf_ridge = linear_model.Ridge(alpha=0.1).fit(X, y)

# Learned coefficients first, then the intercept.
print(clf_ridge.coef_, clf_ridge.intercept_)

# LOOCV

In [None]:
# Leave-one-out cross-validation: each sample is held out once, the model is
# fitted on the remaining N - 1 samples, and the squared error on the held-out
# sample is accumulated. The RMSE over all N held-out predictions is reported.

# clf = linear_model.Lasso(alpha=0.1)
clf = linear_model.Ridge(alpha=1)  # choose one of lasso (L1) or ridge (L2), vary alpha, and check rmse

N = len(X)
sum_sq_errors = 0.0  # scalar accumulator, so the final RMSE is a plain number
for i in range(N):
    # X[i:i + 1] keeps the 2-D shape (1, n_features) that predict() expects.
    X_val, y_val = X[i:i + 1], y[i]
    X_train, y_train = np.delete(X, i, axis=0), np.delete(y, i, axis=0)

    clf.fit(X_train, y_train)
    # predict() returns a length-1 array here; take the single prediction.
    pred_y_val = clf.predict(X_val)[0]

    sq_error = (pred_y_val - y_val) ** 2
    sum_sq_errors += sq_error

rmse_val = np.sqrt(sum_sq_errors / N)
print(rmse_val)  # currently the dataset is random, so wouldn't make much sense

# k-fold CV

In [None]:
# k-fold cross-validation: the data is split into 3 folds; each fold in turn is
# used for validation while the model is fitted on the other two, and the RMSE
# on the validation fold is printed.

# clf = linear_model.Lasso(alpha=0.1)
clf = linear_model.Ridge(alpha=1)  # choose one, vary alpha, and check rmse

kf = KFold(n_splits=3)  # 3 fold CV
for train_index, val_index in kf.split(X):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    clf.fit(X_train, y_train)
    pred_y_val = clf.predict(X_val)

    sq_error = (pred_y_val - y_val) ** 2
    sum_sq_errors = np.sum(sq_error)
    # Average over the samples in THIS validation fold. Dividing by the size of
    # the whole data set would under-report the error of every fold.
    rmse_fold = np.sqrt(sum_sq_errors / len(y_val))  # rmse for each fold
    print(rmse_fold)