In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import scale
from sklearn.preprocessing import normalize
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

# Preparing Data

In [2]:
SAMPLE_SIZE = 50
NUM_PREDICTORS = 20
NOISE_VAR = 16

X = np.random.rand(SAMPLE_SIZE, NUM_PREDICTORS)
X_scaled = scale(X, axis=0)
X_norm = normalize(X, norm='l2', axis=0)

In [3]:
y = (10 * X[:, 0] + 5 * X[:, 1] + 0.1 * X[:, 2] + 
     np.random.normal(loc=0, scale=NOISE_VAR, size=1)).reshape(SAMPLE_SIZE,1)
y_scaled = scale(y, axis=0)
y_norm = normalize(y, norm='l2', axis=0)

# Least Squares Linear Regression

In [4]:
clf = LinearRegression(normalize=True)
clf.fit(X, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=True)

In [5]:
clf.coef_

array([[ 1.00000000e+01,  5.00000000e+00,  1.00000000e-01,
        -2.72591271e-15, -2.41921089e-15, -3.14393950e-16,
        -1.83900263e-15,  8.97742057e-16,  2.28084479e-15,
         3.68396575e-16,  1.07786368e-15,  4.94753689e-16,
        -8.44681982e-16, -5.21867517e-16, -3.23604318e-15,
        -2.27684648e-15,  3.83762873e-15,  6.64257467e-16,
         2.25126226e-15, -6.16035058e-15]])

# Lasso

In [6]:
def lasso(alpha, X, y):
    clf_lasso = Lasso(alpha=alpha, normalize=True)
    clf_lasso.fit(X, y)
    return clf_lasso.coef_

In [12]:
alphas = [0.001, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]

d = []
for alpha in alphas:
    coeffs = lasso(alpha, X, y)
    d.append({'alpha': alpha, 'beta1': coeffs[0], 'beta2': coeffs[1]})

df_lasso = pd.DataFrame(d, columns=['alpha', 'beta1', 'beta2'])

In [13]:
df_lasso

Unnamed: 0,alpha,beta1,beta2
0,0.001,9.982312,4.985695
1,0.01,9.826605,4.830925
2,0.05,9.14386,4.077514
3,0.1,8.290537,3.135699
4,0.15,7.437346,2.193822
5,0.2,6.583966,1.252035
6,0.25,5.730585,0.310247
7,0.3,4.634039,0.0


# Ridge

In [14]:
def ridge(alpha, X, y):
    clf_ridge = Ridge(alpha, normalize=True)
    clf_ridge.fit(X, y)
    return clf_ridge.coef_    

In [15]:
alphas = [0.001, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]

d = []
for alpha in alphas:
    coeffs = ridge(alpha, X, y)
    d.append({'alpha': alpha, 'beta1': coeffs[0][0], 'beta2': coeffs[0][1]})

df_ridge = pd.DataFrame(d, columns=['alpha', 'beta1', 'beta2'])

In [16]:
df_ridge

Unnamed: 0,alpha,beta1,beta2
0,0.001,9.98245,5.001805
1,0.01,9.829527,5.015399
2,0.05,9.240273,5.031367
3,0.1,8.651909,4.988962
4,0.15,8.172153,4.911807
5,0.2,7.767373,4.8181
6,0.25,7.417544,4.717073
7,0.3,7.109787,4.613671
