In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import scale
from sklearn.preprocessing import normalize
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

# Preparing Data

In [2]:
SAMPLE_SIZE = 50
NUM_PREDICTORS = 20
NOISE_VAR = 16

X = np.random.rand(SAMPLE_SIZE, NUM_PREDICTORS)
X_scaled = scale(X, axis=0)
X_norm = normalize(X, norm='l2', axis=0)

In [3]:
y = (10 * X[:, 0] + 5 * X[:, 1] + 0.1 * X[:, 2] + 
     np.random.normal(loc=0, scale=NOISE_VAR, size=1)).reshape(SAMPLE_SIZE,1)
y_scaled = scale(y, axis=0)
y_norm = normalize(y, norm='l2', axis=0)

# Least Squares Linear Regression

In [4]:
clf = LinearRegression(normalize=True)
clf.fit(X, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=True)

In [5]:
clf.coef_

array([[ 1.00000000e+01,  5.00000000e+00,  1.00000000e-01,
        -1.03972960e-16, -7.66717861e-16, -2.05405013e-15,
        -3.30389217e-15,  2.72412737e-15,  4.69123817e-15,
         1.46961222e-15,  1.20455187e-15, -1.95064625e-15,
        -1.35311905e-15,  3.31909397e-15,  4.18137976e-15,
        -4.97283679e-15,  2.96074201e-17,  0.00000000e+00,
         5.52446827e-15,  3.10382516e-15]])

# Lasso

In [6]:
def lasso(alpha, X, y):
    clf_lasso = Lasso(alpha=alpha, normalize=True)
    clf_lasso.fit(X, y)
    return clf_lasso.coef_

In [7]:
alphas = [0.001, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]

d = []
for alpha in alphas:
    coeffs = lasso(alpha, X, y)
    d.append({'alpha': alpha, 'beta1': coeffs[0], 'beta2': coeffs[1]})

df_lasso = pd.DataFrame(d, columns=['alpha', 'beta1', 'beta2'])

In [8]:
df_lasso

Unnamed: 0,alpha,beta1,beta2
0,0.001,9.973277,4.974883
1,0.01,9.733599,4.743121
2,0.05,8.67019,3.700437
3,0.1,7.340929,2.397082
4,0.15,6.011668,1.093727
5,0.2,4.683641,0.0
6,0.25,3.362053,0.0
7,0.3,2.040466,0.0


# Ridge

In [9]:
def ridge(alpha, X, y):
    clf_ridge = Ridge(alpha, normalize=True)
    clf_ridge.fit(X, y)
    return clf_ridge.coef_    

In [10]:
alphas = [0.001, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]

d = []
for alpha in alphas:
    coeffs = ridge(alpha, X, y)
    d.append({'alpha': alpha, 'beta1': coeffs[0][0], 'beta2': coeffs[0][1]})

df_ridge = pd.DataFrame(d, columns=['alpha', 'beta1', 'beta2'])

In [11]:
df_ridge

Unnamed: 0,alpha,beta1,beta2
0,0.001,9.981279,4.991013
1,0.01,9.817902,4.912912
2,0.05,9.184981,4.61465
3,0.1,8.550051,4.318842
4,0.15,8.03235,4.0773
5,0.2,7.596821,3.872397
6,0.25,7.221981,3.694126
7,0.3,6.893754,3.536252
