In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import statsmodels.api as sm
from plotly.offline import init_notebook_mode
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from datasets import diabetes_data
from tools import polynomial_features

init_notebook_mode(connected=True)

In [2]:
# diabetes_data() yields six objects, unpacked here as a full feature/target
# pair followed by what the names suggest is a train/test split
# (NOTE(review): confirm the split semantics in the datasets module).
# The cells visible in this notebook only use original_X / original_y.
original_X, original_y, train_X, train_y, test_X, test_y = diabetes_data()

# Lasso Regression

## Alpha selection

In [3]:
# Candidate regularisation strengths: 80 log-spaced values from 1e-2 to 1e2.
n_alphas = 80
alphas = np.logspace(start=-2, stop=2, num=n_alphas)

In [4]:
# Mean cross-validated R^2 for every candidate alpha; the alpha to keep is the
# one sitting just before the R^2 curve starts to drop.
r2_means = [
    cross_val_score(Lasso(alpha=alpha), original_X, original_y).mean()
    for alpha in alphas
]

result = pd.DataFrame({'alpha': alphas, 'R^2 (mean)': r2_means})
fig = px.line(result, x='alpha', y='R^2 (mean)')
# alphas are log-spaced, so plot them on a log axis.
fig.update_xaxes(type='log').show()

## Coefficients

In [5]:
# Coefficient paths: fit one Lasso per alpha and keep its fitted coef_ vector.
coefs = [Lasso(alpha=alpha).fit(original_X, original_y).coef_ for alpha in alphas]

features = original_X.columns

# One row per alpha, one column per feature, plus the alpha itself for the x axis.
df_coefs = pd.DataFrame(coefs, columns=features).assign(alphas=alphas)
fig = px.line(df_coefs, x='alphas', y=features)
fig.update_xaxes(type='log').show()

## Fit Lasso Regression

In [6]:
# Final Lasso on the full data set; alpha=1.6 is presumably the value read off
# the cross-validation curve in the alpha-selection cell.
model = Lasso(alpha=1.6)
model = model.fit(original_X, original_y)

In [7]:
# Score on the very data the model was fitted on, i.e. an in-sample figure,
# not a held-out estimate.
model.score(original_X, original_y)

0.19559197774996095

# Polynomial Lasso Regression

In [8]:
# Expand the feature frame with the project's polynomial_features helper.
# NOTE(review): the second argument looks like the maximum degree; the preview
# columns (e.g. age^3) suggest per-feature powers, so confirm in tools.
original_X_3 = polynomial_features(original_X, 3)

In [9]:
# Standardise every polynomial feature with a scaler fitted on all rows, then
# rebuild a DataFrame that carries the original column labels.
scaled_values = StandardScaler().fit_transform(original_X_3)
original_X_3 = pd.DataFrame(scaled_values, columns=original_X_3.columns)

In [10]:
# Preview the first rows of the standardised polynomial feature frame.
original_X_3.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,...,age^3,sex^3,bmi^3,bp^3,s1^3,s2^3,s3^3,s4^3,s5^3,s6^3
0,0.8005,1.065488,1.297088,0.45984,-0.929746,-0.732065,-0.912451,-0.054499,0.418551,-0.370989,...,0.275032,1.065488,0.377399,-0.064821,-0.286006,-0.16187,-0.273916,-0.153371,-0.060035,-0.063947
1,-0.039567,-0.938537,-1.08218,-0.553511,-0.177624,-0.402886,1.564414,-0.830301,-1.436551,-1.938479,...,0.085271,-0.938537,-0.443382,-0.154693,-0.092651,-0.097913,0.533708,-0.273101,-0.898721,-1.854866
2,1.793307,1.065488,0.934533,-0.119218,-0.958674,-0.718897,-0.680245,-0.054499,0.060207,-0.545154,...,2.2185,1.065488,0.05236,-0.098143,-0.304752,-0.157803,-0.195606,-0.153371,-0.080217,-0.09142
3,-1.872441,-0.938537,-0.243771,-0.770658,0.256292,0.525397,-0.757647,0.721302,0.477072,-0.196823,...,-2.342956,-0.938537,-0.145283,-0.251742,-0.087215,-0.056747,-0.216753,-0.074819,-0.050301,-0.053193
4,0.113172,-0.938537,-0.764944,0.45984,0.082726,0.32789,0.171178,-0.054499,-0.672582,-0.980568,...,0.08583,-0.938537,-0.248335,-0.064821,-0.091156,-0.078224,-0.139319,-0.153371,-0.164274,-0.284748


## Alpha selection

In [11]:
# Candidate regularisation strengths for the polynomial model:
# 80 log-spaced values from 1e-1 to 1e2.
n_alphas = 80
alphas = np.logspace(start=-1, stop=2, num=n_alphas)

In [12]:
# Mean cross-validated R^2 for every candidate alpha; we pick the alpha just
# before the R^2 curve drops.
#
# The scaler is re-fitted inside every CV fold through a pipeline. original_X_3
# was standardised with statistics computed on all rows, so scoring a bare Lasso
# on it lets each training fold see the mean/std of its own validation rows.
# Standardisation is unaffected by a prior per-column affine rescaling, so
# re-scaling here is equivalent to fold-wise scaling of the raw features.
r2_means = [
    cross_val_score(
        make_pipeline(StandardScaler(), Lasso(alpha=alpha, max_iter=5000)),
        original_X_3,
        original_y,
    ).mean()
    for alpha in alphas
]

result = pd.DataFrame({'alpha': alphas, 'R^2 (mean)': r2_means})
fig = px.line(result, x='alpha', y='R^2 (mean)')
# alphas are log-spaced, so plot them on a log axis.
fig.update_xaxes(type='log')
fig.show()

## Coefficients

In [13]:
# Coefficient paths for the polynomial model: fit one Lasso per alpha on the
# standardised polynomial features and keep its fitted coef_ vector.
coefs = [
    Lasso(alpha=alpha, max_iter=5000).fit(original_X_3, original_y).coef_
    for alpha in alphas
]

features = original_X_3.columns

# One row per alpha, one column per feature, plus the alpha itself for the x axis.
df_coefs = pd.DataFrame(coefs, columns=features).assign(alphas=alphas)
fig = px.line(df_coefs, x='alphas', y=features)
fig.update_xaxes(type='log').show()

## Fit Polynomial Lasso Regression

In [14]:
# Final polynomial Lasso on the standardised degree-3 features.
# max_iter matches the value used by the alpha sweep and coefficient-path
# cells, so this fit gets the same solver budget as the models the alpha was
# chosen from; the library default cap is lower and may stop before convergence.
model = Lasso(alpha=1.7, max_iter=5000)
model = model.fit(original_X_3, original_y)

In [15]:
# Score on the very data the model was fitted on, i.e. an in-sample figure,
# not a held-out estimate.
model.score(original_X_3, original_y)

0.5363567412348906