In [43]:
from datasets import diabetes_data

import statsmodels.api as sm

from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

import numpy as np
import pandas as pd

from tools import polynomial_features

import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

In [30]:
# Load the diabetes data through the project-local `diabetes_data` helper.
# NOTE(review): presumably the full feature/target set plus a train/test split
# of it -- confirm against `datasets.diabetes_data`. Only original_X and
# original_y are used by the cells visible in this notebook.
original_X, original_y, train_X, train_y, test_X, test_y = diabetes_data()

# Ridge Regression

## Alpha selection

In [31]:
# Candidate regularisation strengths: log-spaced values from 1e-2 up to 1e5.
n_alphas = 80
alphas = np.logspace(start=-2, stop=5, num=n_alphas)

In [32]:
# Mean cross-validated score for every candidate alpha. The alpha used for the
# final fit is picked from this curve: the value just before the R^2 drops.
r2_means = [
    cross_val_score(Ridge(alpha=alpha), original_X, original_y).mean()
    for alpha in alphas
]

result = pd.DataFrame({'alpha': alphas, 'R^2 (mean)': r2_means})

# Alphas span several orders of magnitude, hence the logarithmic x axis.
fig = px.line(result, x='alpha', y='R^2 (mean)')
fig.update_xaxes(type='log').show()

## Coefficients

In [40]:
# Coefficient paths: refit Ridge for each alpha and record the fitted
# coefficients so they can be plotted against the regularisation strength.
features = original_X.columns

coefs = []
for alpha in alphas:
    ridge = Ridge(alpha=alpha).fit(original_X, original_y)
    coefs.append(ridge.coef_)

# One row per alpha, one column per feature, plus the alpha itself for the x axis.
df_coefs = pd.DataFrame(coefs, columns=features).assign(alphas=alphas)

fig = px.line(df_coefs, x='alphas', y=features)
fig.update_xaxes(type='log').show()

## Fit Ridge Regression

In [38]:
# Final Ridge model on the full data set.
# NOTE(review): alpha=0.17 is presumably the value read off the
# cross-validation curve above -- confirm.
model = Ridge(alpha=0.17).fit(original_X, original_y)

In [39]:
# In-sample score: evaluated on the same original_X / original_y the model was
# fitted on, so this is not a held-out estimate of performance.
model.score(original_X, original_y)

0.5093131467978795

# Polynomial Ridge Regression

In [44]:
# Expand the feature frame with the project-local `polynomial_features` helper.
# NOTE(review): the second argument is presumably the maximum polynomial
# degree (the preview below shows columns such as age^3) -- confirm in `tools`.
original_X_3 = polynomial_features(original_X, 3)

In [45]:
# Preview the first rows of the expanded feature frame.
original_X_3.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,...,age^3,sex^3,bmi^3,bp^3,s1^3,s2^3,s3^3,s4^3,s5^3,s6^3
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,...,5.520148e-05,0.00013,0.000235,1.046373e-05,-8.648868e-05,-4.2e-05,-8.175128e-05,-1.741954e-08,7.890607e-06,-5.494752e-06
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,...,-6.666077e-09,-8.9e-05,-0.000136,-1.824927e-05,-6.030779e-07,-7e-06,0.0004120228,-6.159891e-05,-0.0003190284,-0.0007838807
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,...,0.0006206266,0.00013,8.8e-05,-1.823432e-07,-9.481539e-05,-4e-05,-3.387363e-05,-1.741954e-08,2.34863e-08,-1.743511e-05
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,...,-0.0007064657,-8.9e-05,-2e-06,-4.925509e-05,1.81164e-06,1.6e-05,-4.680222e-05,4.038488e-05,1.168476e-05,-8.205283e-07
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,...,1.559868e-07,-8.9e-05,-4.8e-05,1.046373e-05,6.092353e-08,4e-06,5.397674e-07,-1.741954e-08,-3.274173e-05,-0.0001014612


## Alpha selection

In [47]:
# Candidate regularisation strengths for the polynomial model: the same
# log-spaced grid (1e-2 to 1e5) as used for the plain Ridge section.
n_alphas = 80
alphas = np.logspace(start=-2, stop=5, num=n_alphas)

In [48]:
# Mean cross-validated score on the polynomial features for every candidate
# alpha. The alpha used for the final fit is picked from this curve: the value
# just before the R^2 drops.
r2_means = [
    cross_val_score(Ridge(alpha=alpha), original_X_3, original_y).mean()
    for alpha in alphas
]

result = pd.DataFrame({'alpha': alphas, 'R^2 (mean)': r2_means})

# Alphas span several orders of magnitude, hence the logarithmic x axis.
fig = px.line(result, x='alpha', y='R^2 (mean)')
fig.update_xaxes(type='log').show()

## Coefficients

In [50]:
# Coefficient paths for the polynomial model: refit Ridge for each alpha and
# record the fitted coefficients of every expanded feature.
features = original_X_3.columns

coefs = []
for alpha in alphas:
    ridge = Ridge(alpha=alpha).fit(original_X_3, original_y)
    coefs.append(ridge.coef_)

# One row per alpha, one column per feature, plus the alpha itself for the x axis.
df_coefs = pd.DataFrame(coefs, columns=features).assign(alphas=alphas)

fig = px.line(df_coefs, x='alphas', y=features)
fig.update_xaxes(type='log').show()

## Fit Polynomial Ridge Regression

In [52]:
# Final Ridge model on the polynomial features, fitted on the full data set.
# NOTE(review): alpha=0.11 is presumably the value read off the
# cross-validation curve above -- confirm.
model = Ridge(alpha=0.11).fit(original_X_3, original_y)

In [54]:
# In-sample score: evaluated on the same original_X_3 / original_y the model
# was fitted on, so this is not a held-out estimate of performance.
model.score(original_X_3, original_y)

0.5136018216214185