In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
%matplotlib inline

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Load the diabetes dataset object and list the attributes it exposes.
diabetes = load_diabetes()
dir(diabetes)

In [None]:
# Print the dataset's DESCR attribute.
print(diabetes.DESCR)

In [None]:
# Wrap the feature matrix in a DataFrame whose columns are the dataset's feature names.
diabetes_data = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
diabetes_data.head()

In [None]:
# Copy the regression target into its own NumPy array and peek at the first five values.
diabetes_target = np.array(diabetes.target)
diabetes_target[:5]

In [None]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    diabetes_data,
    diabetes_target,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Summary statistics of the held-out targets
# (presumably a reference scale for the error metrics computed below).
pd.Series(y_test).describe()

## Simple Linear Regression

In [None]:
# Baseline model: LinearRegression with default settings.
lr = LinearRegression()

In [None]:
# Fit the baseline model on the training split (raw features).
lr.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with the baseline model.
y_pred = lr.predict(X_test)

In [None]:
# Mean absolute error of the current `y_pred` on the test split.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Root mean squared error: square root of the MSE of the current `y_pred` on the test split.
np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))

In [None]:
# Count the coefficients whose absolute value is below 0.01.
sum(abs(lr.coef_) < 1*10**-2)

In [None]:
# Show the fitted coefficients of the baseline model.
lr.coef_

## Using Polynomial features

In [None]:
# Standardise the training features; the scaler is fitted on the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Expand the scaled features to every degree-2 term (constant column included).
second_degree_polynomial = PolynomialFeatures(include_bias=True, degree=2)
X_train_scaled_poly = second_degree_polynomial.fit_transform(X_train_scaled)

In [None]:
# Sanity check: shapes of the expanded training matrix and the training target.
X_train_scaled_poly.shape, y_train.shape

In [None]:
# Rebind `lr` to a fresh LinearRegression and fit it on the scaled polynomial features.
lr = LinearRegression()
lr.fit(X_train_scaled_poly, y_train)

In [None]:
# Apply the preprocessing that was fitted on the training split to the test rows.
# `transform` reuses the training-set statistics; `fit_transform` would instead
# re-estimate the mean/variance from the test split, so the model would be
# evaluated on inputs scaled differently from the ones it was trained on.
X_test_scaled = scaler.transform(X_test)
X_test_scaled_poly = second_degree_polynomial.transform(X_test_scaled)

# Predict with the model trained on the scaled polynomial training features.
y_pred = lr.predict(X_test_scaled_poly)

In [None]:
# Mean absolute error of the current `y_pred` on the test split.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Root mean squared error: square root of the MSE of the current `y_pred` on the test split.
np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))

In [None]:
# Count the coefficients of the current `lr` whose absolute value is below 0.01.
sum(abs(lr.coef_) < 1*10**-2)

## Polynomial Features with Ridge

In [None]:
def ridge_predict(alpha: float, coef_threshold: float = 10 ** -3) -> None:
    """Fit a Ridge model for one regularisation strength and print its test metrics.

    Reads the notebook-level globals ``X_train_scaled_poly``, ``y_train``,
    ``X_test_scaled_poly`` and ``y_test`` at call time, so the printed numbers
    depend on whatever those names are bound to when the function is called.

    Args:
        alpha: Regularisation strength passed to ``Ridge``. Annotated as
            ``float`` because the sweep in this notebook passes fractional
            values such as ``10**-5`` as well as whole numbers.
        coef_threshold: A coefficient is counted as a "used" feature when its
            absolute value is strictly greater than this. Defaults to
            ``10**-3``, the value that was previously hard-coded.

    Returns:
        None. The MAE, RMSE and used-feature count are printed.
    """
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train_scaled_poly, y_train)
    y_pred = ridge.predict(X_test_scaled_poly)

    mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
    rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
    # Note: this counts coefficients *above* the threshold, whereas the
    # LinearRegression cells count coefficients *below* 0.01.
    features_used = sum(abs(ridge.coef_) > coef_threshold)

    print(f"MAE: {mae}")
    print(f"RMSE: {rmse}")
    print(f"Features used: {features_used}")

In [None]:
# Sweep alpha over powers of ten, from 10**-scope up to 10**(scope - 1).
scope = 5
for alpha in map(lambda exponent: 10 ** exponent, range(-scope, scope)):
    print(f"Alpha: {alpha}")
    ridge_predict(alpha=alpha)
    print("---")

## Feature correlation

In [None]:
# Degree-2 expansion of the raw (unscaled) training features, without the constant column.
# Note: this rebinds `second_degree_polynomial` to a new transformer.
second_degree_polynomial = PolynomialFeatures(include_bias=False, degree=2)
X = second_degree_polynomial.fit_transform(X_train)

# Put the expanded features and the training target in one frame; `target` is the last column.
data = pd.DataFrame(X).assign(target=y_train)
data.head()

In [None]:
# Take the `target` row of the correlation matrix and drop its last entry
# (`target` is the last column of `data`, so that entry is target-vs-target).
target_corr = data.corr().loc['target'][:-1]
target_corr

In [None]:
# (position, |correlation|) pairs for features whose absolute correlation with the target exceeds 0.5.
[(idx, strength) for idx, strength in enumerate(abs(target_corr)) if strength > 0.5]

In [None]:
# Display the expanded training feature matrix as a DataFrame.
pd.DataFrame(X)

In [None]:
# Keep only columns 2 and 8 of the expanded training features
# (presumably the positions reported by the correlation filter — confirm against its output).
new_data = pd.DataFrame(X[:, [2, 8]], columns=[2, 8])
new_data

In [None]:
# Sanity check: shape of the training target.
y_train.shape

In [None]:
# Rebind `lr` to a fresh LinearRegression and fit it on the two selected columns only.
lr = LinearRegression()
lr.fit(new_data, y_train)

In [None]:
# Expand the raw test features with the PolynomialFeatures instance already
# fitted on the raw training features; `transform` is sufficient, so the
# transformer is no longer refitted on the test split.
# NOTE(review): this rebinds `X_test_scaled_poly`, which `ridge_predict` reads
# as a global, to an *unscaled* frame. Re-running the Ridge cells after this
# cell would evaluate on the wrong features; consider a distinct name here.
X_test_scaled_poly = pd.DataFrame(second_degree_polynomial.transform(X_test))

# Predict from the same two columns (2 and 8) the model was fitted on.
y_pred = lr.predict(X_test_scaled_poly[[2, 8]])

In [None]:
# Mean absolute error of the current `y_pred` on the test split.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Root mean squared error: square root of the MSE of the current `y_pred` on the test split.
np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))