In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [19]:
from sklearn.datasets import load_diabetes

# Load the diabetes dataset into a DataFrame with one named column per feature.
diabetes = load_diabetes()
data = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
data['target'] = diabetes.target
# Prepend a constant column of ones so a weight vector can carry the intercept at index 0.
data.insert(0, 'bias', 1)

# Design matrix (bias + features) and the target kept as a one-column frame.
X = data.drop(columns=['target'])  # Features
y = data[['target']]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Feature names without the leading 'bias' column, used to label coefficient tables.
columns = X_train.columns[1:]

## sklearn Linear Regression Result

In [21]:
from sklearn.linear_model import LinearRegression

# Reference fit with scikit-learn; the later hand-written solvers are compared against it.
model = LinearRegression().fit(X_train, y_train)

# Score the fit on the held-out rows.
y_pred = model.predict(X_test)

print(f"Model R2: {r2_score(y_test, y_pred)}")

Model R2: 0.4526027629719195


In [60]:
# y_train was passed as a one-column 2-D frame, so intercept_ is an array; [0] extracts the scalar.
print(f"Model intercept: {model.intercept_[0]}")

Model intercept: 151.34560453985995


In [25]:
# Tabulate the sklearn weights next to their feature names.
# model.coef_[0] is the weight row for the single target; its first entry belongs to the
# constant 'bias' column and is skipped so the values line up with `columns`.
# Building the frame from a dict keeps 'Coefficients' as a float column, instead of the
# object dtype produced by stacking names and numbers in one frame and transposing it.
coeffcients = pd.DataFrame({
    'Attribute': list(columns),
    'Coefficients': model.coef_[0][1:],
})
coeffcients

Unnamed: 0,Attribute,Coefficients
0,age,37.904021
1,sex,-241.964362
2,bmi,542.428759
3,bp,347.703844
4,s1,-931.488846
5,s2,518.062277
6,s3,163.419983
7,s4,275.317902
8,s5,736.198859
9,s6,48.670657


## Gradient Descent Approach

In [26]:
# Convert the train/test splits to plain NumPy arrays for the hand-written solvers.
# np.asarray accepts DataFrames and ndarrays alike, so re-running this cell is a no-op
# instead of failing with AttributeError (an ndarray has no .to_numpy()).
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

In [49]:
# Settings for the stochastic gradient descent run.
eta = 1e-2     # learning rate (step size of each weight update)
epochs = 1000  # number of full passes over the training rows
N = X_train.shape[0]  # number of training rows
M = X_train.shape[1]  # number of columns (bias + features) = number of weights
# Weights start at zero. They are real-valued, so allocate them as float: an integer
# array only worked because the update rebinds W to a new float array, and would
# break under an in-place update such as `W -= eta * g`.
W = np.zeros(M, dtype=float)

In [50]:
# Stochastic gradient descent on the squared error: one weight update per training row,
# visiting the rows in their stored order on every epoch.
# W is expected to be the flat (1-D) weight vector at this point.
for epoch in range(epochs):
    for i in range(len(X_train)):
        sample, target = X_train[i], y_train[i]
        # Residual of the current prediction; target is a length-1 array, so this is too.
        error = np.dot(sample, W) - target
        # Gradient of 0.5 * error**2 with respect to W is error * sample.
        W = W - eta * (error * sample)

In [51]:
# Turn the weights into a column vector so the product below is a column of predictions,
# one row per test sample.
W = W.reshape(-1, 1)

y_pred = np.matmul(X_test, W)

In [52]:
# R2 of the gradient-descent weights on the held-out test rows.
print(f"Model R2: {r2_score(y_test, y_pred)}")

Model R2: 0.4529443276337254


In [53]:
# W is a column vector here; row 0 is the weight of the 'bias' column, i.e. the intercept.
print(f"Model intercept: {W[0][0]}")

Model intercept: 148.9755609170459


In [54]:
# Round the weights to 4 decimals and flatten back to 1-D for display.
W = np.round(W, decimals=4).ravel()

In [55]:
# Tabulate the gradient-descent weights next to their feature names.
# W[0] is the weight of the constant 'bias' column, so it is skipped to line up with `columns`.
# Building the frame from a dict keeps 'Coefficients' as a float column, instead of the
# object dtype produced by stacking names and numbers in one frame and transposing it.
coeffcients = pd.DataFrame({
    'Attribute': list(columns),
    'Coefficients': W[1:],
})
coeffcients

Unnamed: 0,Attribute,Coefficients
0,age,34.5205
1,sex,-238.1606
2,bmi,552.9508
3,bp,349.287
4,s1,-121.9385
5,s2,-107.6815
6,s3,-199.3277
7,s4,151.3265
8,s5,428.4489
9,s6,54.5093


## Closed Form Solution

In [56]:
# Ordinary least squares via the normal equations: (X^T X) W = X^T y.
# Solving the linear system directly avoids forming the explicit inverse, which is
# both more expensive and less accurate when X^T X is poorly conditioned.
# y_train is a column, so W comes back as a column vector with one row per weight.
W = np.linalg.solve(X_train.T @ X_train, X_train.T @ y_train)

In [57]:
# Predictions of the closed-form weights for the held-out rows.
y_pred = np.matmul(X_test, W)

In [58]:
# R2 of the closed-form weights on the held-out test rows.
print(f"Model R2: {r2_score(y_test, y_pred)}")

Model R2: 0.45260276297192004


In [59]:
# W is a column vector here; row 0 is the weight of the 'bias' column, i.e. the intercept.
print(f"Model intercept: {W[0][0]}")

Model intercept: 151.34560453986


In [62]:
# Round the weights to 4 decimals and flatten to 1-D for display.
W = np.round(W, decimals=4).ravel()

In [63]:
# Tabulate the closed-form weights next to their feature names.
# W[0] is the weight of the constant 'bias' column, so it is skipped to line up with `columns`.
# Building the frame from a dict keeps 'Coefficients' as a float column, instead of the
# object dtype produced by stacking names and numbers in one frame and transposing it.
coeffcients = pd.DataFrame({
    'Attribute': list(columns),
    'Coefficients': W[1:],
})
coeffcients

Unnamed: 0,Attribute,Coefficients
0,age,37.904
1,sex,-241.9644
2,bmi,542.4288
3,bp,347.7038
4,s1,-931.4888
5,s2,518.0623
6,s3,163.42
7,s4,275.3179
8,s5,736.1989
9,s6,48.6707
