### 1. Prepare Data
#### 1.1 Get your X and y in the right shape

In [302]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [303]:
# Load the diabetes regression dataset that ships with scikit-learn. The
# returned object exposes the .data, .target and .feature_names attributes
# that the following cells read.
from sklearn.datasets import load_diabetes
diabetes = load_diabetes()



In [304]:
# Show the names of the feature columns (rendered as this cell's output).
diabetes.feature_names

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [305]:
# Wrap the raw feature matrix in a DataFrame labelled with the dataset's
# feature names, then preview the first rows.
df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641


In [306]:
# Attach the regression target as an extra column so that features and target
# live in a single frame (this mutates `df` in place).
df['target'] = diabetes.target

In [307]:
# Preview again to confirm that the `target` column was added.
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [308]:
# Per-column count of missing values; every count is expected to be zero.
df.isna().sum()

age       0
sex       0
bmi       0
bp        0
s1        0
s2        0
s3        0
s4        0
s5        0
s6        0
target    0
dtype: int64

In [309]:
# Feature matrix: every column except the target.
X = df.drop(columns='target')

In [310]:
# Target vector: the column the models are trained to predict.
y = df['target']

In [311]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [312]:
# Sanity check: each feature split must have exactly one target per row.
assert X_train.shape[0] == y_train.shape[0]
assert X_test.shape[0] == y_test.shape[0]

In [313]:
# Feature scaling: learn the per-column mean and standard deviation from the
# training split only, then apply that same transform to both splits so that
# no statistics from the test split leak into training.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [314]:
import numpy as np

class BGD:
    """Linear regression trained with batch (full-dataset) gradient descent.

    The model is ``y_hat = [1, x] @ theta``: a column of ones is prepended to
    the features, so ``theta[0]`` is the intercept and ``theta[1:]`` are the
    per-feature weights.

    Parameters
    ----------
    max_iter : int
        Upper bound on the number of gradient steps.
    lr : float
        Learning rate (step size) applied to the gradient.
    tol : float
        Early-stopping threshold: training stops once the absolute change in
        mean squared error between two consecutive iterations drops below it.

    Attributes
    ----------
    theta : numpy.ndarray or None
        Learned parameters, ``None`` until ``fit`` has been called.
    """

    def __init__(self, max_iter=10000, lr=0.01, tol=0.1):
        self.theta = None
        self.max_iter = max_iter
        self.lr = lr
        self.tol = tol   # tolerance to stop early

    @staticmethod
    def _with_intercept(X):
        """Return ``X`` as a float 2-D array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"Expected a 2-D feature matrix, got an array with {X.ndim} dimension(s)"
            )
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def fit(self, X_train, y_train):
        """Run gradient descent on the mean-squared-error objective.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,) or (n_samples, 1)

        Returns
        -------
        numpy.ndarray
            The parameter vector of shape (n_features + 1,), intercept first.

        Raises
        ------
        ValueError
            If the inputs are empty, have mismatched lengths, or do not match
            the shape of an already-initialised ``theta``.

        Notes
        -----
        If ``theta`` is already set (for example by an earlier ``fit`` call),
        optimisation resumes from it instead of restarting from zeros.
        """
        X = self._with_intercept(X_train)
        # Flatten the target: a column vector of shape (n, 1) would otherwise
        # broadcast (y_hat - y) into an (n, n) matrix and silently corrupt theta.
        y = np.asarray(y_train, dtype=float).reshape(-1)

        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty training set")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X_train has {n_samples} rows but y_train has {y.shape[0]} values"
            )

        if self.theta is None:
            self.theta = np.zeros(X.shape[1])
        elif self.theta.shape != (X.shape[1],):
            raise ValueError(
                f"Existing theta has shape {self.theta.shape}, "
                f"expected ({X.shape[1]},) for this feature matrix"
            )

        prev_loss = float('inf')

        for i in range(self.max_iter):
            y_hat = X @ self.theta
            residual = y_hat - y
            grad = (X.T @ residual) / n_samples
            self.theta = self.theta - (self.lr * grad)
            # The loss is measured with the parameters from *before* this
            # step's update, because it reuses the residual computed above.
            loss = np.mean(residual ** 2)
            if abs(prev_loss - loss) < self.tol:
                print(f"Stopped early at iteration {i}, loss={loss}")
                break
            prev_loss = loss

        return self.theta

    def predict(self, X_test):
        """Return predictions for ``X_test`` using the fitted parameters.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.theta is None:
            raise RuntimeError("BGD.predict() was called before fit()")
        return self._with_intercept(X_test) @ self.theta
        
        


In [315]:
# Instantiate the gradient-descent regressor with its hyperparameters spelled
# out (these are the class defaults) so they are visible and easy to tune here.
model = BGD(max_iter=10000, lr=0.01, tol=0.1)



In [316]:
# Train on the scaled training split. fit() returns the learned parameter
# vector (intercept first, then one weight per feature), which is displayed
# as this cell's output.
model.fit(X_train, y_train)


Stopped early at iteration 431, loss=2902.5976013062013


array([151.73584756,   1.85680004, -11.20379518,  25.99665468,
        16.38417941,  -4.4851279 ,  -6.12345354, -10.24947679,
         6.96975776,  19.51203318,   3.59362696])

In [None]:
# Predict on the held-out test features (already scaled with the scaler
# fitted on the training split).
y_pred = model.predict(X_test)

In [319]:
# Evaluate the gradient-descent model on the held-out split.
from sklearn.metrics import mean_absolute_error, r2_score
# This is the mean *absolute* error, so the variable is named `mae`
# (it was previously bound to a misleading `mse`).
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(mae)
print(r2)


43.069807274251424
0.45257818546809503


In [321]:
# Baseline: scikit-learn's closed-form least-squares fit on the same split,
# used to check how close the gradient-descent model gets.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train,y_train)
y_predicted = lr.predict(X_test)
# mean_absolute_error returns the MAE, so the variable is named accordingly
# (it was previously called `mean_squre`).
mae_sklearn = mean_absolute_error(y_test,y_predicted)
r2score = r2_score(y_test,y_predicted)
print(mae_sklearn,r2score)

42.79409467959994 0.45260276297191926
