# Batch Gradient Descent

In [1]:
from sklearn.datasets import load_diabetes

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the diabetes regression dataset as a (features, target) pair of arrays.
X, y = load_diabetes(return_X_y=True)

In [3]:
# Feature matrix dimensions: (n_samples, n_features).
X.shape

(442, 10)

In [4]:
# Target vector dimensions: one value per sample.
y.shape

(442,)

In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [6]:
# Closed-form scikit-learn linear regression, used as the reference model.
model = LinearRegression()

In [7]:
# Fit the reference model on the training split only.
model.fit(X_train, y_train)

In [8]:
# Coefficients learned by the reference model, one per feature.
model.coef_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [9]:
# Intercept (bias term) learned by the reference model.
model.intercept_

np.float64(151.34560453985995)

In [10]:
# Predictions of the reference LinearRegression model on the held-out split.
y_pred = model.predict(X_test)

In [11]:
# R^2 of the reference LinearRegression model on the held-out split.
r2_score(y_test, y_pred)

0.4526027629719195

In [12]:
# Size of the training split: (n_train_samples, n_features).
X_train.shape

(353, 10)

In [24]:
class GDRegressor:
    """Linear regression fitted by full-batch gradient descent on mean squared error.

    Every epoch uses all training rows to compute the gradient of the MSE with
    respect to the intercept and the coefficients, then takes one step of size
    ``learning_rate`` against that gradient.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to each gradient update.
    epochs : int, default 100
        Number of gradient updates (each one uses the whole training set).

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,), or None before ``fit``
        Learned feature weights.
    intercept_ : float, or None before ``fit``
        Learned bias term.
    """

    def __init__(self, learning_rate = 0.01, epochs = 100):
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs

    def fit(self, X_train, y_train):
        """Run ``epochs`` batch gradient-descent updates and print the result.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,) or (n_samples, 1)

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, ``y_train`` is not a single target per
            sample, the sample counts differ, or there are no samples.
        """
        # Accept lists / DataFrames as well as ndarrays.
        X = np.asarray(X_train, dtype=float)
        y = np.asarray(y_train, dtype=float)

        if X.ndim != 2:
            raise ValueError(
                f"X_train must be 2-D (n_samples, n_features); got ndim={X.ndim}"
            )
        # A column-vector target (n, 1) would broadcast against the (n,)
        # predictions into an (n, n) residual matrix, so flatten it first.
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.ravel()
        if y.ndim != 1:
            raise ValueError(
                f"y_train must hold one target per sample; got shape {y.shape}"
            )

        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X_train has {n_samples} samples but y_train has {y.shape[0]}"
            )

        # Starting point: zero intercept, all-ones coefficients.
        self.intercept_ = 0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            # Both gradients are evaluated at the same (pre-update) parameters.
            y_hat = np.dot(X, self.coef_) + self.intercept_
            residual = y - y_hat

            # d(MSE)/d(intercept) = -2 * mean(residual)
            intercept_der = -2 * np.mean(residual)
            # d(MSE)/d(coef) = -2 * X^T residual / n_samples
            coef_der = -2 * np.dot(residual, X) / n_samples

            self.intercept_ = self.intercept_ - (intercept_der * self.lr)
            self.coef_ = self.coef_ - (coef_der * self.lr)

        print(self.intercept_, self.coef_)

    def predict(self, X_test):
        """Return ``X_test @ coef_ + intercept_`` for each row of ``X_test``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError("GDRegressor.predict called before fit")
        return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

In [25]:
# Gradient-descent model with a larger step size and more epochs than the
# class defaults (0.01 and 100).
gdr = GDRegressor(learning_rate=0.5, epochs=1000)

In [26]:
# Train on the training split; fit() prints the final intercept and coefficients.
gdr.fit(X_train, y_train)

151.372591059285 [  41.82977756 -203.23644652  509.6557063   325.07401153  -71.07194191
 -119.33187737 -215.85264692  144.71021659  376.52729984  111.97619094]


In [27]:
# Predictions of the gradient-descent model on the held-out split.
# NOTE: this rebinds y_pred, replacing the LinearRegression predictions
# computed earlier in the notebook.
y_pred = gdr.predict(X_test)
y_pred

array([142.60324436, 177.19785743, 141.81392016, 289.42485473,
       124.90535184,  98.60365889, 252.14627931, 191.54963765,
        88.40748413, 115.47707116,  96.35172199, 154.79261989,
        65.93345787, 209.01670243, 105.5371887 , 135.53287907,
       223.24472755, 246.27551787, 193.69872198, 213.24851218,
       200.6151743 ,  89.4120434 ,  76.45489015, 188.45938192,
       153.95822179, 164.44818092, 187.74161676, 176.23289882,
        51.96425047, 117.05515562, 179.72382009,  94.33502363,
       133.32567487, 181.90899644, 173.06018732, 189.57822444,
       126.93785988, 123.66931004, 152.70864574,  61.45019841,
        81.66258969, 112.14060195, 158.75279231, 153.74444194,
       173.68627721,  66.11607616,  82.61251646, 106.0700445 ,
        61.79508243, 154.92928898, 152.69311452,  65.86799275,
       116.86807666, 109.79384523, 169.644332  , 154.74357987,
        98.77277237, 203.03032199, 115.27660378,  69.06617418,
       183.18750647, 195.93902285, 141.41534077, 111.71

In [28]:
# R^2 of the gradient-descent model on the held-out split (y_pred now holds
# the GDRegressor predictions).
r2_score(y_test, y_pred)

0.4588776166235029