In [1]:
#importing the required libraries
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score
import numpy as np
import pandas as pd

In [2]:
X, y = load_diabetes(return_X_y = True)
X.shape, y.shape

((442, 10), (442,))

In [3]:
#creating the test and the train set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=4)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((353, 10), (89, 10), (353,), (89,))

#### Since we want to compare gradient descent methodology, therefore we use sgdregressor instead of Ridge from sklearn.linear_model , few solvers in Ridge also uses gradient descent

In [4]:
from sklearn.linear_model import SGDRegressor

In [6]:
reg = SGDRegressor(penalty = 'l2', max_iter = 500, eta0 = 0.1, learning_rate = 'constant', alpha = 0.001)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
print(r2_score(y_test, y_pred))
print(reg.intercept_)
print(reg.coef_)

0.46167343212675094
[154.97844376]
[  48.89726393 -161.957453    371.99439592  268.85337444   -8.46350637
  -60.98786526 -169.11805563  139.37661395  335.66116561   94.92910271]


### using solver = sparse_cg for gradient descent methodology in rigde from sklearn.linear_model

In [7]:
from sklearn.linear_model import Ridge

In [9]:
regressor = Ridge(alpha = 0.1, solver = 'sparse_cg')
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
print(r2_score(y_test, y_pred))
print(reg.intercept_)
print(reg.coef_)

0.4693262885389844
[154.97844376]
[  48.89726393 -161.957453    371.99439592  268.85337444   -8.46350637
  -60.98786526 -169.11805563  139.37661395  335.66116561   94.92910271]


### ridge regressor using gradient descent from scratch

In [16]:
class RigdeGD():
    def __init__(self, epochs, learning_rate, alpha):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.alpha = alpha
        self.intercept_ = None
        self.coef_ = None
        
    def fit(self, X_train, y_train):
        self.coef_ = np.ones(X_train.shape[1])
        self.intercept_ = 0
        
        thetha = np.insert(self.coef_, 0, self.intercept_)
        
        X_train = np.insert(X_train, 0, 1, axis = 1)
        
        for i in range(self.epochs):
            thetha_der = np.dot(X_train.T, X_train).dot(thetha) - np.dot(X_train.T, y_train) + self.alpha*thetha
            thetha = thetha - self.learning_rate*thetha_der
            
        self.coef_ = thetha[1:]
        self.intercept_ = thetha[0]
        
        
    def predict(self, X_test):
        return np.dot(X_test, self.coef_) + self.intercept_

In [17]:
reg = RigdeGD(epochs = 500, alpha = 0.001, learning_rate = 0.005)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

In [18]:
print(r2_score(y_test, y_pred))
print(reg.intercept_)
print(reg.coef_)

0.47379622696725343
150.86972442733898
[  46.65040212 -221.37723801  452.11770407  325.54645024  -29.09573382
  -96.47613436 -190.90029664  146.3286965   400.80754967   95.08979987]
