In [9]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score

In [10]:
X,y = load_diabetes(return_X_y=True)

In [11]:
# call for train test split model

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.2,random_state=4)

In [12]:
# first we look, how to apply "gradient_descent" wala "Ridge_Regressor" using sklearn

In [13]:
from sklearn.linear_model import SGDRegressor

# obviously, it uses the "Stochastic_Gradient_Descent" wala version

In [14]:
reg = SGDRegressor(penalty='l2', max_iter=500, eta0=0.1, learning_rate='constant', alpha=0.001)   # 'l2' --> L2 norm

In [15]:
reg.fit(X_train,y_train)

y_pred = reg.predict(X_test)
print("R2 score",r2_score(y_test,y_pred))

R2 score 0.45413504436575924


In [16]:
print(reg.intercept_)
print(reg.coef_)

[162.36789981]
[  48.27996864 -155.72047654  370.24465796  269.78977572   -6.29083183
  -59.5308553  -166.43271384  136.29915152  327.62328677   95.78601363]


In [17]:
# we can solve it for 'Gradient_Descent' also by using 'Ridge_Regression'...

from sklearn.linear_model import Ridge

reg = Ridge(alpha=0.001, max_iter=500, solver='sparse_cg')

In [18]:
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

print("R2_score:", r2_score(y_test,y_pred))

R2_score: 0.46250101621736295


In [19]:
print(reg.intercept_)
print(reg.coef_)

151.10198521698274
[  34.52193767 -290.84083006  482.40183073  368.06788244 -852.44871299
  501.59162206  180.1111415   270.76336403  759.73536616   37.49137216]


In [20]:
# Let's try to make our own class for same

In [21]:
class MeraRidgeGD:

    def __init__(self,epochs,learning_rate,alpha):

        self.learning_rate = learning_rate
        self.alpha = alpha
        self.epochs = epochs
        self.intercept_ = None
        self.coef_ = None

        # Go to "NOTES" for better clarification :-
    
    def fit(self,X_train,y_train):

        self.intercept_ = 0 
        self.coef_ = np.ones(X_train.shape[1])    # Add 1's to starting of the each column  

        # we make a matrix named 'theta',in this total terms=(n+1), starting with 'intercept' and iske aage saare 'coefficients' honge.
        # usme se 'n' terms --> coefficient and '1' term--> intercept term

        theta = np.insert(self.coef_, 0, self.intercept_)   
            
             # it means we want to print our result in the form of [w0,w1,w2,....,wn] --> (which is the correct way)
             # if we write, theta = np.insert(self.intercept_, 0, self.coef_),then it prints it in the form of [w1,w2,...,wn,w0] which is wrong way.

             # Similarly, we also transform 'X_train' in similar way :- 
             
        X_train = np.insert(X_train, 0, 1, axis=1)   # So, 'X_train' looks like this (with the extra 1's in the first column)
        
        for i in range(self.epochs):
             # Now, come for finding original_value of "theta" :- (use notes for this)          

             theta_der = np.dot(X_train.T, X_train).dot(theta) - np.dot(X_train.T, y_train) + (self.alpha * theta)    
             theta = theta - (self.learning_rate * theta_der)
       
              
        self.intercept_ = theta[0]
        self.coef_ = theta[1:]
    
    def predict(self,X_test):

        return np.dot(X_test, self.coef_) + self.intercept_
        

In [22]:
reg = MeraRidgeGD(epochs=500,alpha=0.001,learning_rate=0.005)

reg.fit(X_train,y_train)
y_pred = reg.predict(X_test)

print("R2_score:", r2_score(y_test, y_pred))

# so, for same no. of epochs, it gives better "r2_score"  

R2_score: 0.47380182802609117


In [23]:
print(reg.intercept_)
print(reg.coef_)

150.86975316713463
[  46.65050914 -221.3750037   452.12080647  325.54248128  -29.09464178
  -96.47517735 -190.90017011  146.32900372  400.80267299   95.09048094]
