# Types of GD:
* Batch GD
* Stochastic GD
* Mini batch GD

* Load diabetes dataset from sk learn
* Get results using normal LinearRegressor class for comparison

In [13]:
from sklearn.datasets import load_diabetes
import numpy as np
import time
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

X,y = load_diabetes(return_X_y=True)
print(X.shape)
print(y.shape)


(442, 10)
(442,)


In [2]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)
reg = LinearRegression()
reg.fit(X_train,y_train)

LinearRegression()

In [3]:
print(reg.coef_)
print(reg.intercept_)

[  -9.16088483 -205.46225988  516.68462383  340.62734108 -895.54360867
  561.21453306  153.88478595  126.73431596  861.12139955   52.41982836]
151.88334520854633


In [4]:
y_pred = reg.predict(X_test)
r2_score(y_test,y_pred)

0.4399387660024644

## Batch GD 

In [5]:
class GDRegressor:
    
    def __init__(self,learning_rate=0.01,epochs=100):
        
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
        
    def fit(self,X_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])
        
        for i in range(self.epochs):
            # update all the coef and the intercept
            y_hat = np.dot(X_train,self.coef_) + self.intercept_  # this is  matrix 
            #print("Shape of y_hat",y_hat.shape)
            intercept_der = -2 * np.mean(y_train - y_hat)
            self.intercept_ = self.intercept_ - (self.lr * intercept_der)
            
            coef_der = -2 * np.dot((y_train - y_hat),X_train)/X_train.shape[0]
            self.coef_ = self.coef_ - (self.lr * coef_der)
        
        print(self.intercept_,self.coef_)
    
    def predict(self,X_test):
        return np.dot(X_test,self.coef_) + self.intercept_

In [6]:
gdr = GDRegressor(epochs=1000,learning_rate=0.5)
gdr.fit(X_train,y_train)

152.0135263267291 [  14.38915082 -173.72674118  491.54504015  323.91983579  -39.32680194
 -116.01099114 -194.04229501  103.38216641  451.63385893   97.57119174]


In [9]:
y_pred = gdr.predict(X_test)
r2_score(y_test,y_pred)

0.4534524671450598

## stochastic GD

In [22]:
class SGDRegressor:
    
    def __init__(self,learning_rate=0.01,epochs=100):
        
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
        
    def fit(self,X_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])
        
        for i in range(self.epochs):
            for j in range(X_train.shape[0]):   # runs for every row in data
                idx = np.random.randint(0,X_train.shape[0])    #selects a index from data randomly
                
                y_hat = np.dot(X_train[idx],self.coef_) + self.intercept_  #this is a scalar value, not a matrix
                
                #derivative of intercept
                intercept_der = -2 * (y_train[idx] - y_hat)
                self.intercept_ = self.intercept_ - (self.lr * intercept_der)
                
                #derivative of coefficients
                coef_der = -2 * np.dot((y_train[idx] - y_hat),X_train[idx])
                self.coef_ = self.coef_ - (self.lr * coef_der)
        
        print(self.intercept_,self.coef_)
    
    def predict(self,X_test):
        return np.dot(X_test,self.coef_) + self.intercept_

In [23]:
sgd = SGDRegressor(learning_rate=0.01,epochs=40)
start = time.time()
sgd.fit(X_train,y_train)
print("The time taken is",time.time() - start)

152.22407219923466 [  56.29933027  -54.34314596  322.56036604  225.36316206   30.4030625
  -11.8754072  -151.23900214  123.77989899  290.06137732  115.32226618]
The time taken is 0.5025534629821777


In [24]:
y_pred = sgd.predict(X_test)
r2_score(y_test,y_pred)

0.4221340376192382

## SGD class of sk learn

In [25]:
from sklearn.linear_model import SGDRegressor
reg = SGDRegressor(max_iter=100,learning_rate='constant',eta0=0.01)
start = time.time()
reg.fit(X_train,y_train)
print("The time taken is",time.time() - start)

The time taken is 0.005807399749755859


In [17]:
y_pred = reg.predict(X_test)
r2_score(y_test,y_pred)

0.4328592508692001

## Mini batch GD

In [29]:
import random

class MBGDRegressor:
    
    def __init__(self,batch_size,learning_rate=0.01,epochs=100):
        
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        
    def fit(self,X_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])
        
        for i in range(self.epochs):
            
            for j in range(int(X_train.shape[0]/self.batch_size)):
                
                idx = random.sample(range(X_train.shape[0]),self.batch_size)
                
                y_hat = np.dot(X_train[idx],self.coef_) + self.intercept_
                #print("Shape of y_hat",y_hat.shape)
                intercept_der = -2 * np.mean(y_train[idx] - y_hat)
                self.intercept_ = self.intercept_ - (self.lr * intercept_der)

                coef_der = -2 * np.dot((y_train[idx] - y_hat),X_train[idx])
                self.coef_ = self.coef_ - (self.lr * coef_der)
        
        print(self.intercept_,self.coef_)
    
    def predict(self,X_test):
        return np.dot(X_test,self.coef_) + self.intercept_

In [30]:
mbr = MBGDRegressor(batch_size=int(X_train.shape[0]/50),learning_rate=0.01,epochs=100)
mbr.fit(X_train,y_train)

151.95118451048089 [  31.10062104 -144.79082807  457.10125574  306.50986245  -22.76511185
  -89.86924719 -191.9814387   113.1933633   404.48350227  113.92089213]


In [31]:
y_pred = mbr.predict(X_test)
r2_score(y_test,y_pred)

0.4541095465002125

## SGD regressor class of sk learn for implementing mini batch GD
Here there is no direct function for adding batch_size, so we make a function that tries to things in batches

In [32]:
from sklearn.linear_model import SGDRegressor
sgd = SGDRegressor(learning_rate='constant',eta0=0.1)

In [33]:
batch_size = 35

for i in range(100):
    
    idx = random.sample(range(X_train.shape[0]),batch_size)
    sgd.partial_fit(X_train[idx],y_train[idx])

In [34]:
sgd.coef_

array([  37.16568189,  -85.45625818,  324.65917615,  233.35279733,
         19.90448126,  -32.30417603, -169.25217266,  128.47402485,
        324.31976675,  107.70344111])

In [35]:
sgd.intercept_

array([162.82374475])

In [36]:
y_pred = sgd.predict(X_test)
r2_score(y_test,y_pred)

0.4160458098517019