In [5]:
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score , mean_squared_error


In [6]:
# Load the diabetes regression dataset as a feature matrix `x` and a target vector `y`.
x,y=load_diabetes(return_X_y=True)

In [7]:
# Hold out 20% of the samples for testing; random_state=2 makes the split repeatable.
X_train,X_test,y_train,y_test=train_test_split(x,y,random_state=2,test_size=0.2)

--lets build the sochastic gradient descent 

In [79]:
class SGD:
    """Linear regression fitted with plain stochastic gradient descent.

    Every update draws one training row at random (with replacement) and
    takes a gradient step on that row's squared error.

    Parameters
    ----------
    learning_rate : float, default=0.1
        Constant step size applied to every update.
    epoch : int, default=100
        Number of passes; each pass performs ``n_sample`` single-row updates.
    random_state : int or None, default=None
        Seed for the row sampling. ``None`` draws from NumPy's global random
        state, so ``np.random.seed`` still controls the run.

    Attributes
    ----------
    intercept : float
        Fitted bias term (``None`` before ``fit``).
    coef_ : numpy.ndarray of shape (n_feature,)
        Fitted weights (``None`` before ``fit``).
    """

    def __init__(self, learning_rate=0.1, epoch=100, random_state=None):
        self.intercept = None
        self.coef_ = None
        self.learning_rate = learning_rate
        self.epoch = epoch
        self.random_state = random_state

    def fit(self, x, y):
        """Fit the model on ``x`` (n_sample, n_feature) and ``y`` (n_sample,).

        Returns
        -------
        SGD
            The fitted estimator itself, so calls can be chained.
        """
        # Accept lists / DataFrames as well as arrays: positional row indexing
        # below requires real ndarrays.
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float)
        n_sample, n_feature = x.shape

        self.intercept = 0.0
        self.coef_ = np.ones(n_feature)

        # Fall back to the global generator when no seed is given so existing
        # np.random.seed(...) based workflows behave exactly as before.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        for _ in range(self.epoch):
            for _ in range(n_sample):
                idx = rng.randint(0, n_sample)

                residual = y[idx] - (self.intercept + np.dot(x[idx], self.coef_))
                # Gradients of (y - y_hat)^2 w.r.t. the intercept and the weights.
                intercept_dr = -2 * residual
                coef_dr = -2 * residual * x[idx]

                self.intercept = self.intercept - (self.learning_rate * intercept_dr)
                self.coef_ = self.coef_ - (self.learning_rate * coef_dr)

        return self

    def predict(self, x):
        """Return ``intercept + x @ coef_`` for the rows of ``x``."""
        return self.intercept + np.dot(np.asarray(x, dtype=float), self.coef_)
                
           


---sgd

In [27]:
# scikit-learn baseline: SGD with a constant step size of 0.01 and max_iter=100.
# NOTE(review): no random_state is passed, so the fitted values printed below
# may not reproduce exactly on a re-run — confirm and pin a seed if needed.
sg=SGDRegressor(max_iter=100,learning_rate='constant',eta0=0.01)

In [28]:
# Fit the baseline on the training split only; the test split stays unseen.
sg.fit(X_train,y_train)




In [29]:
# Coefficients learned by the scikit-learn baseline.
sg.coef_

array([  55.76154952,  -66.32904153,  347.70576331,  246.13843663,
         18.26723614,  -27.4211046 , -172.33142557,  128.78286237,
        316.74834802,  128.26783798])

In [30]:
# Fitted intercept of the baseline (displayed as a one-element array).
sg.intercept_

array([150.9840119])

In [31]:
# R^2 of the baseline's predictions on the held-out test split.
r2_score(y_test,sg.predict(X_test))

0.4327401990275599

--- using 

In [100]:
# Custom SGD with step size 0.5 and an epoch budget of 1000.
# NOTE(review): this notebook defines `SGD` twice; which definition is used
# here depends on the order in which the cells were executed.
sgd=SGD(learning_rate=0.5,epoch=1000)


In [104]:
# Train the custom model on the training split; no validation data is passed.
sgd.fit(X_train,y_train)

In [95]:
class SGD:
    """Linear regression fitted with stochastic gradient descent and early stopping.

    Every update draws one training row at random (with replacement) and
    takes a gradient step on that row's squared error. After each epoch the
    mean squared error is evaluated and training may stop early.

    Parameters
    ----------
    learning_rate : float, default=0.1
        Constant step size applied to every update.
    epoch : int, default=100
        Maximum number of passes; each pass performs ``n_sample`` updates.
    early_stop_patience : int, default=10
        Number of consecutive epochs without a new best validation loss that
        is tolerated before stopping. Only used when a validation set is given.
    tol : float, default=1e-7
        Training stops when the training loss changes by less than ``tol``
        between two consecutive epochs.
    random_state : int or None, default=None
        Seed for the row sampling. ``None`` draws from NumPy's global random
        state, so ``np.random.seed`` still controls the run.

    Attributes
    ----------
    intercept : float
        Fitted bias term (``None`` before ``fit``).
    coef_ : numpy.ndarray of shape (n_feature,)
        Fitted weights (``None`` before ``fit``).
    """

    def __init__(self, learning_rate=0.1, epoch=100, early_stop_patience=10,
                 tol=1e-7, random_state=None):
        self.intercept = None
        self.coef_ = None
        self.learning_rate = learning_rate
        self.epoch = epoch
        self.early_stop_patience = early_stop_patience
        self.tol = tol
        self.random_state = random_state

    def fit(self, x_train, y_train, x_val=None, y_val=None):
        """Fit on the training data, optionally monitoring a validation set.

        Validation-based early stopping is active only when both ``x_val``
        and ``y_val`` are provided.

        Returns
        -------
        SGD
            The fitted estimator itself, so calls can be chained.
        """
        # Accept lists / DataFrames as well as arrays: positional row indexing
        # below requires real ndarrays.
        x_train = np.asarray(x_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        use_validation = x_val is not None and y_val is not None
        if use_validation:
            x_val = np.asarray(x_val, dtype=float)
            y_val = np.asarray(y_val, dtype=float)

        n_sample, n_feature = x_train.shape
        self.intercept = 0.0
        self.coef_ = np.ones(n_feature)

        # Fall back to the global generator when no seed is given so existing
        # np.random.seed(...) based workflows behave exactly as before.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        prev_loss = float('inf')      # training loss of the previous epoch
        best_val_loss = float('inf')  # lowest validation loss seen so far
        no_improvement_count = 0      # epochs since best_val_loss last improved

        for i in range(self.epoch):
            for _ in range(n_sample):
                idx = rng.randint(0, n_sample)

                residual = y_train[idx] - (self.intercept + np.dot(x_train[idx], self.coef_))
                # Gradients of (y - y_hat)^2 w.r.t. the intercept and the weights.
                intercept_dr = -2 * residual
                coef_dr = -2 * residual * x_train[idx]

                self.intercept = self.intercept - (self.learning_rate * intercept_dr)
                self.coef_ = self.coef_ - (self.learning_rate * coef_dr)

            current_train_loss = np.mean((y_train - self.predict(x_train)) ** 2)

            if use_validation:
                current_val_loss = np.mean((y_val - self.predict(x_val)) ** 2)

                # Compare against the best loss so far, not merely the previous
                # epoch: otherwise a loss that oscillates while drifting upward
                # keeps resetting the counter and patience never triggers.
                if current_val_loss < best_val_loss:
                    best_val_loss = current_val_loss
                    no_improvement_count = 0
                else:
                    no_improvement_count += 1
                    if no_improvement_count >= self.early_stop_patience:
                        print(f'Stopping early at epoch {i}')
                        return self

            # Converged: the training loss has effectively stopped changing.
            if abs(prev_loss - current_train_loss) < self.tol:
                print(f'Stopping early at epoch {i}')
                return self

            prev_loss = current_train_loss

        return self

    def predict(self, x):
        """Return ``intercept + x @ coef_`` for the rows of ``x``."""
        return self.intercept + np.dot(np.asarray(x, dtype=float), self.coef_)


In [102]:
# Coefficients learned by the custom SGD model, for comparison with sg.coef_.
sgd.coef_

array([-106.44017924, -319.29556705,  483.57125158,  250.03204808,
       -960.01743438,  544.74447027,  226.60356879,   92.40390269,
        803.54430755,  107.89575551])

In [103]:
# Intercept of the custom model (the attribute is `intercept`, without the
# trailing underscore that scikit-learn's `intercept_` uses).
sgd.intercept

188.14154447979172