In [1]:
import numpy as np

In [3]:
class SGD:
    """Linear regression fitted with mini-batch stochastic gradient descent.

    The model is ``y_hat = X @ weights + bias``. Training minimises the mean
    squared error between ``y_hat`` and the targets.

    Parameters
    ----------
    lr : float, default 0.01
        Step size applied to every gradient update.
    epochs : int, default 1000
        Maximum number of passes over the training data.
    batch_size : int, default 32
        Number of samples per mini-batch. The last batch of an epoch may be
        smaller when the sample count is not a multiple of ``batch_size``.
    tol : float, default 1e-3
        Training stops early once the norm of the gradient computed on the
        whole training set (weights and bias together) drops below this value.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Coefficient vector of shape ``(n_features,)``; ``None`` before ``fit``.
    bias : float or None
        Intercept term; ``None`` before ``fit``.
    """

    def __init__(self, lr = 0.01, epochs = 1000, batch_size = 32, tol = 1e-3):
        self.learning_rate = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.tolerance = tol
        self.weights = None
        self.bias = None

    def predict(self, X):
        """Return ``X @ weights + bias`` for the rows of ``X``.

        ``fit`` must have been called first so that ``weights`` and ``bias``
        are set.
        """
        return np.dot(X, self.weights) + self.bias

    def mean_squared_error(self, y_true, y_pred):
        """Return the mean of the squared differences ``(y_true - y_pred)**2``."""
        return np.mean((y_true - y_pred)**2)

    def gradient(self, X_batch, y_batch):
        """Return the gradient of half the mean squared error on one batch.

        Returns
        -------
        tuple
            ``(gradient_weights, gradient_bias)`` where ``gradient_weights`` is
            ``X_batch.T @ error / n_rows`` and ``gradient_bias`` is the mean of
            ``error``, with ``error = prediction - y_batch``.
        """
        y_pred = self.predict(X_batch)
        error = y_pred - y_batch
        gradient_weights = np.dot(X_batch.T, error) / X_batch.shape[0]
        gradient_bias = np.mean(error)
        return gradient_weights, gradient_bias

    def fit(self, X, y):
        """Fit weights and bias to ``(X, y)`` with mini-batch SGD.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like with n_samples elements
            Training targets. Any shape with ``n_samples`` elements is
            accepted (for example ``(n_samples, 1)``); it is flattened to 1-D.

        Returns
        -------
        tuple
            ``(weights, bias)`` after training.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, the data is empty, ``X`` and ``y`` disagree on
            the number of samples, or ``batch_size`` is smaller than 1.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the targets: an (n, 1) column would otherwise broadcast
        # against the (n,) predictions into an (n, n) error matrix.
        y = np.asarray(y, dtype=float).reshape(-1)

        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")
        if self.batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        self.weights = np.random.randn(n_features)
        self.bias = np.random.randn()

        for epoch in range(self.epochs):
            # Visit the samples in a fresh random order every epoch.
            indices = np.random.permutation(n_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            for i in range(0, n_samples, self.batch_size):
                X_batch = X_shuffled[i:i+self.batch_size]
                y_batch = y_shuffled[i:i+self.batch_size]

                gradient_weights, gradient_bias = self.gradient(X_batch, y_batch)
                self.weights -= self.learning_rate*gradient_weights
                self.bias -= self.learning_rate*gradient_bias

            if epoch % 100 == 0 :
                y_pred = self.predict(X)
                loss = self.mean_squared_error(y , y_pred)
                print(f'Epoch {epoch} : , Loss : {loss}')

            # Judge convergence on the gradient over the whole training set,
            # including the bias term. The gradient of the last mini-batch is
            # a noisy estimate (that batch may hold only a few samples) and
            # says nothing about whether the bias has settled.
            full_grad_weights, full_grad_bias = self.gradient(X, y)
            if np.hypot(np.linalg.norm(full_grad_weights), full_grad_bias) < self.tolerance :
                print("convergence reached")
                break

        return self.weights, self.bias


            

In [4]:
# Fix the global NumPy seed so the synthetic data and the model's random
# initialisation are the same on every run of this cell.
np.random.seed(42)

# Create a random dataset with 100 rows and 5 columns.

X = np.random.randn(100, 5)

# Create the corresponding target values: a fixed linear combination of the
# columns plus a small amount of Gaussian noise.

y = np.dot(X, np.array([1,2,3,4,5])) + np.random.randn(100)*0.1 


model = SGD(lr=0.01, epochs=1000, batch_size=32, tol=1e-3) 


w,b = model.fit(X,y) 


# Predict with the fitted model (X @ w + b), giving one value per row.
# An element-wise `w*X + b` would instead produce a (100, 5) array.
y_pred = model.predict(X)

Epoch 0 : , Loss : 44.64571882045409
Epoch 100 : , Loss : 0.054739588680452325
Epoch 200 : , Loss : 0.010851255057184855
Epoch 300 : , Loss : 0.010751548720925841
Epoch 400 : , Loss : 0.010732402428372045
Epoch 500 : , Loss : 0.010735645619667897
Epoch 600 : , Loss : 0.010742011670094708
Epoch 700 : , Loss : 0.010738528393221014
Epoch 800 : , Loss : 0.010732094220865553
Epoch 900 : , Loss : 0.01073202187894904
