# In [1]:
import numpy as np
import sympy as sp
import matplotlib.pyplot as plt

# Symbolic variable used when pretty-printing the fitted line equation.
x_symbol = sp.symbols('x')

class OneVariable:
    """Fit a straight line ``y = m * x + b`` to one-variable data.

    The inputs are standardized on construction (mean subtracted, divided
    by the population standard deviation) and every fitting method works on
    the standardized copies. ``m`` and ``b`` therefore describe the line in
    standardized space; ``printEquation`` and ``plotResults`` map it back
    to the original units.
    """

    def __init__(self, x, y):
        """Standardize the data and set the default hyper-parameters.

        Args:
            x: Sequence of predictor values.
            y: Sequence of response values, paired element-wise with ``x``.

        Neither input should be constant: standardization divides by the
        standard deviation, so a zero spread produces NaN/inf values.
        """
        # Coerce to float arrays so slicing and positional indexing behave
        # the same for lists, tuples and other array-likes.
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float)

        # Statistics of the raw data, kept to undo the standardization.
        self.x_mean = np.mean(x)
        self.x_std = np.std(x)
        self.y_mean = np.mean(y)
        self.y_std = np.std(y)

        # Standardized copies used by every fitting method.
        self.x = (x - self.x_mean) / self.x_std
        self.y = (y - self.y_mean) / self.y_std

        # Slope and intercept of the line in standardized space.
        self.m = 0
        self.b = 0

        # Hyper-parameters of the iterative fitting methods.
        self.learningRate = 0.001
        self.maxSteps = 1000
        self.convergence_threshold = 1e-6
        self.batch_size = 5

        # Number of decimals used by printEquation; None disables rounding.
        self.round = None

    def SoSR(self, m=None, b=None):
        """Return the sum of squared residuals on the standardized data.

        Args:
            m: Slope to evaluate; defaults to ``self.m``.
            b: Intercept to evaluate; defaults to ``self.b``.

        Each argument falls back to the stored value independently, so a
        single override (e.g. ``SoSR(m=2.0)``) is honoured.
        """
        if m is None:
            m = self.m
        if b is None:
            b = self.b
        residuals = self.y - m * self.x - b
        return np.sum(np.square(residuals))

    def dSoSR_dm(self, x_batch=None, y_batch=None):
        """Return the derivative of the SoSR with respect to ``m``.

        Args:
            x_batch: Standardized x values to evaluate on.
            y_batch: Standardized y values paired with ``x_batch``.

        The full data set is used unless both batches are supplied.
        """
        if x_batch is None or y_batch is None:
            x_batch = self.x
            y_batch = self.y
        residuals = y_batch - self.m * x_batch - self.b
        return -2 * np.sum(residuals * x_batch)

    def dSoSR_db(self, x_batch=None, y_batch=None):
        """Return the derivative of the SoSR with respect to ``b``.

        Args:
            x_batch: Standardized x values to evaluate on.
            y_batch: Standardized y values paired with ``x_batch``.

        The full data set is used unless both batches are supplied.
        """
        if x_batch is None or y_batch is None:
            x_batch = self.x
            y_batch = self.y
        residuals = y_batch - self.m * x_batch - self.b
        return -2 * np.sum(residuals)

    def LeastSquaresRegression(self):
        """Fit with the closed-form least-squares solution and show it."""
        print("\033[1mLeast Squares Regression Line:\033[0m")
        n = len(self.x)
        x_bar = np.mean(self.x)
        y_bar = np.mean(self.y)
        xy_sum = np.sum(self.x * self.y)
        x_squares_sum = np.sum(np.square(self.x))
        self.m = (n * x_bar * y_bar - xy_sum) / (n * (x_bar ** 2) - x_squares_sum)
        self.b = y_bar - self.m * x_bar

        self.show()

    def GradientDescent(self):
        """Fit with full-batch gradient descent and show the result.

        Each iteration proposes one step along the negative gradient. The
        loop stops early when the loss changes by less than
        ``convergence_threshold``; the proposed step of that final
        iteration is not applied.
        """
        print("\033[1mGradient Descent:\033[0m")
        losses = []
        # Compare the first step against the loss of the starting point.
        prev_loss = self.SoSR()
        for i in range(self.maxSteps):
            m_temp = self.m - self.learningRate * self.dSoSR_dm()
            b_temp = self.b - self.learningRate * self.dSoSR_db()
            loss = self.SoSR(m_temp, b_temp)
            losses.append(loss)

            if abs(loss - prev_loss) < self.convergence_threshold:
                print("Converged at iteration", i)
                break
            prev_loss = loss
            self.m = m_temp
            self.b = b_temp

        self.show(losses)

    def _descend_in_batches(self, batch_size):
        """Run gradient descent over consecutive batches of ``batch_size``.

        Updates ``self.m`` and ``self.b`` after every batch and returns the
        list of full-data losses recorded after each update. Convergence is
        checked once per pass over the data.
        """
        losses = []
        # Start from the current loss so the first check is meaningful and
        # ``loss`` is defined even when there are no batches to process.
        prev_loss = self.SoSR()
        loss = prev_loss
        n = len(self.x)
        for i in range(self.maxSteps):
            for start in range(0, n, batch_size):
                stop = start + batch_size
                x_batch = self.x[start:stop]
                y_batch = self.y[start:stop]
                # Evaluate both gradients before touching either parameter.
                m_gradient = self.dSoSR_dm(x_batch, y_batch)
                b_gradient = self.dSoSR_db(x_batch, y_batch)
                self.m = self.m - self.learningRate * m_gradient
                self.b = self.b - self.learningRate * b_gradient
                loss = self.SoSR()
                losses.append(loss)

            if abs(loss - prev_loss) < self.convergence_threshold:
                print("Converged at iteration", i)
                break
            prev_loss = loss
        return losses

    def MiniBatchGradientDescent(self):
        """Fit with mini-batch gradient descent and show the result.

        Batches are consecutive slices of ``batch_size`` points.
        """
        print("\033[1mMini Batch Gradient Descent:\033[0m")
        self.show(self._descend_in_batches(self.batch_size))

    def StochasticGradientDescent(self):
        """Fit by updating on one point at a time and show the result.

        Points are visited in their stored order on every pass.
        """
        print("\033[1mStochastic Gradient Descent:\033[0m")
        self.show(self._descend_in_batches(1))

    def _original_scale_coefficients(self):
        """Return ``(slope, intercept)`` of the fitted line in original units.

        Substituting the standardization into ``y_n = m * x_n + b`` gives
        slope ``m * y_std / x_std`` and intercept
        ``b * y_std + y_mean - slope * x_mean``.
        """
        slope = self.m * self.y_std / self.x_std
        intercept = self.b * self.y_std + self.y_mean - slope * self.x_mean
        return slope, intercept

    def printEquation(self):
        """Pretty-print the fitted line in the original units.

        Coefficients are rounded to ``self.round`` decimals when it is set.
        """
        m, b = self._original_scale_coefficients()
        if self.round is not None:
            # Round only for display; the intercept is derived from the
            # unrounded slope so rounding error does not compound.
            m = round(m, self.round)
            b = round(b, self.round)
        y = m * x_symbol + b
        print(sp.pretty(y))

    def plotResults(self):
        """Plot the data and the fitted line in the original units."""
        x_original = self.x * self.x_std + self.x_mean
        y_original = self.y * self.y_std + self.y_mean
        y_fitted = (self.m * self.x + self.b) * self.y_std + self.y_mean

        plt.figure(figsize=(9, 7))
        plt.scatter(x_original, y_original, color='blue', label='Data')
        plt.plot(x_original, y_fitted, color='red', label='Regression Line')
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.title('Data and Regression Line')
        plt.legend()
        plt.grid(True)
        plt.show()

    def plotLoss(self, losses):
        """Plot the recorded loss values against their update index."""
        plt.figure(figsize=(9, 7))
        plt.plot(range(len(losses)), losses, label='Loss function')
        plt.xlabel('Iteration')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()

    def show(self, losses=None):
        """Print the equation, plot the fit and, if given, the loss curve.

        Args:
            losses: Optional sequence of loss values; skipped when None.
        """
        self.printEquation()
        self.plotResults()
        if losses is not None:
            self.plotLoss(losses)