In [1]:
import random

class LinearRegression:
    """Single-feature linear model ``y = weight * x + bias``.

    The parameters are fitted with per-sample gradient descent on the
    squared error ``(y_pred - y_true) ** 2``.
    """

    def __init__(self, learning_rate: float = 0.01):
        """Create a model with randomly initialised parameters.

        Args:
            learning_rate: Step size multiplied into every gradient before
                it is subtracted from the parameters.
        """
        # Draw order (bias first, then weight) is kept as-is so that runs
        # seeded through the global ``random`` module stay reproducible.
        self.bias = random.random()
        self.weight = random.random()
        self.learning_rate = learning_rate

    def forward(self, x):
        """Return the prediction ``weight * x + bias`` for input ``x``."""
        return self.weight * x + self.bias

    def compute_loss(self, x, y_true):
        """Return the squared error of the prediction for one sample."""
        y_pred = self.forward(x)
        return (y_pred - y_true)**2

    def backward(self, x, y_true):
        """Return the gradients ``(dw, db)`` of the squared error.

        With ``error = forward(x) - y_true`` the chain rule gives
        ``dL/dw = 2 * error * x`` and ``dL/db = 2 * error``.
        """
        y_pred = self.forward(x)
        error = y_pred - y_true

        dw = 2*error*x
        db = 2*error

        return dw, db

    def train_step(self, x, y_true):
        """Apply one gradient-descent update using a single sample."""
        dw, db = self.backward(x, y_true)
        self.weight = self.weight-dw*self.learning_rate
        self.bias = self.bias-db*self.learning_rate

    def train(self, X, y, epochs: int = 1000, log_every=10):
        """Fit the model by sweeping over the samples ``epochs`` times.

        Args:
            X: Iterable of input values.
            y: Iterable of target values, one per entry of ``X``.
            epochs: Number of full passes over the data.
            log_every: Print the summed loss every this many epochs
                (starting with epoch 0). ``0`` or ``None`` disables printing.

        Returns:
            A list with one entry per epoch: the sum of the per-sample
            squared errors, each measured just before that sample's update.

        Raises:
            ValueError: If ``X`` and ``y`` do not contain the same number
                of items.
        """
        # Materialise both inputs once: a one-shot iterator would otherwise
        # be exhausted after the first epoch, and zip() would silently drop
        # the tail of the longer input.
        samples = list(X)
        targets = list(y)
        if len(samples) != len(targets):
            raise ValueError(
                "X and y must have the same length, got "
                f"{len(samples)} and {len(targets)}"
            )

        history = []
        for epoch in range(epochs):
            total_loss = 0
            for x_i, y_i in zip(samples, targets):
                # Measure the loss before updating so it reflects the
                # parameters that actually produced the prediction.
                total_loss += self.compute_loss(x_i, y_i)
                self.train_step(x_i, y_i)
            history.append(total_loss)
            # The falsy check also guards against a modulo-by-zero.
            if log_every and epoch % log_every == 0:
                print(f'epoch {epoch} loss={total_loss}')
        return history

# Demo: fit the model to noisy samples of the line y = 2x + 1
if __name__ == "__main__":
    # Inputs 0..9; each target is jittered by uniform noise in [-0.5, 0.5]
    X = list(range(10))
    y = [2*x_i + 1 + random.uniform(-0.5, 0.5) for x_i in X]

    # Build the model with a small step size, then run the training loop
    model = LinearRegression(learning_rate=0.0001)
    model.train(X, y, epochs=1000)

    # Report the learned parameters
    print(f"\nFinal parameters:")
    print(f"Weight: {model.weight:.4f}")
    print(f"Bias: {model.bias:.4f}")

    # Sanity-check the fit on a single input
    test_x = 5
    prediction = model.forward(test_x)
    print(f"\nPrediction for x={test_x}: {prediction:.4f}")

epoch 0 loss=1050.1278631083073
epoch 10 loss=325.13631257594653
epoch 20 loss=101.30460625259974
epoch 30 loss=32.20586698346584
epoch 40 loss=10.876741234443305
epoch 50 loss=4.292913704745278
epoch 60 loss=2.259352178443832
epoch 70 loss=1.6292893488360758
epoch 80 loss=1.431768711044012
epoch 90 loss=1.3673688719533201
epoch 100 loss=1.3438508436489833
epoch 110 loss=1.3328525931634976
epoch 120 loss=1.3256823537468276
epoch 130 loss=1.3196924560599421
epoch 140 loss=1.3140851186662366
epoch 150 loss=1.3086247050968187
epoch 160 loss=1.303244154224797
epoch 170 loss=1.2979256966025559
epoch 180 loss=1.2926652607593618
epoch 190 loss=1.2874621666226416
epoch 200 loss=1.2823163207569455
epoch 210 loss=1.277227558842973
epoch 220 loss=1.2721955577663044
epoch 230 loss=1.267219872009239
epoch 240 loss=1.2622999799444763
epoch 250 loss=1.2574353174610242
epoch 260 loss=1.252625298996311
epoch 270 loss=1.2478693298860186
epoch 280 loss=1.2431668133866176
epoch 290 loss=1.2385171545919895

Let me break down, step by step, how we derive the gradient formulas for dw and db that `backward` returns:

First, let's write out our equations:

Prediction: ŷ = wx + b
Loss function (squared error for a single sample; averaging it over all samples gives the MSE): L = (ŷ - y)² = (wx + b - y)²


For gradient descent, we need ∂L/∂w and ∂L/∂b. We'll use the chain rule.
For weight (w):

Using chain rule on L = (wx + b - y)²
∂L/∂w = 2(wx + b - y) * ∂(wx + b - y)/∂w
∂(wx + b - y)/∂w = x
Therefore: ∂L/∂w = 2(wx + b - y)x
In code: dw = 2 * error * x where error = (wx + b - y)


For bias (b):

Using chain rule again on L = (wx + b - y)²
∂L/∂b = 2(wx + b - y) * ∂(wx + b - y)/∂b
∂(wx + b - y)/∂b = 1
Therefore: ∂L/∂b = 2(wx + b - y)
In code: db = 2 * error where error = (wx + b - y)