<h2> This notebook contains my very simple implementation of univariate Linear Regression. </h2>
<p> Feel free to download it and play around! </p>

In [2]:
import numpy as np
import pandas as pd
import kagglehub
# Fetch the Kaggle dataset; as the last expression of the cell, the value returned by the call is echoed as the cell output.
kagglehub.dataset_download("andonians/random-linear-regression")


Using Colab cache for faster access to the 'random-linear-regression' dataset.


'/kaggle/input/random-linear-regression'

In [5]:
# Locations of the train and test CSV splits
train_path = '/kaggle/input/random-linear-regression/train.csv'
test_path = '/kaggle/input/random-linear-regression/test.csv'

# Read both splits with the same reader; the tuple order decides which frame is which
df_train, df_test = map(pd.read_csv, (train_path, test_path))

# Preview the first rows of the training split
df_train.head()

Unnamed: 0,x,y
0,24.0,21.549452
1,50.0,47.464463
2,15.0,17.218656
3,38.0,36.586398
4,87.0,87.288984


In [37]:
from typing import Callable

def rmse(y_true : np.ndarray, y_preds : np.ndarray) -> np.float64:
    """Return the square root of the summed squared differences.

    Note that the squared errors are summed, not averaged: the result is
    ``sqrt(sum((y_true - y_preds) ** 2))`` and therefore grows with the
    number of samples instead of being normalised by it.

    Args:
        y_true: Observed target values.
        y_preds: Predicted values, element-wise compatible with ``y_true``.

    Returns:
        The scalar described above.
    """
    residuals = y_true - y_preds
    squared_total = (residuals ** 2).sum()
    return np.sqrt(squared_total)

class LinearRegressionModel:
    """Univariate linear model ``w * X + b`` trained by full-batch gradient descent.

    The intercept ``b`` and the slope ``w`` are each drawn from a normal
    distribution with mean 0 and standard deviation 10 when the object is
    created.
    """

    def __init__(self, lr = 0.05, epochs = 200):
        """Store the hyper-parameters and draw the initial parameters.

        Args:
            lr: Step size applied to both parameter updates.
            epochs: Maximum number of passes ``fit`` makes over the training data.
        """
        self.lr = lr
        self.epochs = epochs
        self.b = np.random.normal(loc=0.0, scale=10.0, size=None)
        self.w = np.random.normal(loc=0.0, scale=10.0, size=None)

    def forward(self, X : np.ndarray) -> np.ndarray:
        """Return the model prediction ``w * X + b`` for the inputs ``X``."""
        return self.w * X + self.b

    def fit(self, X : np.ndarray, y : np.ndarray, X_test : np.ndarray, y_test : np.ndarray,
            loss : Callable[[np.ndarray, np.ndarray], np.ndarray]) -> None:
        """Update ``w`` and ``b`` in place for up to ``self.epochs`` iterations.

        Each iteration moves the parameters by
        ``lr * sum((y_preds - y) * X) / (error * N)`` (and the analogous term
        without ``X`` for ``b``), where ``error = loss(y, y_preds)`` and
        ``N = len(X)``. This is the exact gradient only when ``loss`` returns
        ``sqrt(mean((y - y_preds) ** 2))``; for any other ``loss`` the value
        of ``error`` merely rescales the step.

        Progress is printed on every 49th iteration (epochs 1, 50, 99, ...);
        the test-split loss is evaluated only for those printouts.

        Training stops early when the loss is exactly zero: the parameters
        cannot improve further, and dividing by the loss would otherwise turn
        them into NaN.

        Args:
            X: Training inputs.
            y: Training targets.
            X_test: Inputs used only for the reported test loss.
            y_test: Targets used only for the reported test loss.
            loss: Callable ``loss(y_true, y_preds)``; its result is used as a
                divisor, so it is expected to be a scalar.
        """
        N = len(X)
        for i in range(self.epochs):
            log_this_epoch = i % 49 == 0
            if log_this_epoch:
                print(f'Epoch {i + 1} / {self.epochs}...')
            y_preds = self.forward(X)
            error = loss(y, y_preds)
            if log_this_epoch:
                # The printed values describe the parameters before this epoch's update.
                test_error = loss(y_test, self.forward(X_test))
                print(f'Train: {error}')
                print(f'Test: {test_error}')
            # A zero loss would make 1 / (error * N) infinite and poison w and b with NaN.
            if np.all(error == 0):
                break
            # Compute partial derivatives
            residuals = y_preds - y
            scale = 1 / (error * N)
            partial_w = scale * (residuals * X).sum()
            partial_b = scale * residuals.sum()

            self.w = self.w - self.lr * partial_w
            self.b = self.b - self.lr * partial_b

# Train the NumPy model on the train split and report the loss on the test split.
# NOTE(review): this uses df_train before the dropna() applied further down — confirm that is intended.
model = LinearRegressionModel()
model.fit(
    X=df_train['x'],
    y=df_train['y'],
    X_test=df_test['x'],
    y_test=df_test['y'],
    loss=rmse,
)

Epoch 1 / 200...
Train: 15933.31127584343
Test: 10549.099130180797
Epoch 50 / 200...
Train: 7764.305959141762
Test: 5145.003383900915
Epoch 99 / 200...
Train: 103.94078208572441
Test: 72.404899793937
Epoch 148 / 200...
Train: 103.61319647610209
Test: 72.38832977516259
Epoch 197 / 200...
Train: 103.31495370160775
Test: 72.20317627686704


<h2>Using PyTorch</h2>

In [99]:
import torch
import torch.nn as nn

class LinearRegressionTorch(nn.Module):
    """Linear model ``X * w + b`` with one learnable weight and one learnable bias."""

    def __init__(self, input_dim=1, dev=0.01):
        """Create the two parameters, each initialised from ``torch.randn(1)``.

        NOTE(review): ``input_dim`` and ``dev`` are accepted but not used by
        this constructor — confirm whether they were meant to control the
        parameter shape / initial scale.
        """
        super().__init__()
        # Draw the weight first, then the bias, so seeded runs stay reproducible.
        initial_w = torch.randn(1)
        initial_b = torch.randn(1)
        self.w = nn.Parameter(initial_w, requires_grad=True)
        self.b = nn.Parameter(initial_b, requires_grad=True)

    def forward(self, X):
        """Return ``X * w + b``; the parameters broadcast against ``X``."""
        scaled = X * self.w
        return scaled + self.b

def train(X_train, y_train, model, epochs=500, lr=0.01, log_every=20):
    """Fit ``model`` to the training data with Adam on mean-squared error.

    Every epoch is one full-batch step: forward pass, ``nn.MSELoss`` against
    ``y_train``, backward pass, optimizer step. The model is updated in place
    and nothing is returned.

    Args:
        X_train: Inputs passed to ``model`` on every epoch.
        y_train: Targets compared with the model output.
        model: Module whose ``parameters()`` are optimised.
        epochs: Number of optimisation steps.
        lr: Learning rate handed to ``torch.optim.Adam`` (default keeps the
            previously hard-coded 0.01).
        log_every: Print the loss every this many epochs (default keeps the
            previously hard-coded 20); ``0`` or ``None`` disables printing.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    for i in range(1, epochs + 1):
        model.train()
        y_preds = model(X_train)
        loss = loss_fn(y_preds, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The truthiness check avoids a modulo-by-zero when logging is switched off.
        if log_every and i % log_every == 0:
            print(f"Epoch {i}, loss={loss.item():.4f}")

# Drop rows with missing values before converting to tensors
df_train = df_train.dropna()

# Turn each column into a float32 column vector of shape (n_rows, 1)
X_train, y_train = (
    torch.from_numpy(df_train[column].values).unsqueeze(1).float()
    for column in ('x', 'y')
)

model2 = LinearRegressionTorch()
train(X_train, y_train, model=model2, epochs=500)

Epoch 20, loss=12457.4404
Epoch 40, loss=10059.3633
Epoch 60, loss=8017.7666
Epoch 80, loss=6309.9233
Epoch 100, loss=4901.3335
Epoch 120, loss=3755.6130
Epoch 140, loss=2837.1633
Epoch 160, loss=2112.0994
Epoch 180, loss=1548.8314
Epoch 200, loss=1118.5253
Epoch 220, loss=795.4477
Epoch 240, loss=557.1660
Epoch 260, loss=384.5992
Epoch 280, loss=261.9176
Epoch 300, loss=176.3190
Epoch 320, loss=117.7102
Epoch 340, loss=78.3339
Epoch 360, loss=52.3753
Epoch 380, loss=35.5836
Epoch 400, loss=24.9250
Epoch 420, loss=18.2858
Epoch 440, loss=14.2273
Epoch 460, loss=11.7925
Epoch 480, loss=10.3587
Epoch 500, loss=9.5299
