In [1]:
import numpy as np
from tqdm import tqdm


class LinearRegression:
    """Linear regression fitted by full-batch gradient descent on the mean squared error.

    Parameters
    ----------
    lr : float, default=0.01
        Step size applied to every gradient update.
    epochs : int, default=1000
        Number of gradient steps performed by ``fit``.
    random_state : None, int or numpy.random.Generator, default=None
        Seed for the weight initialisation. ``None`` draws from NumPy's global
        random state; any other value is handed to ``numpy.random.default_rng``.

    Attributes
    ----------
    W : numpy.ndarray of shape (n_features,)
        Weight vector. Holds the scalar ``0`` until ``initialize``/``fit`` runs.
    b : float
        Intercept term.
    """

    def __init__(self, lr=0.01, epochs=1000, random_state=None):
        self.epochs = epochs
        self.lr = lr
        self.random_state = random_state
        self.W = 0
        self.b = 0

    def initialize(self, n_features):
        """Reset the parameters: standard-normal weights and a zero intercept.

        Parameters
        ----------
        n_features : int
            Length of the weight vector to create.
        """
        if self.random_state is None:
            self.W = np.random.normal(0, 1, size=n_features)
        else:
            rng = np.random.default_rng(self.random_state)
            self.W = rng.normal(0, 1, size=n_features)
        # The intercept must be reset together with the weights; otherwise a
        # second fit() would start from the previous run's intercept.
        self.b = 0.0

    def gradient(self, X, y, n_samples):
        """Return the MSE gradient with respect to the weights and the intercept.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
        y : numpy.ndarray of shape (n_samples,)
        n_samples : int
            Divisor used to average the gradient.

        Returns
        -------
        (d_w, d_b) : tuple
            ``d_w`` has the shape of ``W``; ``d_b`` is a scalar.
        """
        residuals = self.predict(X) - y

        scale = 2 / n_samples
        d_w = scale * np.dot(X.T, residuals)
        d_b = scale * np.sum(residuals)

        return d_w, d_b

    def fit(self, X, y):
        """Estimate ``W`` and ``b`` from training data, starting from fresh parameters.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with ``n_samples`` elements
            A column vector of shape ``(n_samples, 1)`` is accepted and flattened.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not hold exactly one
            target per sample.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")

        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.size != n_samples:
            raise ValueError(
                f"y must hold one target per sample: expected {n_samples} values, got shape {y.shape}"
            )
        # Flatten so that `y_pred - y` stays element-wise; a (n, 1) target
        # would otherwise broadcast against the (n,) predictions into (n, n).
        y = y.reshape(n_samples)

        self.initialize(n_features)

        # One full-batch gradient step per epoch.
        for _ in tqdm(range(self.epochs)):
            d_w, d_b = self.gradient(X, y, n_samples)
            self.W -= self.lr * d_w
            self.b -= self.lr * d_b

    def predict(self, X):
        """Return the linear prediction ``X @ W + b`` for each row of ``X``."""
        return np.dot(X, self.W) + self.b

In [2]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression as SkLearnLN
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [3]:
# Fixed seed so that Restart & Run All reproduces the same dataset and split.
SEED = 42

# Synthetic regression problem: 100,000 samples with 10 features each.
data = make_regression(n_samples=100000, n_features=10, random_state=SEED)

# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    data[0], data[1], test_size=0.2, random_state=SEED
)

In [11]:
# Fit our gradient-descent model
model = LinearRegression(epochs=2000)
model.fit(X_train, y_train)

# Evaluate our model on the held-out split.
# mean_squared_error expects (y_true, y_pred), in that order.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

# Fit scikit-learn's implementation on the same split as a baseline
sk_model = SkLearnLN()
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_test)
sk_mse = mean_squared_error(y_test, sk_pred)

# Report both errors
print("Mean square error our: ", mse)
print("Mean square error sklearn: ", sk_mse)



100%|██████████| 2000/2000 [00:01<00:00, 1053.16it/s]

Mean square error our:  3.257897561135531e-25
Mean square error sklearn:  1.7868139348369212e-25



