In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
import sklearn.linear_model

In [None]:
def normalize(X):
    """Scale every row of ``X`` to unit Euclidean (L2) length.

    Parameters
    ----------
    X : array-like, 2-D
        One sample per row. The norm is taken along ``axis=1``, so a 1-D
        input raises numpy's axis error.

    Returns
    -------
    Same layout as ``X`` with each row divided by its L2 norm. Rows that are
    entirely zero are returned unchanged (all zeros) instead of turning into
    NaN through a 0/0 division.
    """
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    # A zero row has norm 0; dividing by 1 instead keeps it at zero and avoids
    # NaNs (and the accompanying RuntimeWarning) leaking into later cells.
    safe_norms = np.where(norms == 0, 1.0, norms)
    X_normalized = X / safe_norms
    return X_normalized

In [None]:
# NOTE(review): neither `X` nor `y` is assigned in any visible cell — presumably they
# come from make_regression (imported above); confirm this runs on a fresh kernel.
# Overwrites X with its row-normalized version.
X = normalize(X)

In [None]:
def train_test_split(X, y, test_size=0.25, shuffle=True, random_state=None):
    """Split paired samples and targets into train and test subsets.

    Parameters
    ----------
    X, y : array-like
        Samples and targets; both are converted with ``np.asarray`` and must
        have the same length along their first axis.
    test_size : float, default 0.25
        Fraction of rows placed in the test subset, within ``[0, 1]``. The
        number of test rows is ``round(test_size * n)``.
    shuffle : bool, default True
        Apply one shared random permutation to ``X`` and ``y`` before slicing.
    random_state : None, int or numpy.random.Generator, default None
        Seed (or generator) for the shuffle. ``None`` keeps using numpy's
        global random state, so ``np.random.seed`` still controls the result.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray
        The first ``round(test_size * n)`` (possibly shuffled) rows form the
        test subset; the remaining rows form the training subset.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or ``test_size`` is outside [0, 1].
    """
    X, y = np.asarray(X), np.asarray(y)
    n = len(X)
    if len(y) != n:
        raise ValueError(f'X and y must have the same length, got {n} and {len(y)}')
    if not 0 <= test_size <= 1:
        # A negative fraction would flip the meaning of the slices below and a
        # fraction above 1 would silently leave the training set empty.
        raise ValueError(f'test_size must be within [0, 1], got {test_size}')
    if shuffle:
        if random_state is None:
            perm = np.random.permutation(n)
        else:
            perm = np.random.default_rng(random_state).permutation(n)
        X, y = X[perm], y[perm]
    test_rows = round(test_size * n)
    X_train, X_test = X[test_rows:], X[:test_rows]
    y_train, y_test = y[test_rows:], y[:test_rows]
    return X_train, X_test, y_train, y_test

In [None]:
# Uses the train_test_split defined in this notebook with its defaults
# (test_size=0.25, shuffle=True).
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
class LinearRegression:
    """Linear regression fitted by batch gradient descent with an L2 penalty.

    A column of ones is prepended to the design matrix, so ``_w[0]`` is the
    intercept and ``_w[1:]`` are the feature coefficients. The minimized cost is

        J(w) = 1 / (2 n) * (sum((X w - y) ** 2) + l * sum(w[1:] ** 2))

    i.e. the intercept is not penalized.

    Parameters
    ----------
    l : float, default 0.001
        L2 regularization strength.
    lr : float, default 0.1
        Gradient-descent step size.
    tol : float, default 0.0001
        Training stops once an iteration lowers the cost by no more than this.
    verbose : bool, default True
        Print iteration number, cost and training R^2 after every update.
    """

    def __init__(self, l=0.001, lr=0.1, tol=0.0001, verbose=True):
        self._l = l
        self._lr = lr
        self._tol = tol
        self._verbose = verbose

    def __preprocess(self, X, y):
        """Convert inputs to arrays and prepend the bias column to ``X``.

        A 1-D ``X`` is treated as ``n`` samples of a single feature.
        """
        X, y = np.asarray(X), np.asarray(y)
        if X.ndim == 1:
            X = X.reshape(X.size, 1)
        X = np.hstack((np.ones((len(X), 1)), X))
        return X, y

    @staticmethod
    def __r_squared(y, preds):
        """Coefficient of determination, ``1 - SS_residual / SS_total``."""
        SS_r = ((y - preds) ** 2).sum()
        SS_t = ((y - y.mean()) ** 2).sum()
        return 1 - SS_r / SS_t

    def __predict(self):
        """Predictions for the stored (already padded) training matrix."""
        return np.dot(self._X, self._w)

    def __score(self):
        """R^2 of the current weights on the training data."""
        return self.__r_squared(self._y, self.__predict())

    def __cost(self):
        """Regularized half mean squared error on the training data."""
        sq_err = (self.__predict() - self._y) ** 2
        sq_reg = self._w[1:] ** 2
        cost = 1 / (2 * self._n) * (sq_err.sum() + self._l * sq_reg.sum())
        return cost

    def __iterate(self):
        """Take one gradient-descent step on the regularized cost."""
        err = self.__predict() - self._y
        # Gradient of the L2 term in __cost. The intercept row is zeroed
        # because __cost only penalizes w[1:].
        penalty = self._l * self._w
        penalty[0] = 0
        gradient = (np.dot(self._X.T, err) + penalty) / self._n
        self._w -= self._lr * gradient

    def get_ls_y(self, ls_x):
        """Evaluate the fitted line ``w[1] * x + w[0]`` at each value of ``ls_x``.

        Only the intercept and the first coefficient are used, so this is
        meaningful for single-feature models.

        Returns
        -------
        list
            One value per element of ``ls_x``.
        """
        ls_y = [self._w[1] * x + self._w[0] for x in ls_x]
        return ls_y

    def fit(self, X, y):
        """Fit the weights to ``X`` and ``y`` by gradient descent.

        Parameters
        ----------
        X : array-like
            Training samples; a 1-D input is treated as a single feature.
        y : array-like
            Targets, either 1-D or 2-D with one column per output.

        Weights start at zero. Iteration stops when the cost is no longer
        positive or when a step fails to lower it by more than ``tol``.
        """
        X, y = self.__preprocess(X, y)
        self._X = X
        self._y = y
        self._n = len(self._X)
        self._w = np.zeros((self._X.shape[1], self._y.shape[1]) if self._y.ndim == 2 else self._X.shape[1])

        i = 0
        prev_cost = None
        cost = None
        # The first two iterations always run: `cost` is needed to test for
        # positivity and `prev_cost` to measure the improvement.
        while (cost is None or cost > 0) and (prev_cost is None or prev_cost - cost > self._tol):
            self.__iterate()
            prev_cost = cost
            cost = self.__cost()
            if self._verbose:
                score = self.__score()
                print(f'Iteration: {i}     Cost: {cost}     Score: {score}')
            i += 1

    def predict(self, X, pad=True):
        """Predict targets for ``X`` with the fitted weights.

        Parameters
        ----------
        X : array-like
            Samples to predict. With ``pad=True`` a 1-D input is treated as
            ``n`` samples of a single feature, matching ``fit`` and ``score``.
        pad : bool, default True
            Prepend the bias column of ones. Pass ``False`` only when ``X``
            already carries that column.

        Returns
        -------
        numpy.ndarray
            ``X @ w`` (after optional padding).
        """
        X = np.asarray(X)
        if pad:
            if X.ndim == 1:
                X = X.reshape(X.size, 1)
            X = np.hstack((np.ones((len(X), 1)), X))
        pred = np.dot(X, self._w)
        return pred

    def score(self, X, y):
        """Return the coefficient of determination R^2 on ``X`` and ``y``."""
        X, y = self.__preprocess(X, y)
        # X already carries the bias column, so skip padding in predict().
        preds = self.predict(X, False)
        return self.__r_squared(y, preds)

In [None]:
# The LinearRegression class defined in this notebook, with a small learning rate;
# l, tol and verbose keep their defaults.
clf = LinearRegression(lr=0.0001)

In [None]:
# Runs gradient descent on the training split; with verbose left at True this
# prints one line per iteration.
clf.fit(X_train, y_train)

In [None]:
# R^2 of the fitted model on the held-out split.
clf.score(X_test, y_test)

In [None]:
# Prediction for a single test row, reshaped to one row so predict() can
# prepend the bias column.
clf.predict(X_test[5].reshape(1, -1))

In [None]:
# True target for the same row, for comparison with the prediction above.
y_test[5]