In [119]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [120]:
class NormalLinearRegression:
    def __init__(self) -> None:
        self.X = None
        self.Y = None
        self.theta = None

    def fit(self,x,y):
        """
        Returns the optimal weights.
        parameters: 
            x : input/feature matrix
            y : target matrix

        Returns:
            theta: Array of the optimal value of weights.

        """
        self.X = x

        if self.X.ndim == 1: # adding extra dimension, if X is a 1-D array
            self.X = self.X.reshape(-1,1)

        # adding extra column of 1s for the bias term
        self.X = np.concatenate([np.ones((self.X.shape[0], 1)), self.X], axis=1)

        self.Y = y
        self.theta = np.zeros((self.X.shape[1],1))

        self.theta = self.calculate_theta()
        self.theta = self.theta.reshape(-1,1)

        return self.theta

    def predict(self, x):
        """
        Returns the predicted target.
        parameters: 
            x : test input/feature matrix

        Returns:
            y:  predicted target value.

        """
        x = np.array(x) # converting list to numpy array
        if x.ndim == 1:
            x = x.reshape(1,-1) # adding extra dimension in front
        x = np.concatenate([np.ones((x.shape[0],1)), x], axis=1)
        return np.dot(x,self.theta)

    def calculate_theta(self):
        """
        Calculate the optimal weights.
        parameters: None
        Returns:
            theta_temp: Array containing the calculated value of weights

        """
        y_projection = np.dot(self.X.T, self.Y)
        cov = np.dot(self.X.T, self.X)
        cov_inv = np.linalg.pinv(cov)
        theta_temp = np.dot(cov_inv, y_projection)

        return theta_temp

In [121]:
x, y = datasets.load_diabetes(return_X_y=True)

X_train, X_test, Y_train, Y_test = train_test_split(
x, y, test_size = 1/3, random_state = 0 )

In [122]:
myLinearRegression= NormalLinearRegression()
myLinearRegression.fit(X, Y)

array([[ 152.13348416],
       [ -10.01219782],
       [-239.81908937],
       [ 519.83978679],
       [ 324.39042769],
       [-792.18416163],
       [ 476.74583782],
       [ 101.04457032],
       [ 177.06417623],
       [ 751.27932109],
       [  67.62538639]])

In [123]:
p = myLinearRegression.predict(X_test)

In [124]:
r2_score(Y_test ,p)

0.4285809461744471

In [125]:
x, y = datasets.load_diabetes(return_X_y=True)

X_train, X_test, Y_train, Y_test = train_test_split(
x, y, test_size = 1/3, random_state = 0 )

In [126]:
skModel = LinearRegression()
skModel.fit(X_train,Y_train)
psk = skModel.predict(X_test)

In [127]:
r2_score(Y_test ,psk)

0.40409354863392766