## Multiple Linear Regression from scratch

## $y = b_0 + \sum_{i=1}^{n} b_i X_i$

## $\beta = (X^T X)^{-1} X^T Y$

In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes

from sklearn.model_selection import train_test_split

from sklearn.metrics  import mean_absolute_error , mean_squared_error , r2_score

from sklearn.linear_model import LinearRegression

In [5]:
class multiple_linear_regression:

    def __init__(self):
        self.intercetp_ = None
        self.coef_ = None

    def fit(self ,X_train , Y_train) :

        X = X_train.values if isinstance(X_train , pd.DataFrame) else X_train
        Y = Y_train.values if isinstance(Y_train , pd.DataFrame) else Y_train

        X = np.insert(X , 0  , 1 , axis = 1)
        
        betas = ( np.linalg.inv(X.T @ X) @ (X.T)) @ Y
        self.intercept_ = betas[0]
        self.coef_ = betas[1:]

    def predict(self , X_test):

        X = X_test.values if isinstance(X_test , pd.DataFrame) else X_test
        return ((X @ self.coef_) + self.intercept_)


In [6]:
X,Y = load_diabetes(return_X_y=True)

X.shape , Y.shape , type(X) , type(Y)

((442, 10), (442,), numpy.ndarray, numpy.ndarray)

In [7]:
X[:2]

array([[ 0.03807591,  0.05068012,  0.06169621,  0.02187239, -0.0442235 ,
        -0.03482076, -0.04340085, -0.00259226,  0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, -0.02632753, -0.00844872,
        -0.01916334,  0.07441156, -0.03949338, -0.06833155, -0.09220405]])

In [8]:
Y[:2]

array([151.,  75.])

In [9]:
X_train , X_test , Y_train , Y_test = train_test_split( X , Y , test_size = 0.2 , random_state = 42)

print( X_train.shape , X_test.shape , Y_train.shape , Y_test.shape )

type(X_train) , type(X_test) , type(Y_train) , type(Y_test) 

(353, 10) (89, 10) (353,) (89,)


(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [10]:
lr = multiple_linear_regression()

lr.fit(X_train , Y_train)

y_pred = lr.predict(X_test)

In [11]:
mae = mean_absolute_error(Y_test , y_pred)
mse = mean_squared_error(Y_test , y_pred)
rmse = np.sqrt( mean_squared_error(Y_test , y_pred) )
r2 = r2_score(Y_test , y_pred)
adj_r2 = 1 - ( ( ( 1- r2) * (X_test.shape[0]) ) / ( X_test.shape[0] - 1 - X_test.shape[1] ) )

In [12]:
print(f' Mean Absolute Error : {mae}')
print(f' Mean Squared Error : {mse}')
print(f' Root Mean Squared Error : {rmse}')
print(f' R2 Socre : {r2}')
print(f' Adjusted R2 Score : {adj_r2}')

 Mean Absolute Error : 42.79409467959993
 Mean Squared Error : 2900.193628493484
 Root Mean Squared Error : 53.85344583676595
 R2 Socre : 0.4526027629719189
 Adjusted R2 Score : 0.37540571672436907


In [13]:
lr.intercept_

151.34560453986

In [14]:
lr.coef_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

# SKlearn Linear Regression

In [15]:
sklr = LinearRegression()

sklr.fit(X_train , Y_train)

y_pred_sk = sklr.predict(X_test)

In [16]:
mae = mean_absolute_error(Y_test , y_pred_sk)
mse = mean_squared_error(Y_test , y_pred_sk)
rmse = np.sqrt( mean_squared_error(Y_test , y_pred_sk) )
r2 = r2_score(Y_test , y_pred_sk)
adj_r2 = 1 - ( ( ( 1- r2) * (X_test.shape[0]) ) / ( X_test.shape[0] - 1 - X_test.shape[1] ) )

In [17]:
print(f' Mean Absolute Error : {mae}')
print(f' Mean Squared Error : {mse}')
print(f' Root Mean Squared Error : {rmse}')
print(f' R2 Socre : {r2}')
print(f' Adjusted R2 Score : {adj_r2}')

 Mean Absolute Error : 42.79409467959994
 Mean Squared Error : 2900.1936284934804
 Root Mean Squared Error : 53.85344583676592
 R2 Socre : 0.4526027629719196
 Adjusted R2 Score : 0.37540571672436973


In [18]:
sklr.coef_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [19]:
sklr.intercept_

151.34560453985995