In [3]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LinearRegression as lrg
from sklearn.metrics import r2_score as r2
from sklearn.datasets import load_diabetes # Using a built in dataset (this is actually a function)

In [4]:
# Brief glimpse into the dataset
x,y = load_diabetes(return_X_y=True) # Scikit learn knows what parts we need and gives us the input and output
print(x.shape)
print("--------------------------------------------------------------------------------------------------------------------------------------------------------------")
print(y.shape)

(442, 10)
--------------------------------------------------------------------------------------------------------------------------------------------------------------
(442,)


In [5]:
# Hold out 20% of the rows as a test set (test_size=0.2); random_state=2 pins the shuffle so the split is reproducible
x_train,x_test,y_train,y_test = tts(x,y,test_size=0.2,random_state=2)

In [6]:
# Baseline: fit scikit-learn's LinearRegression on the training split
mpl_using_skl = lrg().fit(x_train, y_train)  # fit() returns the fitted estimator itself
y_pred = mpl_using_skl.predict(x_test)       # predictions for the held-out rows

# Keep the R2 score in `a`; it is compared against the from-scratch model later on
a = r2(y_test, y_pred)
print(a)
print(mpl_using_skl.coef_)       # fitted coefficients
print(mpl_using_skl.intercept_)  # fitted intercept

0.4399338661568968
[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
151.88331005254167


In [7]:
# Creating multiple linear regression from scratch

In [16]:
class MyMpl:
    """Multiple linear regression fitted with the normal equation.

    After ``train`` has run, ``coef`` holds one weight per input column and
    ``intercept`` holds the bias term.
    """

    def __init__(self):
        self.coef = None       # feature weights, set by train()
        self.intercept = None  # bias term, set by train()

    def train(self, xt, yt):
        """Fit the model by solving beta = (X^T X)^-1 X^T y.

        Parameters
        ----------
        xt : array-like, shape (n_samples, n_features)
            Training inputs (without a column of ones).
        yt : array-like, shape (n_samples,) or (n_samples, 1)
            Training targets.

        Raises
        ------
        numpy.linalg.LinAlgError
            If X^T X cannot be inverted.
        """
        # Flatten the targets so coef / intercept / predictions come out 1-D.
        yt = np.asarray(yt, dtype=float).reshape(-1)

        # The training inputs have no column of ones for the intercept, so one
        # is prepended: np.insert(<array>, <index>, <value>, axis=<1 for
        # columns, 0 for rows>). np.insert returns a NEW array, so the result
        # has to be kept -- calling it without assigning does nothing.
        design = np.insert(np.asarray(xt, dtype=float), 0, 1, axis=1)

        # Use the design matrix built from the argument (not a notebook-level
        # global) on both sides of the normal equation.
        gram_inv = np.linalg.inv(np.dot(design.T, design))
        betas = np.dot(np.dot(gram_inv, design.T), yt)

        self.intercept = betas[0]  # weight of the prepended ones column
        self.coef = betas[1:]      # weights of the original columns

    def predict(self, xtst):
        """Return the predictions ``xtst @ coef + intercept`` for each row of ``xtst``."""
        return np.dot(xtst, self.coef) + self.intercept

In [19]:
class MyMpl2:
    """Multiple linear regression solved in closed form (normal equation).

    ``train`` stores the fitted bias in ``intercept_`` and the per-column
    weights in ``coef_``; ``predict`` applies them to new rows.
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def train(self, X_train, y_train):
        """Estimate the intercept and coefficients from the training data."""
        # Prepend a column of ones so the first fitted weight is the intercept.
        design = np.insert(X_train, 0, 1, axis=1)

        # Calculate the coefficients: (X^T X)^-1 X^T y
        gram_inv = np.linalg.inv(np.dot(design.T, design))
        betas = np.dot(np.dot(gram_inv, design.T), y_train)

        self.intercept_, self.coef_ = betas[0], betas[1:]

    def predict(self, X_test):
        """Return the model's predictions for every row of ``X_test``."""
        return np.dot(X_test, self.coef_) + self.intercept_

In [21]:
# Fit the from-scratch model on the training split, then print its predictions for the test split
my_mpl = MyMpl2()
my_mpl.train(x_train,y_train)
print(my_mpl.predict(x_test))

[154.1213881  204.81835118 124.93755353 106.08950893 258.5348576
 256.3310074  118.75087616 119.52440696 101.50816735 190.54048661
 141.70656811 172.51883961 174.33861649 134.80942706 294.13994537
  94.11798038 211.97059795 156.49579378 134.21000428 119.62664644
 148.87842251 165.00873409 151.10021038 176.04063756 133.27769647
 221.29555392 197.17324941  96.1577688   50.26012711 230.48580317
 242.06073866 114.11129218  67.07532417  94.52943825 201.21415375
 167.05136201 159.881268   192.78746659 114.49551325 233.48234551
 140.82563045 121.0680409  192.27480772 191.12738845 179.16865788
 148.34935601 163.47414622 276.81647884 100.17926432 164.10555298
 255.80762189 136.9466204  152.37503699 107.92237882 194.21924678
  77.34670792 118.50482479  68.38335763 154.29258529 162.48840259
 168.36788326 156.87790322  97.14191797 238.1671215  145.46179904
 117.65702433 168.88784311 198.38683887 118.24053714 124.64552812
 223.17700368 200.63012386 129.54414666 158.1584765  154.332565
 114.47070769

In [27]:
# R2 of the from-scratch model on the test split, printed underneath scikit-learn's score (a)
b = r2(y_test, my_mpl.predict(x_test))
print(a, b, sep="\n")

0.4399338661568968
0.43993386615689634


In [24]:
# Ratio of scikit-learn's R2 (a) to the from-scratch model's R2 (b)
# NOTE(review): this cell's execution count (In [24]) is lower than that of the cell assigning b (In [27]) -- re-run top to bottom to confirm the value
print(f"R2 of Scikit:R2 of mine = {a/b}")

R2 of Scikit:R2 of mine = 1.000000000000001
