In [1]:
import numpy as np
from sklearn.datasets import load_diabetes

In [7]:
# return_X_y=True makes load_diabetes hand back the (features, target) pair
# directly, which is unpacked into X and y here.
X, y = load_diabetes(return_X_y=True)

In [10]:
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [11]:
X.shape

(442, 10)

In [50]:
y.shape

(442,)

In [17]:
# NOTE(review): in the visible notebook order, X_test is first assigned by the
# train_test_split cell further down, so this cell would raise NameError on
# Restart Kernel -> Run All. Consider moving it below the split.
X_test.shape

(89, 10)

## Using scikit-learn's Linear Regression

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
# Hold out 20% of the rows as a test set (test_size=0.2); random_state=2 pins
# the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=2)

In [51]:
print(X_train.shape)
print(X_test.shape)

(353, 10)
(89, 10)


In [19]:
from sklearn.linear_model import LinearRegression

In [20]:
mlr = LinearRegression()

In [21]:
mlr.fit(X_train, y_train)

In [26]:
y_pred = mlr.predict(X_test)

In [23]:
y_test

array([ 73., 233.,  97., 111., 277., 341.,  64.,  68.,  65., 178., 142.,
        77., 244., 115., 258.,  87., 220.,  86.,  74., 132., 136., 220.,
        91., 235., 148., 317., 131.,  84.,  65., 217., 306.,  79., 158.,
        54., 123., 174., 237., 212., 179., 281., 187., 200.,  68., 163.,
       141., 202., 178., 242.,  47., 131., 243., 142., 200.,  89., 232.,
        55., 253., 128., 104., 184., 110., 198.,  81., 195., 150.,  63.,
       151., 233., 178.,  84., 237., 109., 131., 252., 200., 160., 200.,
        51., 111.,  77., 201.,  88.,  78., 243., 268.,  55., 270., 288.,
        91.])

In [24]:
from sklearn.metrics import r2_score

In [27]:
r2_score(y_test, y_pred)

0.4399338661568968

In [28]:
mlr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [29]:
mlr.intercept_

151.88331005254167

## Writing Multiple Linear Regression from Scratch Using Ordinary Least Squares (OLS)

In [52]:
class MultipleLinearRegression():
    """Multiple linear regression fitted by ordinary least squares (OLS).

    After ``fit`` the model exposes:

    * ``coef_``      -- one slope per feature column.
    * ``intercept_`` -- the constant (bias) term.

    Both attributes are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        # Populated by fit(); None signals "not fitted yet".
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from training data.

        Parameters
        ----------
        X_train : array-like, 2-D (n_samples, n_features)
            Feature matrix; a column of ones is prepended internally.
        y_train : array-like with n_samples entries along its first axis
            Target values.

        Returns
        -------
        None
            Results are stored on ``self.intercept_`` and ``self.coef_``.
        """
        # Prepend a column of ones so the first fitted parameter is the
        # intercept and the remaining ones are the per-feature slopes.
        X_design = np.insert(X_train, 0, 1, axis=1)

        # Solve min ||X_design @ b - y|| directly instead of forming
        # inv(X.T @ X) @ X.T @ y: explicitly inverting the Gram matrix fails
        # (or silently returns garbage) when features are collinear or the
        # matrix is ill-conditioned. lstsq returns the minimum-norm solution
        # in that case and the same solution otherwise.
        betas, _residuals, _rank, _singular_values = np.linalg.lstsq(
            X_design, y_train, rcond=None
        )

        self.intercept_ = betas[0]
        self.coef_ = betas[1:]

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        ``fit`` must have been called first; otherwise ``coef_`` and
        ``intercept_`` are still ``None``.
        """
        return np.dot(X_test, self.coef_) + self.intercept_

In [59]:
X_train.shape

(353, 10)

In [53]:
lr = MultipleLinearRegression()

In [54]:
lr.fit(X_train, y_train)

In [62]:
# Rebinds y_pred: it previously held the sklearn model's predictions
# (mlr.predict) and now holds those of the from-scratch model.
y_pred = lr.predict(X_test)

In [66]:
lr.coef_[0]

-9.158653181171129

In [67]:
lr.intercept_

151.88331005254165

In [64]:
r2_score(y_test, y_pred)

0.4399338661568961