In [7]:
import numpy as np
from sklearn.datasets import load_diabetes
# return_X_y=True hands back the features and the target as two separate arrays
X,y = load_diabetes(return_X_y=True)
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [8]:
# Feature matrix dimensions; the data is bound to uppercase `X` (lowercase `x` is undefined)
X.shape

(442, 10)

In [9]:
# Target vector dimensions
y.shape

(442,)

## Using sklearn's Linear Regression

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)
for split in (X_train, X_test):
    print(split.shape)

# Reference model to compare the hand-written implementation against
reg = LinearRegression()
reg.fit(X_train, y_train)

(353, 10)
(89, 10)


LinearRegression()

In [11]:
from sklearn.metrics import r2_score

# R^2 of the sklearn model on the held-out split
y_pred = reg.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.4399387660024644

In [15]:
print(reg.coef_)     # fitted coefficients, one per feature
print("-==========")
print(reg.intercept_)      # intercept term (b_0)

[  -9.16088483 -205.46225988  516.68462383  340.62734108 -895.54360867
  561.21453306  153.88478595  126.73431596  861.12139955   52.41982836]
151.88334520854633


## Making our own Linear Regression class

In [25]:
class MultipleLR:
    """Multiple linear regression fitted in closed form via the normal equations.

    Attributes:
        coef_: array holding one weight per feature; None until ``fit`` is called.
        intercept_: bias term b0; None until ``fit`` is called.
    """

    def __init__(self):
        # Use the same attribute names that fit() and predict() read and write,
        # so an unfitted model exposes None instead of missing attributes.
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and the coefficients from training data.

        Args:
            X_train: 2-D array-like, one row per sample and one column per feature.
            y_train: array-like of targets, one entry per row of ``X_train``.

        Raises:
            numpy.linalg.LinAlgError: if X^T X is singular (for example when
                feature columns are perfectly collinear).
        """
        # Prepend a column of 1s so that the first beta plays the role of b0.
        # np.insert(array, index, value, axis): axis=1 inserts a column.
        design = np.insert(X_train, 0, 1, axis=1)

        # Normal equations: (X^T X) beta = X^T y, i.e. beta = (X^T X)^-1 X^T y.
        # Solving the linear system gives the same beta without forming the
        # explicit inverse, which is cheaper and loses less precision.
        gram = np.dot(design.T, design)
        moment = np.dot(design.T, y_train)
        betas = np.linalg.solve(gram, moment)

        self.intercept_ = betas[0]
        self.coef_ = betas[1:]

    def predict(self, X_test):
        """Return predictions y = b0 + b1*x1 + ... + bn*xn for each row of ``X_test``.

        Args:
            X_test: 2-D array-like with the same number of columns used in ``fit``.

        Raises:
            AttributeError: if the model has not been fitted yet.
        """
        if self.coef_ is None or self.intercept_ is None:
            # AttributeError keeps the exception type an unfitted model raised before
            raise AttributeError("MultipleLR is not fitted yet; call fit() first")

        # (n_samples, n_features) . (n_features,) -> (n_samples,)
        y_pred = np.dot(X_test, self.coef_) + self.intercept_
        return y_pred

In [26]:
lr = MultipleLR()    # create an instance of the hand-written model
lr.fit(X_train, y_train)     # estimate the intercept and coefficients from the training split

In [27]:
# Predictions of the hand-written model on the held-out split
lr.predict(X_test)

array([154.1235067 , 204.81721599, 124.92988001, 106.09339576,
       258.53035681, 256.32953702, 118.75258786, 119.52147402,
       101.50717468, 190.54137158, 141.70360267, 172.51631204,
       174.34089304, 134.81184017, 294.13950798,  94.11403289,
       211.97052873, 156.49984762, 134.20709632, 119.62534726,
       148.88045343, 165.00378118, 151.09977307, 176.03719872,
       133.27651748, 221.29531227, 197.17482787,  96.15923158,
        50.26531577, 230.48342249, 242.06266394, 114.1153262 ,
        67.0785352 ,  94.53179042, 201.21593262, 167.05306138,
       159.87838519, 192.78601513, 114.49291816, 233.4825497 ,
       140.82309666, 121.06814332, 192.27431013, 191.13157307,
       179.1698153 , 148.35140027, 163.47610288, 276.81203359,
       100.17813072, 164.11265163, 255.81074398, 136.94979051,
       152.37507828, 107.92662528, 194.21661635,  77.35015426,
       118.50951725,  68.38527563, 154.29094022, 162.48905632,
       168.36590928, 156.8764705 ,  97.13958436, 238.17

In [28]:
# Score the hand-written model's own predictions; `y_pred` still holds the
# sklearn model's output from the earlier cell, so reusing it would not test `lr`.
r2_score(y_test, lr.predict(X_test))

0.4399387660024644

In [29]:
# Compare with the sklearn coefficients and intercept printed earlier
print(lr.coef_)
print(lr.intercept_)

[  -9.16088483 -205.46225988  516.68462383  340.62734108 -895.54360867
  561.21453306  153.88478595  126.73431596  861.12139955   52.41982836]
151.88334520854627
