In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import load_diabetes

# return_X_y=True yields the (features, target) pair directly instead of a Bunch.
X, y = load_diabetes(return_X_y=True)

In [3]:
# Dimensions of the feature matrix returned by load_diabetes.
X.shape

(442, 10)

In [4]:
# Dimensions of the target returned by load_diabetes.
y.shape

(442,)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Baseline: scikit-learn's linear regression, fitted on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_test)

# R^2 of the baseline on the held-out split.
baseline_r2 = r2_score(y_test, y_pred)
print(baseline_r2)

0.4526027629719197


In [7]:
# Per-feature weights learned by the scikit-learn model.
lr.coef_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [8]:
# Intercept (bias term) learned by the scikit-learn model.
lr.intercept_

np.float64(151.34560453985995)

## Formula

- $\beta = (X^T X)^{-1} X^T y$
- $X$ has an extra leading column of 1s (for the intercept), so for $n$ samples and $m$ features its shape is $(n, m + 1)$.

In [9]:
class CustomLR():
    """Ordinary least-squares linear regression implemented with NumPy.

    The model is ``y ~ intercept + X @ coef``. The parameters are the
    least-squares solution of the design matrix ``[1 | X]`` against ``y``,
    i.e. the same solution the normal equation ``(X^T X)^-1 X^T y`` gives
    whenever ``X^T X`` is invertible.

    Attributes:
        coef: 1-D array of per-feature weights, or None before ``fit``.
        intercept: bias term as a Python float, or None before ``fit``.
    """

    def __init__(self):
        # Both parameters stay None until fit() has been called.
        self.coef = None
        self.intercept = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from training data.

        Args:
            X_train: 2-D array-like of features, one row per sample.
            y_train: array-like of targets, one entry per row of ``X_train``.

        Returns:
            None. The fitted parameters are stored on ``self.intercept`` and
            ``self.coef``.
        """
        # Prepend a column of ones (position 0, along the column axis) so that
        # the first fitted parameter plays the role of the intercept.
        design = np.insert(X_train, 0, 1, axis=1)

        # Solve the least-squares problem directly rather than inverting
        # design.T @ design: an explicit inverse raises LinAlgError when the
        # features are collinear (or there are fewer samples than columns) and
        # loses precision when the matrix is ill-conditioned. lstsq returns the
        # same solution in the well-posed case and the minimum-norm solution
        # otherwise.
        betas, *_ = np.linalg.lstsq(design, y_train, rcond=None)

        self.intercept = float(betas[0])
        self.coef = betas[1:]

    def predict(self, X_test):
        """Return ``intercept + X_test @ coef`` for the given feature rows.

        Args:
            X_test: array-like of features with the same number of columns
                as the data passed to ``fit``.

        Returns:
            Array of predicted target values, one per row of ``X_test``.
        """
        return self.intercept + np.dot(X_test, self.coef)

In [10]:
# Fit the from-scratch model on the same training split used for the baseline.
cus_lr = CustomLR()
cus_lr.fit(X_train, y_train)
ycus_pred = cus_lr.predict(X_test)

# R^2 of the custom model on the held-out split.
custom_r2 = r2_score(y_test, ycus_pred)
print(custom_r2)

0.45260276297191904


In [11]:
# Intercept learned by CustomLR, shown for comparison with lr.intercept_.
cus_lr.intercept

151.34560453986

In [12]:
# Per-feature weights learned by CustomLR, shown for comparison with lr.coef_.
cus_lr.coef

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])