In [1]:
import numpy as np
import numpy.linalg as la
from sklearn.base import BaseEstimator, RegressorMixin

$\dfrac{1}{N}\displaystyle{\sum_{i=1}^{N}}(Y_i-E[Y])^2 = \operatorname{var}(Y)$

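$R^2 = \left(1 - \dfrac{\dfrac{1}{N}\operatorname{RSS}}{\operatorname{var}(Y)}\right)=$ Fraction of the variance explained, where $\operatorname{RSS} = \displaystyle{\sum_{i=1}^{N}}(Y_i-\hat{Y}_i)^2$ is the residual sum of squares.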

If $R^2 < 0$, the model is garbage: it fits worse than simply predicting the mean of $Y$ for every sample. As $R^2 \rightarrow 1$, the model gets better.

In [2]:
class MyLinearModel(BaseEstimator, RegressorMixin):
  """Ordinary least-squares linear regression solved via the normal equations.

  Inheriting from ``BaseEstimator`` and ``RegressorMixin`` supplies
  ``get_params``/``set_params`` and an R^2 ``score`` method.

  Parameters
  ----------
  fit_intercept : bool, default=False
    When True, ``fit`` prepends a column of ones to the design matrix so a
    bias term is estimated and stored in ``intercept_``.
  poly_degree : int, default=1
    Stored as a hyperparameter only.
    NOTE(review): not used by ``fit`` or ``predict`` yet -- confirm the
    intended feature expansion before implementing it.

  Attributes
  ----------
  coef_ : ndarray
    One weight per column of ``X``; set by ``fit``.
  intercept_ : float or ndarray
    Estimated bias term, ``0.0`` when ``fit_intercept`` is False; set by
    ``fit``.
  """

  def __init__(self, fit_intercept = False, poly_degree=1):
    # Only store the arguments here: get_params/set_params rely on the
    # attributes having the same names as the constructor parameters.
    self.fit_intercept = fit_intercept
    self.poly_degree = poly_degree

  def fit(self, X, Y):
    """Estimate the coefficients by solving ``(X^T X) beta = X^T Y``.

    Parameters
    ----------
    X : 2-D array-like with a ``shape`` attribute (samples in rows)
    Y : array-like with a ``shape`` attribute and as many rows as ``X``

    Returns
    -------
    self
      The fitted estimator, so calls can be chained.

    Raises
    ------
    AssertionError
      If ``X`` and ``Y`` have a different number of rows.
    numpy.linalg.LinAlgError
      If the normal-equation matrix is singular.
    """
    assert X.shape[0] == Y.shape[0], "X and y must have the same number of samples"
    if self.fit_intercept:
      features = np.asarray(X, dtype=float)
      # A leading column of ones lets the same solve estimate the bias term.
      design = np.column_stack([np.ones(features.shape[0]), features])
      targets = np.asarray(Y, dtype=float)
      beta = la.solve(design.T @ design, design.T @ targets)
      self.intercept_ = beta[0]
      self.coef_ = beta[1:]
    else:
      self.coef_ = la.solve(X.T@X, X.T@Y)
      self.intercept_ = 0.0
    return self

  def predict(self, X):
    """Return ``X @ coef_`` plus the fitted intercept.

    Uses the values stored by the last ``fit``; changing ``fit_intercept``
    afterwards has no effect until ``fit`` is called again.
    """
    # getattr keeps predict working when only coef_ has been set.
    return X@self.coef_ + getattr(self, "intercept_", 0.0)

In [3]:
import pandas as pd

# Download Advertising.csv from statlearning.com; index_col=0 uses the file's
# first column as the row index instead of as a data column.
df = pd.read_csv('https://statlearning.com/s/Advertising.csv', index_col=0)

In [4]:
# Preview the first rows to check the columns (TV, radio, newspaper, sales).
df.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [5]:
# Build the model with its default hyperparameters (fit_intercept=False, poly_degree=1).
model = MyLinearModel()
# Predictors: the three advertising columns; response: the sales column.
X = df[['TV', 'radio', 'newspaper']]
Y = df['sales']
model.fit(X,Y)

In [6]:
# In-sample predictions: evaluated on the same rows the model was fitted on.
Yhat = model.predict(X)

In [7]:
# score is inherited from RegressorMixin and reports R^2, here on the training data.
model.score(X,Y)

0.850651504776713

In [8]:
# Residuals: observed sales minus the model's prediction, one value per row.
Y - Yhat

1      0.158603
2     -1.485691
3     -2.990830
4      0.188784
5     -0.207703
         ...   
196    4.490843
197    3.407679
198    1.104499
199   -0.202168
200   -1.142552
Length: 200, dtype: float64

In [10]:
# get_params is inherited from BaseEstimator and lists the constructor hyperparameters.
model.get_params()

{'fit_intercept': False, 'poly_degree': 1}

In [11]:
# set_params only updates the stored hyperparameter; the fitted coef_ is not
# recomputed until fit is called again.
model.set_params(fit_intercept=True)

MyLinearModel(fit_intercept=True)

In [12]:
# Confirm that the hyperparameter now reports the updated value.
model.get_params()

{'fit_intercept': True, 'poly_degree': 1}

In [13]:
# Coefficients from the earlier fit, one per predictor column; the model has
# not been refit since set_params was called.
model.coef_

array([0.05379188, 0.22222731, 0.01681639])