In [12]:
from sklearn.base import BaseEstimator, RegressorMixin
import numpy.linalg as la
import pandas as pd
import numpy as np
from itertools import combinations_with_replacement as cwr
from sklearn.model_selection import train_test_split

In [13]:
class MyLinearModel(BaseEstimator, RegressorMixin):
  def __init__(self, fit_intercept = False, poly_degree=1):
    assert poly_degree > 0, ValueError("the poly_degree must be greater than 0")
    self.fit_intercept = fit_intercept
    self.poly_degree = poly_degree

  def fit(self, X, Y):
    assert X.shape[0] == Y.shape[0], ValueError("the dataframes X and Y must have the same number of samples")
    X = self.augment_data(X)
    self.coef_ = la.solve(X.T@X, X.T@Y)

  def predict(self, X):
    return X@self.coef_

  def augment_data(self, X):
    new_X = X.copy()
    params = self.get_params()

    for i in range(1, params['poly_degree']+1):
      for term in cwr(X.columns, i):
        name = '*'.join(term)
        column = 1
        columns = [X[colname] for colname in term]
        for col in columns:
          column *=col
        new_X[name] = column

    if params['fit_intercept']:
      new_X['const'] = np.ones(X.shape[0])

    return new_X

In [14]:
df = pd.read_csv('https://statlearning.com/s/Advertising.csv', index_col=0)
X = df.drop(columns=['sales'])
Y = df['sales']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

In [15]:
model = MyLinearModel()
model.fit(X, Y)
augmented_X = model.augment_data(X)
score = model.score(augmented_X, Y)
Yhat = model.predict(augmented_X)

print(f'{score=}')

score=0.850651504776713


In [16]:
model = MyLinearModel(fit_intercept=True, poly_degree=3)
model.fit(X_train, Y_train)
augmented_X_test = model.augment_data(X_test)
score = model.score(augmented_X_test, Y_test)
Yhat = model.predict(augmented_X_test)

print(f'{score=}')

score=0.9802906019784468


In [17]:
model = MyLinearModel(fit_intercept=True, poly_degree=0)
model.fit(X_train, Y_train)
augmented_X_test = model.augment_data(X_test)
score = model.score(augmented_X_test, Y_test)
Yhat = model.predict(augmented_X_test)

print(f'{score=}')

AssertionError: the poly_degree must be greater than 0