In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.dummy import DummyRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.base import RegressorMixin, BaseEstimator
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
def generate_data(n_samples=10000, n_features=1, noise=15, random_state=42):
    """Create a synthetic linear-regression dataset with ``make_regression``.

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.
    n_features : int
        Number of feature columns in ``X``.
    noise : float
        Value forwarded to ``make_regression(noise=...)``. Defaults to the
        previously hard-coded 15.
    random_state : int or None
        Seed forwarded to ``make_regression``. Defaults to the previously
        hard-coded 42, so calls without arguments yield the same data as before.

    Returns
    -------
    tuple
        ``(X, y)`` exactly as returned by ``make_regression``.
    """
    return make_regression(
        n_samples=n_samples,
        n_features=n_features,
        noise=noise,
        random_state=random_state,
    )

In [None]:
def plot_dataset(X, y):
    """Show a 10x6 inch scatter plot of ``y`` against ``X``."""
    _, axes = plt.subplots(figsize=(10, 6))
    axes.scatter(X, y)
    plt.show()

In [None]:
# Build the synthetic dataset with generate_data()'s default arguments and plot it.
X, y = generate_data()
plot_dataset(X, y)

In [None]:
# Hold out 25% of the samples as the test split; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.25, random_state = 42
)

In [None]:
# Scatter plot of the training split.
plot_dataset(X_train, y_train)

In [None]:
# Scatter plot of the test split.
plot_dataset(X_test, y_test)

In [None]:
def plot_prediction(X, y, model, n_points=200):
    """Scatter the data and overlay the model's prediction curve in red.

    The curve is evaluated on ``n_points`` evenly spaced values spanning
    ``[min(X), max(X)]`` (both ends included). Using a fixed number of points
    instead of a fixed step keeps the grid size independent of the feature
    scale: a very narrow range still yields a visible line and a very wide
    range does not produce an oversized grid.

    Parameters
    ----------
    X : array-like
        Feature values. The grid is passed to the model as a single column,
        so this is only meaningful for a single-feature ``X``.
    y : array-like
        Target values plotted against ``X``.
    model : object
        Anything exposing ``predict(grid)`` for a grid of shape ``(n_points, 1)``.
    n_points : int, optional
        Number of grid points used to draw the prediction curve.
    """
    plt.figure(figsize=(10, 6))
    plt.scatter(X, y)
    grid = np.linspace(np.min(X), np.max(X), n_points).reshape(-1, 1)
    plt.plot(grid, model.predict(grid), 'red')
    plt.show()

In [None]:
def print_metrics(true, predict):
    """Print R^2, RMSE and MAE of ``predict`` measured against ``true``.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predict : array-like
        Predicted target values, one per entry of ``true``.

    Notes
    -----
    The metrics are computed from the ``true`` argument. The previous version
    ignored it and always read the notebook-global ``y_test``, which made the
    function wrong (or a ``NameError``) for any other target vector.
    """
    r2 = r2_score(true, predict)
    # RMSE is the square root of the mean squared error.
    rmse = mean_squared_error(true, predict) ** 0.5
    mae = mean_absolute_error(true, predict)
    print(f'Results:\nr2:   {r2:.3f}\nrmse: {rmse:.3f}\nmae:  {mae:.3f}')

In [None]:
class MyRegressor(BaseEstimator, RegressorMixin):
    """Small regressor with four interchangeable fitting strategies.

    Strategies
    ----------
    'mean', 'median'
        Constant prediction equal to the mean / median of the training targets.
    'analytic'
        Linear model with a bias term, solved as a least-squares problem.
    'gradient_descent'
        The same linear model fitted by batch gradient descent on the
        squared error, starting from all-zero weights.

    Parameters
    ----------
    strategy : str
        One of the strategies listed above.
    iterations : int
        Number of gradient-descent steps (used only by 'gradient_descent').
    alpha : float
        Gradient-descent step size (used only by 'gradient_descent').
    verbose : bool
        If true, gradient descent prints its state every 100 steps.

    Attributes set by ``fit``
    -------------------------
    y_predict : float
        The constant prediction ('mean' / 'median' only).
    weights : numpy.ndarray
        Linear coefficients with the bias first. For 'gradient_descent' this
        is a column of shape ``(n_features + 1, 1)``.
    losses_ : list of float
        Mean squared error measured before each gradient-descent update
        ('gradient_descent' only).
    """

    _CONSTANT_STRATEGIES = ('mean', 'median')
    _LINEAR_STRATEGIES = ('analytic', 'gradient_descent')

    def __init__(self, strategy = 'mean', iterations = 1000, alpha = 0.0001, verbose = False):
        self.strategy = strategy
        self.iterations = iterations
        self.alpha = alpha
        self.verbose = verbose

    def _check_strategy(self):
        """Raise ``ValueError`` when ``self.strategy`` is not a supported name."""
        supported = self._CONSTANT_STRATEGIES + self._LINEAR_STRATEGIES
        if self.strategy not in supported:
            raise ValueError(
                f'Unknown strategy {self.strategy!r}; expected one of {supported}'
            )

    def fit(self, X, y):
        """Fit the estimator with the configured strategy.

        Parameters
        ----------
        X : array-like
            Feature matrix; a 1-D input is treated as a single feature column.
        y : array-like
            Target values, one per row of ``X``.

        Returns
        -------
        MyRegressor
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``strategy`` is not one of the supported names.
        """
        self._check_strategy()
        y = np.asarray(y)
        if self.strategy == 'mean':
            self.y_predict = np.mean(y)
        elif self.strategy == 'median':
            self.y_predict = np.median(y)
        elif self.strategy == 'analytic':
            X_b = self.add_bias(X)
            # Solve the least-squares problem directly rather than inverting
            # X^T X, so singular or collinear designs do not raise.
            self.weights = np.linalg.lstsq(X_b, y, rcond=None)[0]
        else:
            self._fit_gradient_descent(X, y)
        return self

    def _fit_gradient_descent(self, X, y):
        """Run ``self.iterations`` batch gradient-descent steps from zero weights."""
        X_b = self.add_bias(X)
        n_samples, n_weights = X_b.shape
        targets = y.reshape(n_samples, 1)
        # One weight per column of X_b (bias + every feature), not a fixed 2.
        self.weights = np.zeros((n_weights, 1))
        self.losses_ = []
        for step in range(self.iterations):
            error = np.dot(X_b, self.weights) - targets
            # Mean squared error of the current weights, from the residual
            # that the update below needs anyway.
            loss = float(np.mean(error ** 2))
            self.weights = self.weights - (self.alpha / n_samples) * np.dot(X_b.T, error)
            self.losses_.append(loss)
            if self.verbose and step % 100 == 0:
                print(f'step: {step},error: {error.shape} loss:{loss}, weights: {self.weights}')
        print(f'Weigths: {self.weights}')

    @staticmethod
    def add_bias(X):
        """Return ``X`` with a leading column of ones (the bias term).

        A 1-D ``X`` is first reshaped into a single feature column.
        """
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        ones = np.ones((X.shape[0], 1))
        return np.concatenate((ones, X), 1)

    def predict(self, X):
        """Predict targets for ``X`` with the fitted parameters.

        Returns
        -------
        numpy.ndarray
            For 'mean' / 'median': a 1-D array with one constant per row of
            ``X``. For the linear strategies: ``add_bias(X) @ weights``, which
            is a column of shape ``(n_samples, 1)`` for 'gradient_descent'
            because its weights are stored as a column.

        Raises
        ------
        ValueError
            If ``strategy`` is not one of the supported names.
        """
        self._check_strategy()
        if self.strategy in self._CONSTANT_STRATEGIES:
            return np.ones(np.shape(X)[0]) * self.y_predict
        X_b = self.add_bias(X)
        return np.dot(X_b, self.weights)

In [None]:
# MyRegressor with the constant 'mean' strategy: fit on the training split,
# predict on the test split and print the metrics.
model = MyRegressor(strategy = 'mean')
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# Overlay the predictions of the current `model` on the test split.
plot_prediction(X_test, y_test, model)

In [None]:
# Same steps with scikit-learn's DummyRegressor(strategy='mean').
model = DummyRegressor(strategy='mean')
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# MyRegressor with the constant 'median' strategy: fit, predict, print the metrics.
model = MyRegressor(strategy = 'median')
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# Same steps with scikit-learn's DummyRegressor(strategy='median').
model = DummyRegressor(strategy='median')
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# MyRegressor with the 'analytic' strategy (linear model with a bias column):
# fit on the training split, predict on the test split and print the metrics.
model = MyRegressor(strategy = 'analytic')
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# Overlay the predictions of the current `model` on the test split.
plot_prediction(X_test, y_test, model)

In [None]:
# MyRegressor fitted by gradient descent: 10000 update steps with step size
# alpha = 0.001, then predict on the test split and print the metrics.
model = MyRegressor(strategy = 'gradient_descent', iterations = 10000, alpha = 0.001)
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# Overlay the predictions of the current `model` on the test split.
plot_prediction(X_test, y_test, model)

In [None]:
# Same steps with scikit-learn's LinearRegression using its default settings.
model = LinearRegression()
model.fit(X_train, y_train)
predict = model.predict(X_test)
print_metrics(y_test, predict)

In [None]:
# Overlay the predictions of the current `model` on the test split.
plot_prediction(X_test, y_test, model)