In [5]:
import pandas as pd
import numpy as np
import random

In [None]:
class MyLineReg():
    """Linear regression trained with batch or mini-batch gradient descent.

    Parameters
    ----------
    n_iter : int
        Number of gradient steps performed by ``fit``.
    learning_rate : number or callable
        Either a constant step size, or a callable that receives the
        1-based iteration number and returns the step size for it.
    weights : array-like or False
        Initial placeholder for the weights; ``fit`` always replaces it
        with a freshly initialised vector (intercept first).
    metric : str or None
        Key of the metric returned by ``get_best_score``; one of
        ``'mse'``, ``'mae'``, ``'rmse'``, ``'r2'``, ``'mape'``.
    reg : str or None
        ``'l1'``, ``'l2'``, ``'elasticnet'`` or ``None`` (no penalty).
    l1_coef, l2_coef : float
        Strengths of the L1 / L2 penalties.
    sgd_sample : int, float or None
        Mini-batch size: an int is a row count, a float is a fraction of
        the rows, ``None`` uses every row on every step.
    random_state : int
        Seed for the mini-batch sampler.
    """

    def __init__(self, n_iter=100, learning_rate=0.1, weights=False,
                 metric=None, reg=None, l1_coef=0, l2_coef=0, sgd_sample=None, random_state=42):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.metric = metric
        # One list per metric; each call to fit() appends the final value.
        self.metrics = {'mse': [], 'mae': [], 'rmse': [], 'r2': [], 'mape': []}
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        self.sgd_sample = sgd_sample
        self.random_state = random_state

    @staticmethod
    def _design_matrix(X):
        """Return X as a float array with a leading column of ones (intercept)."""
        values = np.asarray(X, dtype=float)
        return np.hstack([np.ones((values.shape[0], 1)), values])

    def _is_fitted(self):
        """Tell whether usable weights are present (the constructor default is False)."""
        return self.weights is not None and self.weights is not False

    def _batch_size(self, n_rows):
        """Translate ``sgd_sample`` into a row count, or None for full-batch steps."""
        if self.sgd_sample is None:
            return None
        if isinstance(self.sgd_sample, (float, np.floating)):
            size = int(round(self.sgd_sample * n_rows))
        else:
            size = int(self.sgd_sample)
        # Keep the batch non-empty and no larger than the data set.
        return max(1, min(size, n_rows))

    def _reg_gradient(self):
        """Gradient of the configured penalty with respect to the current weights."""
        l1_part = self.l1_coef * np.sign(self.weights)
        l2_part = 2 * self.l2_coef * self.weights
        if self.reg == 'l1':
            return l1_part
        if self.reg == 'l2':
            return l2_part
        if self.reg == 'elasticnet':
            return l1_part + l2_part
        return 0.0

    def _record_metrics(self, prediction, target):
        """Append the metrics of ``prediction`` against ``target`` to the history."""
        residual = target - prediction
        mse = np.mean(residual ** 2)
        # A constant target (r2) or a zero target value (mape) yields inf/nan;
        # silence the NumPy warning and store the value as is.
        with np.errstate(divide='ignore', invalid='ignore'):
            r2 = 1 - np.sum(residual ** 2) / np.sum((target - target.mean()) ** 2)
            mape = 100 * np.mean(np.abs(residual / target))
        self.metrics['mse'].append(mse)
        self.metrics['mae'].append(np.mean(np.abs(residual)))
        self.metrics['rmse'].append(np.sqrt(mse))
        self.metrics['r2'].append(r2)
        self.metrics['mape'].append(mape)

    def fit(self, X: pd.DataFrame, y: pd.Series, verbose=False):
        """Fit the weights by gradient descent on the MSE loss.

        ``X`` is not modified. Rows of ``X`` and ``y`` are paired by
        position, not by index label. When ``verbose`` is true the
        (mini-batch) MSE is printed before every step.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        self.X = X
        self.y = y
        self.verbose = verbose

        features = self._design_matrix(X)
        target = np.asarray(y, dtype=float).ravel()
        n_rows, n_feats = features.shape
        if n_rows == 0:
            raise ValueError("cannot fit on an empty data set")
        if target.shape[0] != n_rows:
            raise ValueError("X and y must contain the same number of rows")

        self.weights = np.ones(n_feats)
        batch_size = self._batch_size(n_rows)
        # A private generator yields the same sample sequence as seeding the
        # global one, without disturbing other users of the random module.
        rng = random.Random(self.random_state)

        for i in range(1, self.n_iter + 1):
            if batch_size is None:
                X_batch, y_batch = features, target
            else:
                rows = rng.sample(range(n_rows), batch_size)
                X_batch, y_batch = features[rows], target[rows]

            error = X_batch @ self.weights - y_batch
            if self.verbose:
                print("MSE:", np.mean(error ** 2))

            grad = 2 / X_batch.shape[0] * (error @ X_batch) + self._reg_gradient()
            if callable(self.learning_rate):
                rate = self.learning_rate(i)
            else:
                rate = self.learning_rate
            self.weights = self.weights - rate * grad

        self._record_metrics(features @ self.weights, target)

    def get_best_score(self):
        """Return the latest value of ``self.metric``, or None if unavailable."""
        history = self.metrics.get(self.metric)
        return history[-1] if history else None

    def get_coef(self):
        """Return the mean of the non-intercept weights (0 before fitting)."""
        if not self._is_fitted():
            return 0
        return np.mean(np.asarray(self.weights)[1:])

    def predict(self, X: pd.DataFrame):
        """Return predictions for ``X`` as a Series; ``X`` is not modified.

        Raises
        ------
        ValueError
            If the model has no weights yet.
        """
        if not self._is_fitted():
            raise ValueError("model is not fitted; call fit() first")
        # Kept for compatibility: predict has always overwritten self.X.
        self.X = X
        values = self._design_matrix(X) @ np.asarray(self.weights, dtype=float)
        return pd.Series(values, index=getattr(X, 'index', None))