## Training Your Own Linear Regressor

Create a linear regressor, with a Scikit-learn compatible fit-predict interface. You should implement every detail of the linear regressor in Python, using whatever library you want (except a linear regressor itself).

You must investigate and describe all major details for a linear regressor, and implement at least the following concepts (MUST):

### Qa: Concepts and Implementations MUSTS

* the `fit-predict` interface, and an $R^2$ score function,
* one-dimensional output only,
* loss function based on (R)MSE,
* setting of the number of iterations and learning rate ($\eta$) via parameters in the constructor, the signature of your `__init__` must include the named parameters `max_iter` and `eta0`,
* the batch-gradient descent algorithm (GD),
* constant or adaptive learning rate,
* learning graphs,
* stochastic gradient descent (SGD),
* epochs vs iterations,
* compare the numerical optimization with the Closed-form solution.

In [62]:
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics import r2_score

class MyLinearRegressor(BaseEstimator, RegressorMixin):
    """Linear regressor trained with batch gradient descent on the MSE loss.

    The model is ``y_pred = X @ weights + bias``. Every iteration uses the
    full training set (batch GD) together with a constant learning rate.

    Parameters
    ----------
    max_iter : int, default=1000
        Number of gradient-descent iterations performed by ``fit``.
    eta0 : float, default=0.01
        Constant learning rate.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until ``fit`` has been called.
    bias : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, max_iter=1000, eta0=0.01):
        self.max_iter = max_iter  # Number of iterations
        self.eta0 = eta0  # Learning rate
        self.weights = None
        self.bias = None

    def fit(self, X, y_true):
        """Fit weights and bias to the training data with batch gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y_true : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets (one-dimensional output only).

        Returns
        -------
        self : MyLinearRegressor
            The fitted estimator, so that calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, contains no samples, or the
            number of targets does not match the number of samples.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the targets: a column vector of shape (m, 1) would otherwise
        # broadcast against the (m,) predictions into an (m, m) error matrix.
        y_true = np.asarray(y_true, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features), got ndim={X.ndim}"
            )
        m, n = X.shape
        if m == 0:
            raise ValueError("X must contain at least one sample")
        if y_true.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but y_true has {y_true.shape[0]} values"
            )

        self.weights = np.zeros(n)  # Initialize weights with zeros
        self.bias = 0.0  # Initialize bias with zero

        # The step scale is constant across iterations, so compute it once.
        step = self.eta0 * (1 / m)

        for _ in range(self.max_iter):
            # Residuals of the current model on the full batch
            errors = self.predict(X) - y_true

            # Gradient step; both updates use the residuals computed above
            self.weights -= step * np.dot(X.T, errors)
            self.bias -= step * np.sum(errors)

        return self

    def predict(self, X):
        """Return the predictions ``X @ weights + bias`` for the samples in X."""
        return np.dot(X, self.weights) + self.bias

    def score(self, X, y):
        """Return the R^2 score of the predictions for X against the targets y."""
        return r2_score(y, self.predict(X))

    def RMSE(self, y_pred, y_true):
        """Return the root mean squared error between y_pred and y_true."""
        # Coerce to arrays so that plain Python sequences are accepted as well
        residuals = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)

        # Mean of the squared differences (L2), then the square root
        return np.sqrt(np.mean(residuals ** 2))

    # From our own previous assignment
    def checkInputSameShape(self, y_pred, y_true):
        """Assert that y_pred and y_true have identical shapes."""
        assert np.shape(y_pred) == np.shape(y_true), "Shape of input is not equal!"

In [63]:
# Mini smoke test for your linear regressor...

import sys
import numpy

def PrintOutput(msg, pre_msg, ex=None, color="", filestream=sys.stdout):
    """Print ``pre_msg`` followed by ``msg``, optionally colored, to ``filestream``.

    Parameters
    ----------
    msg : str
        Message body.
    pre_msg : str
        Prefix written directly in front of ``msg`` (e.g. "WARNING: ").
    ex : Exception, optional
        When given, its text and type are appended to the message on a new line.
    color : str, default ""
        ANSI escape sequence emitted before the text, e.g. "\\033[1;31m"
        (light red), "\\033[1;33m" (light yellow) or "\\033[1;35m" (light
        purple). When non-empty, the reset sequence is emitted after the text.
    filestream : file-like, default sys.stdout
        Stream the line is written to.
    """
    # Only emit the ANSI reset when a color was actually switched on.
    reset = "" if color == "" else "\033[0m"

    text = msg
    if ex is not None:
        text = text + f"\n   EXCEPTION: {ex} ({type(ex)})"

    line = f"{color}{pre_msg}{text}{reset}"
    print(line, file=filestream)

def Warn(msg, ex=None):
    """Print ``msg`` with a "WARNING: " prefix in light yellow, plus ``ex`` details if given."""
    light_yellow = "\033[1;33m"
    PrintOutput(msg, pre_msg="WARNING: ", ex=ex, color=light_yellow)

def Err(msg, ex=None):
    """Print ``msg`` with an "ERROR: " prefix in light red, then terminate.

    Parameters
    ----------
    msg : str
        Error description.
    ex : Exception, optional
        Exception whose text and type are appended to the message.

    Raises
    ------
    SystemExit
        Always, with exit status -1.
    """
    PrintOutput(msg, "ERROR: ", ex, "\033[1;31m")
    # Use sys.exit rather than the exit() helper: the latter is injected by
    # the `site` module for interactive sessions and is not guaranteed to exist.
    sys.exit(-1)

def Info(msg):
    """Print ``msg`` in light purple, without prefix and without exception details."""
    light_purple = "\033[1;35m"
    PrintOutput(msg, pre_msg="", ex=None, color=light_purple)

def SimplePrintMatrix(x, label="", precision=12):
    """Print ``x``, preceded by ``label`` and a space when a label is given.

    Simple default implementation that serves as a stand-in when the libitmal
    ``PrintMatrix`` function is not available.

    Parameters
    ----------
    x : object
        Value to print, typically a numpy array.
    label : str, default ""
        Text printed in front of ``x``.
    precision : int, default 12
        Number of decimals used when ``x`` is formatted as a numpy array.
    """
    separator = " " if len(label) > 0 else ""
    # Apply the requested precision only for the duration of this print, so
    # the global numpy print options are left untouched.
    with numpy.printoptions(precision=precision):
        print(f"{label}{separator}{x}")

def SimpleAssertInRange(x, expected, eps):
    """Assert that ``x`` is numerically close to ``expected``.

    ``eps`` must be positive and is used as the *relative* tolerance of
    ``numpy.allclose``; the absolute tolerance keeps numpy's default.
    No type, dimension or shape checks are performed on the inputs.

    Raises
    ------
    AssertionError
        If ``eps`` is not positive or the values are not close enough.
    """
    assert eps>0
    within_tolerance = numpy.allclose(x, expected, rtol=eps)
    assert within_tolerance

def GenerateData():
    """Return the fixed four-sample data set used by the smoke test.

    Returns
    -------
    X : numpy.ndarray of shape (4, 1)
        Single-feature inputs.
    y : numpy.ndarray of shape (4,)
        Target values.
    """
    feature_values = [8.34044009e-01, 1.44064899e+00, 2.28749635e-04, 6.04665145e-01]
    target_values = [5.97396028, 7.24897834, 4.86609388, 3.51245674]

    # One row per sample, one column for the single feature
    X = numpy.array([[value] for value in feature_values])
    y = numpy.array(target_values)
    return X, y

def TestMyLinReg():
    """Smoke test for MyLinearRegressor on the data from GenerateData().

    Steps, each wrapped in its own try/except that reports through Err():
    construct the regressor (default and with max_iter=200, eta0=0.4), fit,
    predict, score, read the bias and weights, and finally compare the
    concatenated vector [bias, weights...] with a fixed reference vector
    using eps = 1E-3.
    """
    X, y = GenerateData()

    try:
        # default construction; the class under test here is MyLinearRegressor
        regressor = MyLinearRegressor()
    except Exception as ex:
        # NOTE(review): the message still refers to 'MyLinReg', not MyLinearRegressor
        Err("your regressor has another name, than 'MyLinReg', please change the name in this smoke test", ex)

    try:
        # construction with the two mandatory named parameters
        regressor = MyLinearRegressor(max_iter=200, eta0=0.4)
    except Exception as ex:
        Err("your regressor can not be constructed via the __init_ (with two parameters, see call above", ex)

    try:
        regressor.fit(X, y)
    except Exception as ex:
        Err("your regressor can not fit", ex)

    try:
        y_pred = regressor.predict(X)
        Info(f"y_pred = {y_pred}")
    except Exception as ex:
        Err("your regressor can not predict", ex)

    try:
        score  = regressor.score(X, y)
        Info(f"SCORE = {score}")
    except Exception as ex:
        Err("your regressor fails in the score call", ex)

    try:
        w    = None # default
        bias = None # default
        try:
            # first choice: read the parameters directly from the attributes
            w = regressor.weights
            bias = regressor.bias
        except Exception as ex:
            w = None
            # NOTE(review): the message mentions coef_/intercept_, but the code above reads .weights/.bias
            Warn("your regressor has no coef_/intercept_ atrributes, trying Weights() instead..", ex)
        try:
            if w is None:
                # fallback: a Weights() function returning one vector, bias first
                w = regressor.Weights()
                try:
                    assert w.ndim == 1,     "can only handle vector like w's for now"
                    assert w.shape[0] >= 2, "expected length of to be at least 2, that is one bias one coefficient"
                    # split the combined vector into bias (first element) and coefficients (rest)
                    bias = w[0]
                    w = w[1:]
                except Exception as ex:
                    w = None
                    Err("having a hard time concantenating our bias and coefficients, giving up!", ex)
        except Exception as ex:
            w = None
            Err("your regressor also has no Weights() function, giving up!", ex)
        Info(f"bias         = {bias}")
        Info(f"coefficients = {w}")
    except Exception as ex:
        Err("your regressor fails during extraction of bias and weights (but is a COULD)", ex)

    try:
        from libitmal.utils import PrintMatrix
    except Exception as ex:
        PrintMatrix = SimplePrintMatrix # fall-back
        Warn("could not import PrintMatrix from libitmal.utils, defaulting to simple function..")

    try:
        from libitmal.utils import AssertInRange
    except Exception as ex:
        AssertInRange = SimpleAssertInRange # fall-back
        Warn("could not import AssertInRange from libitmal.utils, defaulting to simple function..")

    try:
        if w is not None:
            if bias is not None:
                # rebuild the combined vector [bias, coefficients...]; may be incorrect for other implementations
                w = numpy.concatenate(([bias], w))
            # TEST VECTOR: reference values the fitted parameters are compared against
            w_expected = numpy.array([4.046879011698, 1.880121487278])
            PrintMatrix(w,          label="\tw         =", precision=12)
            PrintMatrix(w_expected, label="\tw_expected=", precision=12)
            eps = 1E-3 # somewhat big epsilon, allowing some slack..
            AssertInRange(w, w_expected, eps)
            Info("well, good news, your w and the expected w-vector seem to be very close numerically, so the smoke-test has passed!")
        else:
            Warn("cannot test due to missing w information")
    except Exception as ex:
        Err("mini-smoketest on your regressor failed", ex)

# Remind the reader that the smoke test itself may need to be adapted.
Warn("This mini smoke test is currently untested, please modify...")
# Run the smoke test; a failing step calls Err(), which prints the error and exits.
TestMyLinReg()
print("OK")

[1;35my_pred = [5.61498304 6.75547481 4.04730809 5.18372265][0m
[1;35mSCORE = 0.49500564295554395[0m
[1;35mbias         = 4.046878010107266[0m
[1;35mcoefficients = [1.88012265][0m
	w         =[4.046878010107 1.880122650194]
	w_expected=[4.046879011698 1.880121487278]
[1;35mwell, good news, your w and the expected w-vector seem to be very close numerically, so the smoke-test has passed![0m
OK
