In [1]:
import numpy as np
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [2]:
X, y = make_regression(10000, 10, noise=20)

In [3]:
class LinearBase:
    def __init__(self, learning_rate, n_iter):
        self.learning_rate = learning_rate if learning_rate else 0.1
        self.n_iter = n_iter if n_iter else 1000
        self.input_dim = None
        self.coefs = None
        self.intercept = None
        self.fitted = False 
        
    def _error(self, X, y, theta):
        return X.dot(theta) - y.reshape(-1,1)
    
    def predict(self, X):
        if not self.fitted:
            raise Exception("Linreg is not yet fitted.")
        if len(X.shape) == 1:
            X = X.reshape(1,-1)
        if X.shape[1] != self.input_dim:
            raise Exception(f"Input data shape must be equal to fit data shape {self.input_dim}")
        if X.shape[1] == 1:
            return X.dot(self.coefs) + self.intercept
        return X.dot(self.coefs)
    
    def __repr__(self):
        if self.fitted:
            return f"coefficients: {self.coefs}, \n\n intercept: {self.intercept}"
        else:
            return "Linreg"    

In [11]:
class RidgeReg(LinearBase):
    def __init__(self, learning_rate, n_iter, alpha):
        super().__init__(learning_rate, n_iter)
        self.alpha = alpha

    def fit(self, X, y, optimization='normal', verbose=None):
        self.input_dim = X.shape[1]
        
        # add 1s
        xb = np.c_[np.ones((X.shape[0],1)), X]
        
        # initiate coefs
        theta = np.ones((xb.shape[1], 1))
        
        # get m
        m = xb.shape[0]
        
        # normal equation
        if optimization=='normal':
            if m < 50000:
                
                # identity matrix
                id_m = np.eye(len(theta))
                id_m[0][0] = 0
                A = self.alpha * id_m  
                
                # coefficients
                theta = np.linalg.inv(xb.T.dot(xb) + A).dot(xb.T).dot(y)
            else:
                optimization = 'gradient_descent'
                
        if optimization=='gradient_descent':
            for step in range(self.n_iter):
                
                # compute error
                error = self._error(xb, y, theta)

                # update coefficients            
                theta = theta - (self.learning_rate / m) * (np.dot(xb.T, error) + (2 * self.alpha * theta))
        
        self.coefs = theta[1:]
        self.intercept = theta[0]
        self.fitted = True
        return self
   

In [70]:
xb = np.c_[np.ones((X.shape[0],1)), X]
xb

array([[ 1.        , -1.62296152,  0.37072014, ..., -0.62845298,
         1.29458756, -0.23641115],
       [ 1.        ,  0.38604384,  0.4430225 , ...,  0.99574685,
         0.86481061,  1.0405665 ],
       [ 1.        ,  1.39644852,  0.19539866, ..., -1.24914932,
         0.5649226 ,  0.2251639 ],
       ...,
       [ 1.        , -0.28387146,  0.4518475 , ...,  1.4362834 ,
         0.66029339,  1.7010591 ],
       [ 1.        , -0.6224633 , -0.58801878, ..., -0.90606239,
        -1.15240146,  1.39284271],
       [ 1.        , -0.84682469, -1.69955074, ...,  0.81982981,
         0.49988047, -1.31688371]])

In [72]:
beta = np.zeros(X.shape[1])

beta[0] = np.sum(y - np.dot(X[:, 1:], beta[1:]))/(X.shape[0])

In [84]:
i = np.array([[1, 2, 9], [3, 4,9], [5,6,9]])
i[:, 1:]

array([[2, 9],
       [4, 9],
       [6, 9]])

In [135]:
class LASSOReg(LinearBase):
    
    def __init__(self, learning_rate=None, n_iter=1000, alpha=1):
        super().__init__(learning_rate, n_iter)
        self.alpha = alpha
    

    def fit(self, X, y):
        self.input_dim = X.shape[1]
        
        # add 1s for intercept
        xb = np.c_[np.ones((X.shape[0],1)), X]
        
        # get input dimentions
        m, n = xb.shape    
            
        # initialise theta    
        theta = np.zeros(n)

        # set intercept
        theta[0] = np.sum(y - np.dot(xb[:, 1:], theta[1:])) / m
        
        for iteration in range(self.n_iter):
            
            # iterate over coefficients
            for param in range(1, n):
                
                # temporary theta
                theta_ = theta.copy()
                theta_[param] = 0.0
                
                # residuals
                err = y - np.dot(xb, theta_)
                
                # input to thresholding
                x = np.dot(xb[:, param], err)
                lmbda = self.alpha * m

                # update coefficient
                theta[param] = self.__soft_thresholding(x, lmbda) / (xb[:, param] ** 2).sum()

                # set intercept
                theta[0] = np.sum(y - np.dot(xb[:, 1:], theta[1:])) / m

    
        self.intercept = theta[0]
        self.coefs = theta[1:]
        self.fitted = True
        return self

    @staticmethod
    def __soft_thresholding(x, lmbda):
        if x > 0 and lmbda < abs(x):
            return x - lmbda
        elif x < 0 and lmbda < abs(x):
            return x + lmbda
        return 0
   

# Test vs sklearn

### Ridge Regression

In [13]:
from sklearn.linear_model import Ridge

In [14]:
m = Ridge(alpha=0.5).fit(X, y)


In [15]:
m.coef_, m.intercept_

(array([85.09746966, 42.06663481, 73.32903535, 97.59210132, 27.71177308,
        44.00689669,  8.25142817, 10.85535856, 62.33368936, 76.94600574]),
 0.21416776015763106)

In [16]:
q = RidgeReg(learning_rate=0.1, n_iter = 1000, alpha = 0.5)
q.fit(X, y)


coefficients: [85.09746966 42.06663481 73.32903535 97.59210132 27.71177308 44.00689669
  8.25142817 10.85535856 62.33368936 76.94600574], 

 intercept: 0.21416776015763317

In [17]:
predictors = np.ones(10)

In [18]:
q.predict(predictors)

array([528.19039273])

In [19]:
m.predict(predictors.reshape(1, -1))

array([528.40456049])

### LASSO regression

In [119]:
from sklearn.linear_model import Lasso

In [120]:
X, y = make_regression(1000, 20, noise=20, n_informative=5)

In [121]:
las = Lasso(alpha=1).fit(X, y)
las.coef_

array([ 0.00000000e+00,  7.91745523e+01,  9.45887573e+01,  0.00000000e+00,
       -1.21180336e+00, -0.00000000e+00,  2.27903718e-01, -0.00000000e+00,
       -0.00000000e+00, -0.00000000e+00,  3.76269861e-03,  0.00000000e+00,
        4.36086775e+01,  2.77143229e+01,  0.00000000e+00, -4.20251017e-02,
        7.82095059e+00,  0.00000000e+00, -0.00000000e+00, -0.00000000e+00])

In [122]:
lasso_preds = np.ones(20)

In [123]:
las.predict(lasso_preds.reshape(1,-1))

array([250.45332243])

In [138]:
las_sct = LASSOReg(alpha=1).fit(X, y)
las_sct.coefs

array([ 0.00000000e+00,  7.91747706e+01,  9.45887815e+01,  0.00000000e+00,
       -1.21174371e+00,  0.00000000e+00,  2.27879987e-01,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  3.74856756e-03,  0.00000000e+00,
        4.36087014e+01,  2.77143190e+01,  0.00000000e+00, -4.20170957e-02,
        7.82094724e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00])

In [139]:
las_sct.predict(lasso_preds.reshape(1,-1))

array([251.88538748])