### LASSO L1 (Least Absolute Shrinkage and Selection Operator)

In [1]:
# Import Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img

from sklearn.linear_model import Ridge

### Lasso from scratch using Gradient Descent

In [8]:
import numpy as np

class LassoRegressionGradientDescent:
    """
    Lasso (L1-regularised) linear regression trained by subgradient descent.

    The objective being minimised is

        (1 / (2m)) * sum((y - X @ theta) ** 2) + lambda_ * sum(|theta|)

    No intercept term is fitted; add a column of ones to X if one is needed.
    Plain subgradient steps shrink weights towards zero but generally do not
    land on exactly zero.
    """

    def __init__(self, alpha=0.01, lambda_=1.0, epochs=1000, verbose=True):
        """
        Initialize hyperparameters for Lasso Regression.

        Parameters:
        alpha (float): Learning rate.
        lambda_ (float): L1 regularization strength.
        epochs (int): Number of training iterations.
        verbose (bool): If True (default), print the loss every 100 epochs.
        """
        self.alpha = alpha
        self.lambda_ = lambda_
        self.epochs = epochs
        self.verbose = verbose
        self.theta = None  # set by fit(); column vector of shape (n, 1)

    def fit(self, X, y):
        """
        Fit the Lasso Regression model using gradient descent.

        Parameters:
        X (array-like): Feature matrix (m x n)
        y (array-like): Target vector, either (m x 1) or 1-D of length m

        Raises:
        ValueError: If y is not a single target column with m rows.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        m, n = X.shape

        # A 1-D y would broadcast against the (m, 1) prediction into an
        # (m, m) matrix, so always work with an explicit column vector.
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        if y.ndim != 2 or y.shape != (m, 1):
            raise ValueError(
                f"y must have shape ({m},) or ({m}, 1) to match X, got {y.shape}"
            )

        self.theta = np.zeros((n, 1))  # Initialize weights to zeros

        for epoch in range(self.epochs):
            # Subgradient of the objective: MSE gradient plus lambda * sign(theta).
            # np.sign(0) == 0, which is a valid subgradient of |theta| at 0.
            gradient = (-1/m) * X.T @ (y - X @ self.theta) + self.lambda_ * np.sign(self.theta)

            # Update weights
            self.theta -= self.alpha * gradient

            # Report the regularised loss (computed after the update) every 100 epochs
            if self.verbose and epoch % 100 == 0:
                loss = (1/(2*m)) * np.sum((y - X @ self.theta)**2) + self.lambda_ * np.sum(np.abs(self.theta))
                print(f"Epoch {epoch}: Loss = {loss:.4f}")

    def predict(self, X):
        """
        Make predictions using the trained model.

        Parameters:
        X (np.ndarray): Feature matrix (m x n)

        Returns:
        np.ndarray: Predicted values (m x 1)
        """
        return X @ self.theta

    def get_weights(self):
        """
        Get the learned weight vector (theta).

        Returns:
        np.ndarray: Weight vector of shape (n, 1), or None before fit() is called.
            This is the model's internal array, not a copy.
        """
        return self.theta


# Demo on synthetic data: y = 3*x0 + 2*x1 + standard-normal noise
np.random.seed(42)  # fixed seed so the run is reproducible
X = np.random.randn(100, 2)  # 100 rows, 2 feature columns
y = (X[:, 0:1] * 3 + X[:, 1:2] * 2) + np.random.randn(100, 1)  # column-vector target

# Train the subgradient-descent Lasso model and show the learned weights
theta_optimal = LassoRegressionGradientDescent(
    alpha=0.1,
    lambda_=0.5,
    epochs=1000,
)
theta_optimal.fit(X, y)
print("Optimal theta:", theta_optimal.get_weights())

Epoch 0: Loss = 5.4092
Epoch 100: Loss = 2.7753
Epoch 200: Loss = 2.7753
Epoch 300: Loss = 2.7753
Epoch 400: Loss = 2.7753
Epoch 500: Loss = 2.7753
Epoch 600: Loss = 2.7753
Epoch 700: Loss = 2.7753
Epoch 800: Loss = 2.7753
Epoch 900: Loss = 2.7753
Optimal theta: [[2.51531569]
 [1.34144528]]


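* `@` is the matrix-multiplication operator (`np.matmul`); for the 1-D and 2-D arrays used here it gives the same result as `np.dot` <br>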
* `np.sign returns -1 if x < 0, 0 if x==0, 1 if x > 0 . nan is returned for nan inputs.` <br>
* `theta are weights` <br>

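**np.sign(theta)** : <br> We use it to compute a subgradient of the L1 penalty. A subgradient is a generalization of the gradient that lets us take a derivative-like step for functions that are not differentiable at some points. Here `|theta|` is not differentiable at `theta = 0`; `np.sign` returns `0` there, which is a valid subgradient.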

In [7]:
# class with threshold to force small value to zero

class LassoRegressionGD:
    """
    Lasso-style linear regression: subgradient descent plus a hard threshold.

    After every update, any weight whose absolute value is below `threshold`
    is set to exactly zero. A zeroed weight has sign 0, so on later epochs it
    only leaves zero if a single MSE-gradient step moves it by at least
    `threshold`. No intercept term is fitted.
    """

    def __init__(self, alpha=0.1, lambda_=0.5, epochs=10, threshold=0.1, verbose=True):
        """
        Parameters:
        alpha (float): Learning rate.
        lambda_ (float): L1 regularization strength.
        epochs (int): Number of training iterations.
        threshold (float): Weights with |w| < threshold are zeroed each epoch.
        verbose (bool): If True (default), print theta after every epoch.
        """
        self.alpha = alpha
        self.lambda_ = lambda_
        self.epochs = epochs
        self.threshold = threshold
        self.verbose = verbose
        self.theta = None  # set by fit(); 1-D array of length n

    def fit(self, X, y):
        """
        Fit the model.

        Parameters:
        X (array-like): Feature matrix (m x n)
        y (array-like): Target vector, 1-D of length m or a column (m x 1)

        Raises:
        ValueError: If y does not contain exactly one target value per row of X.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        m, n = X.shape

        # theta is 1-D here, so a column-vector y would broadcast the residual
        # into an (m, m) matrix; flatten it to keep the arithmetic 1-D.
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.ravel()
        if y.shape != (m,):
            raise ValueError(
                f"y must have shape ({m},) or ({m}, 1) to match X, got {y.shape}"
            )

        self.theta = np.ones(n)  # Initialize weights to 1

        for epoch in range(self.epochs):
            # Gradient for MSE
            gradient = (-1/m) * X.T @ (y - X @ self.theta)

            # Add L1 regularization (Lasso) via the subgradient sign(theta)
            gradient += self.lambda_ * np.sign(self.theta)

            # Update weights
            self.theta -= self.alpha * gradient

            # Thresholding small weights to zero
            self.theta[np.abs(self.theta) < self.threshold] = 0

            if self.verbose:
                print(f"Epoch {epoch + 1}: Theta = {self.theta}")

    def predict(self, X):
        """Return predictions X @ theta (1-D array of length m)."""
        return X @ self.theta

    def get_weights(self):
        """Return the learned weights (1-D array), or None before fit() is called."""
        return self.theta


# Toy problem: 3 samples (rows) x 3 features (columns)
X = np.array([
    [2, 0.5, 0],
    [3, 0.2, 0.1],
    [5, 0.1, -0.2],
])
y = np.array([6, 9, 15])

# Train the thresholded Lasso model for 19 epochs (other hyperparameters at defaults)
optimal_theta = LassoRegressionGD(epochs=19)
optimal_theta.fit(X, y)

print("\nFinal theta values after Lasso regression:", optimal_theta.get_weights())

Epoch 1: Theta = [3.43666667 1.08       0.90166667]
Epoch 2: Theta = [2.77899444 0.98863333 0.86035278]
Epoch 3: Theta = [2.95980538 0.94421739 0.80376206]
Epoch 4: Theta = [2.9133778  0.88758884 0.75148458]
Epoch 5: Theta = [2.92850268 0.8347765  0.69821092]
Epoch 6: Theta = [2.92692319 0.78143355 0.64537897]
Epoch 7: Theta = [2.92984564 0.72873459 0.59259821]
Epoch 8: Theta = [2.9315237  0.67635805 0.53997361]
Epoch 9: Theta = [2.93351467 0.62438781 0.48747588]
Epoch 10: Theta = [2.93539671 0.57279791 0.43511209]
Epoch 11: Theta = [2.93728431 0.52159216 0.38287949]
Epoch 12: Theta = [2.93914659 0.47076634 0.330778  ]
Epoch 13: Theta = [2.94099209 0.42031841 0.27880679]
Epoch 14: Theta = [2.94281865 0.37024578 0.22696526]
Epoch 15: Theta = [2.94462701 0.32054602 0.17525275]
Epoch 16: Theta = [2.94641714 0.27121667 0.12366863]
Epoch 17: Theta = [2.9481892 0.2222553 0.       ]
Epoch 18: Theta = [2.94825834 0.1736595  0.        ]
Epoch 19: Theta = [2.95164161 0.12554483 0.        ]

Fina