In [103]:
import pandas as pd
import numpy as np

In [104]:
from sklearn.datasets import load_diabetes
data = load_diabetes()
X, y = data.data, data.target


In [105]:
# Look at available feature names
print(data.feature_names)

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']


In [106]:
# Convert to DataFrame to easily select one column
df = pd.DataFrame(X, columns=data.feature_names)

# Select one feature column, e.g., 'MedInc'
X = df['bp'] # double brackets keep it as 2D array shape (n_samples, 1)

print(X.shape)  # should be (20640, 1)
print(y.shape)

(442,)
(442,)


In [107]:
X = np.array(X)


In [108]:
from sklearn.model_selection import train_test_split

In [109]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

In [110]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(353,)
(89,)
(353,)
(89,)


# 📝 Lasso Regression Formulas



###  Lasso Regression Loss (L1 Regularization)
$$
\text{Loss: } J(\mathbf{w}) = \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 + \alpha \sum_{j=1}^{p} |w_j|
$$

- $\alpha$ → Regularization parameter (controls penalty strength)  
- $w_j$ → Weight for feature $j$  
- Bias $w_0$ is usually **not regularized**


In [111]:
import numpy as np

class LassoRegression:
    def __init__(self, alpha):
        self.alpha = alpha  # Regularization parameter
        self.m = None
        self.b = None

    def fit(self, X, y):
        self.X = np.array(X, dtype=float)
        self.y = np.array(y, dtype=float)

        numerator = 0
        denominator = 0

        for i in range(len(self.X)):
            numerator += (self.X[i] - self.X.mean()) * (self.y[i] - self.y.mean())
            denominator += (self.X[i] - self.X.mean()) ** 2

        # Approximate L1 regularization by reducing slope slightly
        # (simple version for demonstration)
        if numerator > 0:
            numerator -= self.alpha
        elif numerator < 0:
            numerator += self.alpha

        self.m = numerator / denominator
        self.b = self.y.mean() - self.m * self.X.mean()

    def predict(self, X):
        X = np.array(X, dtype=float)
        return self.m * X + self.b


In [112]:
MyLasso = LassoRegression(0.01)

In [113]:
MyLasso.fit(X_train,y_train)

In [114]:
MyLasso.m

np.float64(748.8126118615427)

In [115]:
MyLasso.b

np.float64(151.58625453590864)

In [116]:
MyLasso.alpha

0.01

In [117]:
lasso.predict(X_test)

array([154.91489841, 147.50134404, 149.97252883, 123.6049871 ,
       147.50134404, 198.58073369, 112.90475695, 112.90475695,
       122.78949612, 159.857268  , 107.96238736, 130.20305049,
       140.08778966, 145.03015925, 157.38608321, 112.90475695,
       110.43357216, 117.84712653, 169.74200717, 164.79963758,
       179.62674634, 147.50134404, 172.21319196, 159.857268  ,
       132.67423529, 181.28244015, 196.92503988, 127.7318657 ,
       187.04030071, 149.97252883, 149.97252883, 166.45533139,
       135.14542008, 159.857268  , 159.857268  , 231.52162697,
       125.26068091, 189.51148551, 194.45385509, 122.78949612,
       132.67423529, 114.56045076, 116.19143272, 100.54883299,
       135.14542008, 142.55897445, 117.84712653, 155.7303894 ,
       130.20305049, 204.33859426, 135.96091106, 145.03015925,
       125.26068091, 125.26068091, 149.97252883, 152.44371362,
       154.91489841, 224.92356358, 149.97252883, 125.26068091,
       206.80977905, 224.1080726 , 107.96238736, 201.86

## Comparing the result with Scikit-Learn Library

In [118]:
from sklearn.linear_model import Lasso

In [119]:
model = Lasso(alpha=0.01)

In [120]:
model.fit(X_train.reshape(-1,1),y_train)

In [121]:
model.alpha

0.01

In [122]:
model.coef_

array([744.47795735])

In [123]:
model.intercept_

np.float64(151.59073220743608)

In [124]:
model.predict(X_test.reshape(-1,1))

array([152.49548113, 144.80610114, 147.3692278 , 120.02066632,
       144.80610114, 197.78592924, 108.92232787, 108.92232787,
       119.17483452, 157.62173445, 103.79607455, 126.86421451,
       137.11672115, 142.24297448, 155.05860779, 108.92232787,
       106.35920121, 114.0485812 , 167.8742411 , 162.74798777,
       178.12674775, 144.80610114, 170.43736776, 157.62173445,
       129.42734117, 179.84404261, 196.06863438, 124.30108784,
       185.81612773, 147.3692278 , 147.3692278 , 164.46528264,
       131.99046783, 157.62173445, 157.62173445, 231.95240765,
       121.73796118, 188.37925439, 193.50550772, 119.17483452,
       129.42734117, 110.63962274, 112.33128633,  96.10669456,
       131.99046783, 139.67984782, 114.0485812 , 153.34131292,
       126.86421451, 203.75801436, 132.83629963, 142.24297448,
       121.73796118, 121.73796118, 147.3692278 , 149.93235446,
       152.49548113, 225.10885946, 147.3692278 , 121.73796118,
       206.32114103, 224.26302766, 103.79607455, 201.19