In [12]:
import pandas as pd
import numpy as np

In [13]:
from sklearn.datasets import load_diabetes
# Fetch the diabetes dataset, then pull out the feature matrix and the target
data = load_diabetes()
X = data.data
y = data.target


In [14]:
# Show the dataset's feature names so one can be picked as the single predictor
print(data.feature_names)

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']


In [None]:
# Convert to DataFrame to easily select one column.
# Build it from the untouched `data.data` rather than from `X`: this cell rebinds
# `X` below, so constructing from `X` would fail when the cell is re-run.
df = pd.DataFrame(data.data, columns=data.feature_names)

# Select one feature column
X = df['bp']  # single brackets return a 1-D Series of shape (n_samples,)

print(X.shape)
print(y.shape)

(442,)
(442,)


In [16]:
# Convert the selected feature column into a plain NumPy array
X = np.array(X)


In [17]:
from sklearn.model_selection import train_test_split

In [18]:
# Hold out 20% of the samples for testing. The fixed seed makes the split, and
# therefore every fitted coefficient and prediction below, reproducible on re-run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
# Report the shape of each split in order: X_train, X_test, y_train, y_test
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(353,)
(89,)
(353,)
(89,)


# Ridge Regression Formulas

## Loss Function
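$$J(w) = \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 + \alpha \sum_{j=1}^{p} w_j^2$$

With a single feature, $\hat{y}_i = m x_i + b$ and the only penalized weight is the slope ($w_1 = m$); the intercept $b$ is not regularized.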

## Closed-Form Solution (Single Feature)
- Slope:
$$m = \frac{\sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})}{\sum_{i=1}^{n} (x_i - \bar{x})^2 + \alpha}$$
- Intercept:
$$b = \bar{y} - m \bar{x}$$


In [20]:

class RidgeRegression:
    """Single-feature ridge regression solved in closed form.

    Fits ``y = m * x + b`` by minimising the squared error plus
    ``alpha * m**2``. The intercept ``b`` is not penalised:

        m = sum((x - x_mean) * (y - y_mean)) / (sum((x - x_mean)**2) + alpha)
        b = y_mean - m * x_mean

    Attributes:
        alpha: Regularization strength (non-negative).
        m: Fitted slope, or None before ``fit`` has been called.
        b: Fitted intercept, or None before ``fit`` has been called.
    """

    def __init__(self, alpha):
        """Store the regularization strength.

        Args:
            alpha: Non-negative ridge penalty added to the slope denominator.

        Raises:
            ValueError: If ``alpha`` is negative.
        """
        if alpha < 0:
            raise ValueError(f"alpha must be non-negative, got {alpha!r}")
        self.alpha = alpha  # Regularization parameter
        self.m = None       # Slope
        self.b = None       # Intercept

    @staticmethod
    def _as_feature_vector(X):
        """Return X as a float array, flattening an (n, 1) column to shape (n,)."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 2 and X.shape[1] == 1:
            return X.ravel()
        if X.ndim >= 2:
            raise ValueError(
                f"RidgeRegression supports a single feature, got X with shape {X.shape}"
            )
        return X

    def fit(self, X, y):
        """Compute the slope ``m`` and intercept ``b`` from training data.

        Args:
            X: Feature values, shape (n_samples,) or (n_samples, 1).
            y: Target values with one entry per sample.

        Raises:
            ValueError: If X is empty, has more than one feature, or X and y
                contain a different number of samples.
        """
        X = self._as_feature_vector(X)
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 1 or X.size == 0:
            raise ValueError("X must contain at least one sample of a single feature")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {X.shape[0]} and {y.shape[0]}"
            )

        # Compute mean values
        X_mean = X.mean()
        y_mean = y.mean()
        X_centered = X - X_mean

        # Vectorized numerator / denominator of the slope; the ridge penalty
        # is added to the denominator only.
        numerator = np.dot(X_centered, y - y_mean)
        denominator = np.dot(X_centered, X_centered) + self.alpha

        if denominator == 0:
            # Constant feature with alpha == 0: the slope is undetermined, so
            # fall back to a flat line through the mean instead of producing NaN.
            self.m = np.float64(0.0)
        else:
            self.m = numerator / denominator
        self.b = y_mean - self.m * X_mean

    def predict(self, X):
        """Return ``m * X + b`` for the given feature values.

        Args:
            X: Feature values; a scalar, shape (n_samples,) or (n_samples, 1).

        Returns:
            Predictions as a float array of shape (n_samples,), or a scalar
            for scalar input.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If X has more than one feature.
        """
        if self.m is None or self.b is None:
            raise RuntimeError("RidgeRegression is not fitted yet; call fit() first")
        X = self._as_feature_vector(X)
        return self.m * X + self.b


In [49]:
# Instantiate the custom closed-form model with a small regularization strength
Ridge = RidgeRegression(alpha=0.01)

In [50]:
# Fit the custom model on the training split (sets Ridge.m and Ridge.b)
Ridge.fit(X_train,y_train)

In [51]:
# Regularization strength the custom model was created with
Ridge.alpha

0.01

In [52]:
# Fitted slope of the custom model
Ridge.m

np.float64(709.1222388499481)

In [53]:
# Fitted intercept of the custom model
Ridge.b

np.float64(154.04209027075208)

In [54]:
# Predictions of the custom model on the held-out test split
Ridge.predict(X_test)

array([203.73191594, 142.69686114, 145.13826334, 140.25545895,
       186.6421006 , 154.9038721 , 193.96630718, 120.72424142,
       191.52490498, 176.07082911, 145.13826334, 203.73191594,
       193.96630718, 145.13826334, 142.69686114, 130.48985018,
       120.72424142, 166.30522034, 189.08350279, 193.96630718,
       110.15296992, 106.88149099, 178.5122313 , 137.81405676,
       157.3452743 , 142.69686114, 155.70953483, 130.48985018,
       194.7719699 , 186.6421006 , 179.31789402,  76.77900196,
       162.22807868, 125.6070458 , 120.72424142, 142.69686114,
       123.16564361, 135.37265457, 135.37265457, 169.55228526,
       189.08350279, 181.75929622, 101.19302388, 164.66948087,
       150.02106772, 125.6070458 ,  98.75162169, 145.13826334,
       142.69686114, 150.02106772, 180.95363349, 108.51723045,
       201.29051375, 130.48985018, 193.96630718, 150.02106772,
       162.22807868, 115.84143703, 118.28283922, 125.6070458 ,
       150.02106772, 164.66948087, 154.9038721 , 123.16

## Comparing the Results with the scikit-learn Library

In [55]:
from sklearn.linear_model import Ridge

In [57]:
model = Ridge(alpha=0.01)

In [59]:
# scikit-learn estimators take a 2-D feature matrix, so reshape the 1-D
# training feature to (n_samples, 1) before fitting
model.fit(X_train.reshape(-1,1),y_train)

In [60]:
# Regularization strength of the scikit-learn model
model.alpha

0.01

In [61]:
# Fitted coefficient (slope) of the scikit-learn model, to compare with Ridge.m
model.coef_

array([709.12223885])

In [62]:
# Fitted intercept of the scikit-learn model, to compare with Ridge.b
model.intercept_

np.float64(154.04209027075208)

In [63]:
# Predictions of the scikit-learn model on the test split (reshaped to 2-D)
model.predict(X_test.reshape(-1,1))

array([203.73191594, 142.69686114, 145.13826334, 140.25545895,
       186.6421006 , 154.9038721 , 193.96630718, 120.72424142,
       191.52490498, 176.07082911, 145.13826334, 203.73191594,
       193.96630718, 145.13826334, 142.69686114, 130.48985018,
       120.72424142, 166.30522034, 189.08350279, 193.96630718,
       110.15296992, 106.88149099, 178.5122313 , 137.81405676,
       157.3452743 , 142.69686114, 155.70953483, 130.48985018,
       194.7719699 , 186.6421006 , 179.31789402,  76.77900196,
       162.22807868, 125.6070458 , 120.72424142, 142.69686114,
       123.16564361, 135.37265457, 135.37265457, 169.55228526,
       189.08350279, 181.75929622, 101.19302388, 164.66948087,
       150.02106772, 125.6070458 ,  98.75162169, 145.13826334,
       142.69686114, 150.02106772, 180.95363349, 108.51723045,
       201.29051375, 130.48985018, 193.96630718, 150.02106772,
       162.22807868, 115.84143703, 118.28283922, 125.6070458 ,
       150.02106772, 164.66948087, 154.9038721 , 123.16