<a href="https://colab.research.google.com/github/log-ghj/automatic-model-selection/blob/main/ridge_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Self-coded ridge regression for the project in the course "Automatic Model Selection".

In [None]:
# Some imports
import numpy as np
import pandas as pd
from statsmodels.tools.tools import add_constant
from sklearn.linear_model import Ridge

  import pandas.util.testing as tm


In [None]:
# Load the housing data and discard every row that has a missing value
df = pd.read_csv(
    'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.csv'
)
df = df.dropna()

# Derive per-person room/bedroom counts and the average household size,
# then drop the raw count columns they were built from
df = df.assign(
    total_rooms_pp=df["total_rooms"] / df["population"],
    total_bedrooms_pp=df["total_bedrooms"] / df["population"],
    household_size=df["population"] / df["households"],
).drop(columns=["total_rooms", "total_bedrooms", "households"])

# Replace the categorical column by one dummy column per category,
# appended to the right of the remaining columns
xx = pd.get_dummies(df["ocean_proximity"])
df = pd.concat([df.drop(columns="ocean_proximity"), xx], axis=1, sort=False)

In [None]:
# ridge formula as result of minimization
def ridge(lambdas, X, y):
  """Closed-form ridge regression over a grid of penalties, selected by AIC.

  For every penalty in `lambdas` the coefficients solve the penalized normal
  equations (X'X + lambda * I) beta = X'y (formula of slide 62), where X has
  an intercept column prepended. The identity matrix spans all columns, so
  the intercept is penalized like every other coefficient.

  Parameters
  ----------
  lambdas : indexable sequence of float
      Candidate penalty values.
  X : DataFrame or 2-D array-like
      Regressors without the intercept column.
  y : 1-D array-like
      Response.

  Returns
  -------
  tuple
      (coefficients, penalty, AIC) of the candidate with the lowest AIC.
      The first coefficient is the intercept. Ties go to the earliest
      candidate in `lambdas`.

  Raises
  ------
  ValueError
      If `lambdas` is empty.
  numpy.linalg.LinAlgError
      If the penalized system is singular.
  """
  # Plain float arrays keep the linear algebra independent of pandas'
  # matmul/alignment behaviour.
  X_m = np.asarray(add_constant(X, prepend=True), dtype=float)   # prepend intercept
  y_m = np.asarray(y, dtype=float)

  # Everything that does not depend on the penalty is computed once.
  XX = X_m.T @ X_m
  Xy = X_m.T @ y_m
  identity = np.eye(len(XX))

  # containers for results
  coefs = []
  ics = []

  for lam in lambdas:
    # Solve the system directly instead of forming an explicit inverse:
    # same solution, cheaper and numerically more stable.
    beta = np.linalg.solve(XX + lam * identity, Xy)
    coefs.append(beta)
    ics.append(aic(y_m, X_m @ beta, k=len(beta)))

  opt = ics.index(min(ics))
  return coefs[opt], lambdas[opt], ics[opt]


# ridge as data augmentation
def ridge_augmented(lambdas, X, y):
  """Ridge regression via data augmentation, selected by AIC.

  Appending sqrt(lambda) * I below the design matrix and matching zeros
  below the response turns ridge into ordinary least squares on the
  augmented data, because X_aug'X_aug = X'X + lambda * I and
  X_aug'y_aug = X'y. The intercept column is part of the augmentation and
  is therefore penalized too.

  The information criterion is evaluated on the original observations only.
  Scoring the augmented rows would add lambda * ||beta||^2 to the residual
  sum of squares and inflate the sample size, so the criterion would no
  longer describe the fit to the data.

  Parameters
  ----------
  lambdas : indexable sequence of float
      Candidate penalty values.
  X : DataFrame or 2-D array-like
      Regressors without the intercept column.
  y : 1-D array-like
      Response.

  Returns
  -------
  tuple
      (coefficients, penalty, AIC) of the candidate with the lowest AIC.
      The first coefficient is the intercept. Ties go to the earliest
      candidate in `lambdas`.

  Raises
  ------
  ValueError
      If `lambdas` is empty.
  numpy.linalg.LinAlgError
      If the augmented normal equations are singular.
  """
  # augment data (the part that does not depend on lambda)
  X_m = np.asarray(add_constant(X, prepend=True), dtype=float)
  y_m = np.asarray(y, dtype=float)
  # Size the augmentation from the actual design matrix so it always matches,
  # whatever columns add_constant ended up producing.
  n_coef = X_m.shape[1]
  y_aug = np.concatenate([y_m, np.zeros(n_coef)])
  identity = np.eye(n_coef)

  # containers for results
  coefs = []
  ics = []

  for lam in lambdas:
    # augment X (the part that depends on lambda)
    X_aug = np.vstack([X_m, np.sqrt(lam) * identity])

    # OLS normal equations on the augmented data, solved without an
    # explicit matrix inverse
    beta = np.linalg.solve(X_aug.T @ X_aug, X_aug.T @ y_aug)

    # save results; the fit is judged on the real observations only
    coefs.append(beta)
    ics.append(aic(y_m, X_m @ beta, k=len(beta)))

  # return optimal results
  opt = ics.index(min(ics))
  return coefs[opt], lambdas[opt], ics[opt]

# sklearn ridge
def ridge_sklearn(lambdas, X, y):
  """Ridge regression with scikit-learn's `Ridge`, selected by AIC.

  Parameters
  ----------
  lambdas : indexable sequence of float
      Candidate penalty values, passed to `Ridge` as `alpha`.
  X : DataFrame or 2-D array-like
      Regressors without an intercept column; the intercept is fitted by
      the estimator.
  y : 1-D array-like
      Response.

  Returns
  -------
  tuple
      (coefficients, penalty, AIC) of the candidate with the lowest AIC.
      The coefficient vector starts with the fitted intercept. Ties go to
      the earliest candidate in `lambdas`.

  Raises
  ------
  ValueError
      If `lambdas` is empty.
  """
  # `normalize` is intentionally not passed: False was its default, and the
  # argument no longer exists in current scikit-learn releases.
  # The estimator is not called `ridge` so it cannot be confused with the
  # closed-form `ridge` function in this file.
  model = Ridge(fit_intercept=True)
  coefs = []
  intercepts = []
  ics = []
  k = np.shape(X)[1] + 1        # number of variables + constant

  for a in lambdas:
    model.set_params(alpha=a)
    model.fit(X, y)
    y_pred = model.predict(X)
    coefs.append(model.coef_)
    intercepts.append(model.intercept_)
    ics.append(aic(y, y_pred, k))

  opt = ics.index(min(ics))
  return np.r_[intercepts[opt], coefs[opt]], lambdas[opt], ics[opt]

# create AIC function
def aic(y, y_pred, k):
  resid = y - y_pred
  sse = sum(resid ** 2)
  AIC = 2*k - 2*np.log(sse)
  return AIC

In [None]:
# Penalty grid: 100 log-spaced values running from 5000 down to 0.005
grid = 0.5 * (10 ** np.linspace(4, -2, 100))

# Separate the response (l.h.s.) from the regressors (r.h.s.)
y = df["median_house_value"]
X = df.drop(columns=["median_house_value"])

# Standardize every regressor column (subtract its mean, divide by its
# standard deviation) and center the response
X = X.sub(X.mean(), axis="columns").div(X.std(), axis="columns")
y = y.sub(y.mean())

In [None]:
# Closed-form ridge over the penalty grid; displays (coefficients, chosen lambda, its AIC)
ridge(grid, X, y)

(array([-1.14841949e-08, -1.04033798e+04, -1.01380183e+04,  9.07181126e+03,
         8.73600349e+02,  5.62527994e+04,  8.82531904e+03,  5.36316283e+03,
        -2.10251993e+03,  9.87374760e+03, -2.28087267e+04,  2.31622020e+03,
         8.15517713e+03,  9.31551562e+03]), 5000.0, -36.63898218651468)

In [None]:
# Data-augmentation ridge over the same grid; displays (coefficients, chosen lambda, its AIC)
ridge_augmented(grid, X, y)

(array([-1.14841896e-08, -1.04033798e+04, -1.01380183e+04,  9.07181126e+03,
         8.73600349e+02,  5.62527994e+04,  8.82531904e+03,  5.36316283e+03,
        -2.10251993e+03,  9.87374760e+03, -2.28087267e+04,  2.31622020e+03,
         8.15517713e+03,  9.31551562e+03]), 5000.0, -37.00350473739984)

In [None]:
# scikit-learn ridge over the same grid; displays (intercept followed by coefficients, chosen lambda, its AIC)
ridge_sklearn(grid, X, y)

(array([-1.43114874e-08, -1.04033798e+04, -1.01380183e+04,  9.07181126e+03,
         8.73600349e+02,  5.62527994e+04,  8.82531904e+03,  5.36316283e+03,
        -2.10251993e+03,  9.87374760e+03, -2.28087267e+04,  2.31622020e+03,
         8.15517713e+03,  9.31551562e+03]), 5000.0, -36.63898218651468)