In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


## Ridge Regression
### What is a ridge?
Ridge regression builds on OLS by adding an L2 penalty to the residual sum of squares, which discourages large coefficients and stabilizes the estimates.
- If the penalty strength (`lamb` / `alpha` in the code below) is 0, ridge reduces to ordinary linear regression.
- The larger the penalty strength, the more the coefficients are shrunk toward zero, making the model more robust.

In [2]:
# for single variable
class RidgeRegression:
    """Ridge regression for a single feature, solved in closed form.

    The slope is

        sum((x - mean(x)) * (y - mean(y))) / (sum((x - mean(x)) ** 2) + lamb)

    and the intercept is ``mean(y) - slope * mean(x)``, so the penalty only
    enters the slope; the intercept itself is not penalized.

    Parameters
    ----------
    lamb : float
        Penalty strength added to the denominator of the slope. With
        ``lamb = 0`` the formula is the ordinary least-squares line.

    Attributes
    ----------
    coef_ : float or None
        Fitted slope; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Fitted intercept; ``None`` until ``fit`` has been called.
    lamb_ : float
        The penalty strength passed to the constructor.
    """

    def __init__(self, lamb):
        self.coef_ = None
        self.intercept_ = None
        self.lamb_ = lamb

    def fit(self, X_train, y_train):
        """Estimate the slope and intercept from one feature and one target.

        Parameters
        ----------
        X_train : array-like
            Feature values; flattened to one dimension.
        y_train : array-like
            Target values; must contain as many values as ``X_train``.

        Raises
        ------
        ValueError
            If the inputs are empty or contain different numbers of values.
        """
        # Convert to plain arrays first: positional arithmetic must not depend
        # on a pandas index (a shuffled index makes `series[i]` a label lookup).
        x = np.asarray(X_train, dtype=float).ravel()
        y = np.asarray(y_train, dtype=float).ravel()

        if x.size == 0:
            raise ValueError("X_train and y_train must not be empty")
        if x.size != y.size:
            raise ValueError(
                f"X_train and y_train must have the same number of values, "
                f"got {x.size} and {y.size}"
            )

        # The means are computed once instead of once per sample.
        x_mean = x.mean()
        y_mean = y.mean()
        x_centered = x - x_mean

        num = np.dot(x_centered, y - y_mean)
        den = np.dot(x_centered, x_centered) + self.lamb_

        self.coef_ = num / den
        self.intercept_ = y_mean - self.coef_ * x_mean

    def predict(self, X_test):
        """Return ``coef_ * X_test + intercept_`` for the given feature values."""
        return self.coef_ * X_test + self.intercept_


In [None]:
# For multiple variables
class RidgeRegression:
    """Ridge regression for several features, solved via the normal equations.

    A column of ones is prepended to the feature matrix so the intercept is
    estimated together with the coefficients. The L2 penalty is applied to the
    feature coefficients only; the intercept is left unpenalized.

    NOTE: this class has the same name as the single-variable class defined
    earlier in this notebook, so running this cell rebinds ``RidgeRegression``.
    The cell is marked ``In [None]``, i.e. it has no recorded execution.

    Parameters
    ----------
    alpha : float
        Penalty strength. With ``alpha = 0`` the solution is ordinary least
        squares.

    Attributes
    ----------
    coef_ : numpy.ndarray or None
        One coefficient per feature column; ``None`` until ``fit`` is called.
    intercept_ : float or None
        Fitted intercept; ``None`` until ``fit`` is called.
    alpha : float
        The penalty strength passed to the constructor.
    """

    def __init__(self, alpha):
        self.coef_ = None
        self.intercept_ = None
        self.alpha = alpha

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from the training data.

        Parameters
        ----------
        X_train : array-like, two-dimensional
            One row per sample, one column per feature.
        y_train : array-like
            One target value per row of ``X_train``.

        Raises
        ------
        numpy.linalg.LinAlgError
            If the penalized normal-equation matrix is singular.
        """
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)

        # Leading column of ones so that beta[0] plays the role of the intercept.
        design = np.insert(features, 0, 1, axis=1)

        penalty = self.alpha * np.identity(design.shape[1])
        # The intercept only shifts the predictions, so it must not be shrunk.
        penalty[0, 0] = 0.0

        # Solve (X^T X + penalty) beta = X^T y directly rather than forming an
        # explicit inverse, which is slower and numerically less accurate.
        gram = np.dot(design.T, design) + penalty
        beta = np.linalg.solve(gram, np.dot(design.T, targets))

        self.intercept_ = beta[0]
        self.coef_ = beta[1:]

    def predict(self, X_test):
        """Return ``X_test @ coef_ + intercept_`` for the given feature rows."""
        return np.dot(X_test, self.coef_) + self.intercept_

In [3]:
# NOTE(review): absolute path on the author's machine; the notebook only runs
# elsewhere if the CSV is available at this exact location.
DATA_PATH = "/Users/raaggee/Documents/MachineLearningImplementations/house_price_regression_dataset.csv"

df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Square_Footage,Num_Bedrooms,Num_Bathrooms,Year_Built,Lot_Size,Garage_Size,Neighborhood_Quality,House_Price
0,1360,2,1,1981,0.599637,0,5,262382.9
1,4272,3,3,2016,4.753014,1,6,985260.9
2,3592,1,2,2016,3.634823,0,9,777977.4
3,966,1,2,1977,2.730667,1,8,229698.9
4,4926,2,1,1993,4.699073,0,8,1041741.0


In [4]:
from sklearn.model_selection import train_test_split

# Features are the first seven columns; the target is the House_Price column.
X = df.iloc[:, :7]
y = df["House_Price"]

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Square identity matrix with one row/column per column of X_train.
I = np.eye(X_train.shape[1])

In [8]:
# Display the dimensions of the identity matrix built above.
np.shape(I)

(7, 7)