# Ridge Regression for nd data

In [40]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [41]:
X,y = load_diabetes(return_X_y = True)

In [42]:
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=13)

In [43]:
from sklearn.linear_model import Ridge

In [44]:
reg = Ridge(alpha=0.1, solver = 'cholesky')

In [45]:
reg.fit(X_train,y_train)

In [46]:
y_pred = reg.predict(X_test)

In [47]:
r2_score(y_test, y_pred)

0.37915546090987395

In [48]:
reg.coef_

array([  -5.10373917, -211.97523484,  532.48566195,  297.38876175,
        -57.55670284,  -95.16345048, -206.58948886,  105.25789549,
        437.89687435,  124.41404427])

In [49]:
reg.intercept_

150.32508265832132

### From scratch

In [50]:
class RidgeLR:
    def __init__(self,alpha=0.1):
        self.alpha = alpha
        self.coef_ = None
        self.intercept_ = None

    def fit(self,X_train,y_train):
        X_train = np.insert(X_train,0,1, axis=1)
        I = np.identity(X_train.shape[1])
        # I[0][0] =0
        result = np.linalg.inv(np.dot(X_train.T, X_train) + self.alpha *I).dot(X_train.T.dot(y_train))
        self.intercept_ = result[0]
        self.coef_ = result[1:]
    def predict(self, X_test):
        return np.dot(X_test, self.coef_) + self.intercept_

In [51]:
reg1 = RidgeLR()

In [52]:
reg1.fit(X_train,y_train)

In [53]:
y_pred1 = reg1.predict(X_test)

In [54]:
r2_score(y_test,y_pred1)

0.37898964959371806

In [55]:
reg1.coef_

array([  -5.13593875, -211.98729607,  532.49470045,  297.3764734 ,
        -57.53286138,  -95.10596065, -206.59160844,  105.24379761,
        437.90471083,  124.43126839])

In [56]:
reg1.intercept_

150.28212715147606