In [2]:
from sklearn.base import RegressorMixin

import numpy as np


Напишем линейную регрессию с l2 регуляризацией (ridge)

Обучать ее будем с помощью стохастического градиентного спуска

In [39]:
class ridge(RegressorMixin):
    def __init__(self, lr = 0.01, lam=0.1, batch_size = 64, epochs = 200):
        super().__init__()
        self.lr = lr
        self.lam = lam
        self.batch_size = batch_size
        self.epochs = epochs
        self.w = None
        self.b = None

    def fit(self, X:np.ndarray, Y):
        n,d = X.shape
        self.w = np.random.random(d)
        self.b = 0
        for _ in range(self.epochs):
            idx = np.random.choice(np.arange(n), self.batch_size)

            x = X[idx]
            y = Y[idx]

            error = np.dot(x,self.w) + self.b - y
            w_grad = 2 * np.matmul(x.T, error) + 2 * self.lam * self.w
            b_grad = 2 * np.sum(error)

            self.w -= self.lr * w_grad / self.batch_size
            self.b -= self.lr * b_grad / self.batch_size
        return self

    def predict(self, X:np.ndarray):
        y_pred = []

        for i in range(len(X)):
            y = np.dot(self.w, X[i]) + self.b
            y_pred.append(y)

        return y_pred
            

Протестируем написанную модель. 

In [40]:
from sklearn import datasets
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split



X, Y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
lr = ridge()
lr.fit(X_train, y_train)
pred = lr.predict(X_test)
mean_squared_error(pred,y_test)

5583.705040881627