# Линейная регрессия с L2 регуляризацией

In [1]:
import numpy as np

In [2]:
class LinReg():
    
    def __init__(self, batch=25, steps=350, lr=1e-2):
        self.batch = batch
        self.steps = steps
        self.lr = lr

    def fit(self, X, Y, lambda_parametr=1e-3):
        w = np.random.randn(X.shape[1])[:, None]
        n = len(X)
        for i in range(self.steps):
            rand_ind = np.random.randint(0, n, size=self.batch)
            w -= 2*self.lr * np.dot(X[rand_ind].T, np.dot(X[rand_ind], w) - Y[rand_ind]) / self.batch +2*lambda_parametr

        self.w = w
        return self

    def predict(self, X):
        return X.dot(self.w)

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

#Генерация данных для тестирования
n_features = 700
n_objects = 100000

w_true = np.random.uniform(-2, 2, (n_features, 1))

X = np.random.uniform(-100, 100, (n_objects, n_features)) * np.arange(n_features)
Y = X.dot(w_true) + np.random.normal(0, 10, (n_objects, 1))

In [4]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, Y)

In [5]:
scaler = StandardScaler()
scaler.fit(x_train)
x_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [6]:
own_model = LinReg().fit(x_scaled, y_train)
y_pred = own_model.predict(x_test_scaled)
own_r2 = r2_score(y_test, y_pred)

sklearn_model = LinearRegression().fit(x_scaled, y_train)
y_pred = sklearn_model.predict(x_test_scaled)

sklearn_r2 = r2_score(y_test, y_pred)

print('R^2 in own model:', own_r2)
print('R^2 in sklearn loss:', sklearn_r2)

R^2 in own model: 0.9999639065761372
R^2 in sklearn loss: 0.9999999998068125
