Для Ridge обозначения сохраняются, но функция потерь меняется: 
   
$Q = ||\mathbb{Y} - \mathbb{X}\omega||_2^2 + \alpha||\omega||_2^2$

Найдем шаг градиентного спуска для L2-регуляризации аналогичным способом:  
  
  $dQ\big|_{\omega_{*}} = -2<\mathbb{Y} - \mathbb{X}\omega, \mathbb{X}\omega_{*}> + 2\alpha<\omega, \omega_{*}> = 2<\mathbb{X}^T(\mathbb{X}\omega - \mathbb{Y}) + \alpha\omega, \omega_{*}>$  
    
  $\nabla_{\omega}Q = 2(\mathbb{X}^T(\mathbb{X}\omega - \mathbb{Y}) + \alpha\omega)$

In [None]:
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin

class Ridge(BaseEstimator, RegressorMixin):
    """Linear regression with an L2 penalty, fitted by mini-batch SGD.

    Each step applies ``w -= 2 * lr * (X_b.T @ (X_b @ w - Y_b) / batch_size
    + alpha * w)`` on a randomly drawn mini-batch ``(X_b, Y_b)``. The
    data-fit term is averaged over the batch while the penalty term is not,
    so ``alpha`` is weighed against the *mean* squared error of a batch.
    No intercept is fitted.

    Parameters
    ----------
    alpha : float
        L2 regularization strength.
    batch_size : int, default=35
        Number of row indices drawn (with replacement) per step.
    num_steps : int, default=500
        Number of gradient steps performed by ``fit``.
    lr : float, default=1e-2
        Learning rate; the effective step multiplier is ``2 * lr``.

    Attributes
    ----------
    w : numpy.ndarray of shape (n_features, 1)
        Weights learned by ``fit``.
    """

    def __init__(self, alpha, batch_size=35, num_steps=500, lr=1e-2):
        self.alpha = alpha
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.lr = lr

    def fit(self, X, Y):
        """Fit the weights with ``num_steps`` mini-batch gradient steps.

        Parameters
        ----------
        X : array-like of shape (n_objects, n_features)
            Feature matrix.
        Y : array-like of shape (n_objects,) or (n_objects, 1)
            Targets. A 1-D target is treated as a single column.

        Returns
        -------
        self : Ridge
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``X`` and ``Y`` have a different number of rows.
        """
        # Coerce to arrays so that integer-array row indexing below works
        # for lists and pandas objects as well.
        X = np.asarray(X)
        Y = np.asarray(Y)

        # A 1-D target would broadcast (batch, 1) - (batch,) into a
        # (batch, batch) matrix and break the update, so use a column.
        self._target_is_1d = Y.ndim == 1
        if self._target_is_1d:
            Y = Y[:, None]

        n_objects = len(X)
        if len(Y) != n_objects:
            raise ValueError(
                f"X and Y have inconsistent numbers of rows: "
                f"{n_objects} != {len(Y)}"
            )

        w = np.random.randn(X.shape[1])[:, None]
        step = 2 * self.lr

        for _ in range(self.num_steps):
            # Indices are drawn independently, i.e. with replacement.
            batch = np.random.randint(0, n_objects, size=self.batch_size)
            X_batch, Y_batch = X[batch], Y[batch]
            residual = X_batch @ w - Y_batch
            grad = X_batch.T @ residual / self.batch_size + self.alpha * w
            w -= step * grad

        self.w = w
        return self

    def predict(self, X):
        """Return ``X @ w`` for the fitted weights.

        The result is a column of shape (n_objects, 1), or a 1-D array of
        shape (n_objects,) when the estimator was fitted on a 1-D target.
        """
        pred = X @ self.w
        # ``getattr`` keeps predict usable when ``w`` was assigned manually.
        if getattr(self, "_target_is_1d", False):
            pred = np.ravel(pred)
        return pred