In [1]:
import numpy as np

In [2]:
# Two toy samples (x first, then y) of six integers each, drawn one value at a
# time from [1, 100) with NumPy's global random state. No seed is set in this
# cell, so the printed values differ from run to run.
x, y = (
    np.array([np.random.randint(1, 100) for _ in range(6)])
    for _sample in range(2)
)
print(x, y)

[25 52 55 71  6 66] [46 11 30 19 10 62]


In [3]:
def stochastic_gradient_descent(gradient, x, y, randomv=None, init = None, lr = .1, decay_factor = .0,
                                batch = 1, epochs = 50, tolerance = 1e-06,
                                data_type="float64", random_state = None):
    """Mini-batch stochastic gradient descent with momentum.

    Parameters
    ----------
    gradient : callable
        Called as ``gradient(x_batch, y_batch, vector)``; its result is
        converted to an array of ``data_type`` and used as the gradient
        for the current mini-batch.
    x, y : array-like
        Observations and targets. Both are copied into arrays of
        ``data_type`` and must have the same length along axis 0.
    randomv : int, optional
        Number of parameters to draw from a standard normal distribution
        when ``init`` is not given.
    init : array-like or scalar, optional
        Starting parameter values. Takes precedence over ``randomv``.
    lr : float or array-like
        Learning rate; every entry must be > 0.
    decay_factor : float or array-like
        Momentum coefficient; every entry must lie in [0, 1].
    batch : int
        Mini-batch size, ``0 < batch <= number of observations``.
    epochs : int
        Maximum number of passes over the shuffled data.
    tolerance : float or array-like
        The optimisation stops as soon as every component of the
        proposed update is at most this large in absolute value.
    data_type : str or dtype
        dtype used for all internal arrays.
    random_state : int, optional
        Seed for the generator used for initialisation and shuffling.

    Returns
    -------
    numpy.ndarray or scalar
        The optimised parameters; a Python scalar when the parameter
        vector is zero-dimensional.

    Raises
    ------
    TypeError
        If neither ``init`` nor ``randomv`` is provided.
    ValueError
        If the lengths of ``x`` and ``y`` differ, or if ``lr``,
        ``decay_factor``, ``batch`` or ``tolerance`` is out of range.
    """
    dt = np.dtype(data_type)

    x, y = np.array(x, dtype=dt), np.array(y, dtype=dt)

    obs = x.shape[0]
    if obs != y.shape[0]:
        raise ValueError("x & y values lengths must match!")
    # One row per observation: feature columns first, target in the last
    # column, so a single shuffle keeps every x row paired with its y.
    xy = np.c_[x.reshape(obs, -1), y.reshape(obs, 1)]

    seed = None if random_state is None else int(random_state)
    random_n = np.random.default_rng(seed=seed)

    if init is not None:
        vector = np.array(init, dtype=dt)
    elif randomv is not None:
        vector = random_n.normal(size=int(randomv)).astype(dt)
    else:
        # Previously this surfaced as an opaque "int() argument ..." TypeError.
        raise TypeError("Either init or randomv must be provided")

    lr = np.array(lr, dtype=dt)
    if np.any(lr <= 0):
        raise ValueError("Learning rate value must be greater than 0")

    decay_factor = np.array(decay_factor, dtype=dt)
    if np.any(decay_factor < 0) or np.any(decay_factor > 1):
        raise ValueError("Decay factor value must be between 0 and 1")

    batch = int(batch)
    if not 0 < batch <= obs:
        raise ValueError("Batch size value must be greater than 0 and less or equal to the number of observations")

    tolerance = np.array(tolerance, dtype=dt)
    if np.any(tolerance <= 0):
        raise ValueError("Tolerance value must be greater than 0")

    difference = 0
    converged = False

    for _ in range(epochs):
        # Shuffles the rows of the private xy copy in place; caller data is untouched.
        random_n.shuffle(xy)

        # `start` (not `init`) so the loop index does not shadow the parameter.
        for start in range(0, obs, batch):
            stop = start + batch
            x_batch, y_batch = xy[start:stop, :-1], xy[start:stop, -1:]

            gradient_ = np.array(gradient(x_batch, y_batch, vector), dt)
            # Momentum update: keep a fraction of the previous step.
            difference = decay_factor * difference - lr * gradient_

            if np.all(np.abs(difference) <= tolerance):
                converged = True
                break

            vector += difference

        # Leave the epoch loop as well; a bare `break` above would only end
        # the current pass and the remaining epochs would still run.
        if converged:
            break

    return vector if vector.shape else vector.item()

In [4]:
def ssr_gradient(x, y, b):
    """Gradient of the halved mean squared residual of the line b[0] + b[1] * x.

    Returns a pair: the mean residual (partial derivative with respect to
    the intercept b[0]) and the mean of residual * x (partial derivative
    with respect to the slope b[1]).
    """
    intercept, slope = b[0], b[1]
    residuals = intercept + slope * x - y
    d_intercept = residuals.mean()
    d_slope = (residuals * x).mean()
    return d_intercept, d_slope

In [5]:
# Fit the two parameters (b[0], b[1]) used by ssr_gradient on the x/y samples:
# two normally distributed starting values, mini-batches of three rows,
# momentum 0.8, and a fixed seed for initialisation and shuffling.
stochastic_gradient_descent(
    ssr_gradient,
    x,
    y,
    randomv=2,
    lr=1e-4,
    decay_factor=0.8,
    batch=3,
    epochs=100_000,
    random_state=0,
)

array([19.40776474,  0.23048986])