In [32]:
import numpy as np

def sgd(
    gradient, x, y, start, learn_rate=0.1, batch_size=1, n_iter=50,
    tolerance=1e-06, dtype="float64", random_state=None, verbose=True
):
    """Minimise a function with minibatch stochastic gradient descent.

    Parameters
    ----------
    gradient : callable
        Called as ``gradient(x_batch, y_batch, vector)``; its result is
        converted to an array of ``dtype`` and used as the gradient.
    x, y : array-like
        Observations and targets. Both are copied into arrays of ``dtype``
        and must have the same length along the first axis.
    start : array-like or scalar
        Initial value of the optimised variables (copied, never mutated).
    learn_rate : scalar or array-like, default 0.1
        Step size; every element must be greater than zero.
    batch_size : int, default 1
        Number of observations per minibatch, ``0 < batch_size <= len(x)``.
    n_iter : int, default 50
        Maximal number of epochs (full passes over the shuffled data).
    tolerance : scalar or array-like, default 1e-06
        The search stops as soon as every element of a minibatch update is
        no larger than this in absolute value.
    dtype : str or numpy dtype, default "float64"
        Data type used for all internal arrays.
    random_state : int or None, default None
        Seed for the shuffling generator; ``None`` gives a fresh seed.
    verbose : bool, default True
        When true, print each minibatch gradient and a one-line summary per
        epoch. Pass ``False`` to run silently.

    Returns
    -------
    numpy.ndarray or scalar
        The final variables; a plain Python scalar when ``start`` was 0-d.

    Raises
    ------
    TypeError
        If ``gradient`` is not callable.
    ValueError
        If the lengths of ``x`` and ``y`` differ, or if ``learn_rate``,
        ``batch_size``, ``n_iter`` or ``tolerance`` is out of range.
    """
    # Checking if the gradient is callable
    if not callable(gradient):
        raise TypeError("'gradient' must be callable")

    # Setting up the data type for NumPy arrays
    dtype_ = np.dtype(dtype)

    # Converting x and y to NumPy arrays
    x, y = np.array(x, dtype=dtype_), np.array(y, dtype=dtype_)
    n_obs = x.shape[0]
    if n_obs != y.shape[0]:
        raise ValueError("'x' and 'y' lengths do not match")
    # One row per observation, target in the last column, so that a single
    # shuffle keeps every x row paired with its y value.
    xy = np.c_[x.reshape(n_obs, -1), y.reshape(n_obs, 1)]

    # Initializing the random number generator
    seed = None if random_state is None else int(random_state)
    rng = np.random.default_rng(seed=seed)

    # Initializing the values of the variables
    vector = np.array(start, dtype=dtype_)

    # Setting up and checking the learning rate
    learn_rate = np.array(learn_rate, dtype=dtype_)
    if np.any(learn_rate <= 0):
        raise ValueError("'learn_rate' must be greater than zero")

    # Setting up and checking the size of minibatches
    batch_size = int(batch_size)
    if not 0 < batch_size <= n_obs:
        raise ValueError(
            "'batch_size' must be greater than zero and less than "
            "or equal to the number of observations"
        )

    # Setting up and checking the maximal number of iterations
    n_iter = int(n_iter)
    if n_iter <= 0:
        raise ValueError("'n_iter' must be greater than zero")

    # Setting up and checking the tolerance
    tolerance = np.array(tolerance, dtype=dtype_)
    if np.any(tolerance <= 0):
        raise ValueError("'tolerance' must be greater than zero")

    # Performing the gradient descent loop
    for epoch in range(n_iter):
        # Shuffle x and y together (rows of the private xy copy)
        rng.shuffle(xy)

        # Performing minibatch moves
        converged = False
        for batch_start in range(0, n_obs, batch_size):
            batch_stop = batch_start + batch_size
            x_batch = xy[batch_start:batch_stop, :-1]
            y_batch = xy[batch_start:batch_stop, -1:]

            # Recalculating the difference
            grad = np.array(gradient(x_batch, y_batch, vector), dtype_)
            if verbose:
                print("gradient ", grad)
            diff = -learn_rate * grad

            # Checking if the absolute difference is small enough
            if np.all(np.abs(diff) <= tolerance):
                converged = True
                break

            # Updating the values of the variables
            vector += diff

        if verbose:
            print("epoch no : ", epoch, "  ", vector, "  ", diff)

        # The tolerance test is a stopping criterion for the whole search:
        # leave the epoch loop too instead of reshuffling and carrying on.
        if converged:
            break

    return vector if vector.shape else vector.item()

In [30]:
# Toy regression data: six evenly spaced inputs and their observed responses.
x = np.arange(5, 56, 10)
y = np.array([5, 20, 14, 32, 22, 38])

In [5]:
def ssr_gradient(x, y, b):
    """Return the two mean-residual gradient components of a linear fit.

    The residuals are ``b[0] + b[1] * x - y``. The first returned value is
    their mean; the second is the mean of the residuals multiplied by ``x``.
    """
    intercept, slope = b[0], b[1]
    residuals = intercept + slope * x - y
    grad_intercept = residuals.mean()
    grad_slope = (residuals * x).mean()
    return grad_intercept, grad_slope

In [27]:

def gradient_descent(
    gradient, x, y, start, learn_rate=0.1, n_iter=50, tolerance=1e-06,
    verbose=True
):
    """Minimise a function with plain (full-batch) gradient descent.

    Parameters
    ----------
    gradient : callable
        Called as ``gradient(x, y, vector)``; its result is converted to an
        array and used as the gradient.
    x, y : any
        Passed through unchanged to ``gradient`` on every iteration.
    start : array-like or scalar
        Initial value of the optimised variables. It is copied into a float
        array, so the caller's object is never modified and integer inputs
        are accepted.
    learn_rate : float, default 0.1
        Step size applied to the gradient.
    n_iter : int, default 50
        Maximal number of iterations.
    tolerance : float, default 1e-06
        The loop stops as soon as every element of the update is no larger
        than this in absolute value.
    verbose : bool, default True
        When true, print the iteration number, the updated variables and the
        applied update after every step. Pass ``False`` to run silently.

    Returns
    -------
    numpy.ndarray or float
        The final variables; a plain Python float when ``start`` was scalar.
    """
    # Work on a private float copy: aliasing `start` would let the in-place
    # `+=` below overwrite a caller's ndarray, and it fails outright when
    # that array has an integer dtype.
    vector = np.array(start, dtype=float)

    for epoch in range(n_iter):
        diff = -learn_rate * np.array(gradient(x, y, vector))

        # Stop once the step has become negligibly small
        if np.all(np.abs(diff) <= tolerance):
            break

        vector += diff
        if verbose:
            print("epoch no : ", epoch, "  ", vector, " ", diff)

    return vector if vector.shape else vector.item()

In [None]:
# Ten full-batch steps on the toy data, starting from b = (0.5, 0.5).
gradient_descent(
    gradient=ssr_gradient,
    x=x,
    y=y,
    start=[0.5, 0.5],
    learn_rate=0.0008,
    n_iter=10,
)

In [34]:

# Ten epochs of minibatch SGD (three observations per batch) with a fixed
# seed so the shuffling order is repeatable.
sgd(
    gradient=ssr_gradient,
    x=x,
    y=y,
    start=[0.5, 0.5],
    learn_rate=0.0008,
    batch_size=3,
    n_iter=10,
    random_state=0,
)

gradient  [  -8.33333333 -355.        ]
gradient  [  1.82666667 167.17777778]
epoch no :  0    [0.50520533 0.65025778]    [-0.00146133 -0.13374222]
gradient  [ -2.740928   -27.76340148]
gradient  [ 0.10181386 29.41285707]
epoch no :  1    [0.50731662 0.64893821]    [-8.14510899e-05 -2.35302857e-02]
gradient  [ -0.10610067 -12.34338434]
gradient  [ -3.296856   -24.14065244]
epoch no :  2    [0.51003899 0.67812544]    [0.00263748 0.01931252]
gradient  [  -6.05766123 -138.90427584]
gradient  [  8.73358776 371.11597989]
epoch no :  3    [0.50789825 0.49235608]    [-0.00698687 -0.29689278]
gradient  [  -3.64393177 -190.7973226 ]
gradient  [  -6.66429775 -155.17973513]
epoch no :  4    [0.51614483 0.76913773]    [0.00533144 0.12414379]
gradient  [  3.64171373 205.04775736]
gradient  [  -5.32528375 -199.81418802]
epoch no :  5    [0.51749169 0.76495087]    [0.00426023 0.15985135]
gradient  [ 1.84060838 76.21567881]
gradient  [1.02216290e-01 1.06697314e+02]
epoch no :  6    [0.51593743 0.61862

array([0.52615622, 0.81111927])