In [3]:
import pandas as pd
import numpy as np

# Stochastic gradient descent

In [4]:
def stochastic_gradient_descent(X, y, initial_solution, calculate_gradient, learning_rate=0.01, max_num_epoch=1000):
    """
    Performs stochastic gradient descent optimization.

    Parameters:
    - X: Input data.
    - y: Target labels.
    - initial_solution: Initial solution for optimization.
    - calculate_gradient: Function to calculate the gradient.
    - learning_rate: Learning rate for updating the solution (default: 0.01).
    - max_num_iters: Maximum number of iterations (default: 1000).

    Returns:
    - The optimized solution.
    """

    # initialization
    if type(X) is pd.DataFrame:
        X = X.to_numpy()
    if type(y) is pd.DataFrame:
        y = y.to_numpy().T
    current_solution = initial_solution 

    for _ in range(max_num_epoch):
        random_state = np.random.RandomState()
        X_selected = X.sample(n=1, random_state=random_state)
        y_selected = y.sample(n=1, random_state=random_state)

        gradient = calculate_gradient(X_selected, y_selected, current_solution)
        initial_solution = initial_solution - learning_rate * gradient
    return initial_solution

#TODO

1. implement stop condition - additional parameter loss_tolerance
2. using batches 
3. shuffling at the beginning of epoch 
4. iteration over shuffled array 
5. implement later stochastic GD as mini batch
6. mini batch 2 additional parameters batch size or frac_of initial data

In [18]:
#TEST
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y = np.array([1, 2, 3, 4, 5])
initial_solution = np.array([0, 0])
calculate_gradient = lambda X, y, w: -2 * X.T.dot(y - X.dot(w))
print(stochastic_gradient_descent(X, y, initial_solution, calculate_gradient)) # Expected output: [0.9999999999999999, 1.9999999999999998]

ValueError: matrices are not aligned

# Mini batch gradient descent

# Mini batch gradient descent with momentum 

# AdaGrad

# RMSProp

# Adam