### Mini-batch Gradient Descent

The training set is divided into number of batches of perticular size and cost of batch used to update the weights.

<img src=img/mini_batch_gradient_descent.png width=300 height=300 align="left">

In [1]:
import numpy as np

In [2]:
def mini_batch_gradient_descent(X, y, theta, alpha, epoch, batch_size):
    for _ in range(epoch):
        for i in range(0, len(X), batch_size):
            
            x_batch = X[i:i+batch_size]
            y_batch = y[i:i+batch_size]
            
            y_hat = np.dot(x_batch, theta)
            # (1.0/batch_size) * np.dot(x_batch.T, y_hat - y_batch) - this is derivative of cost function
            theta = theta - alpha * (1.0/batch_size) * np.dot(x_batch.T, y_hat - y_batch)
    return theta

In [3]:
# Input features
X = np.array([
    [1, 2, 1],
    [1, 2, 3],
    [2, 1, 1],
    [3, 1, 1],
    [2, 3, 1]
])
# Output targets
y = np.array([1, 2, 3, 4, 5])

# Test set
test = np.array([1, 2, 1])

In [4]:
# Number of epochs
epoch = 1000
# Weights
theta = np.array([0.1, 0.1, 0.1])
# Learning rate
alpha = 0.001
# Batch size
batch_size = 2

In [5]:
# Learned weights
new_theta = mini_batch_gradient_descent(X, y, theta, alpha, epoch, batch_size)

In [6]:
# Test on learned weights
predicted = np.dot(test, new_theta)
print(predicted)

2.2783745069087784
