In [None]:
import os, sys
sys.path.append("C:/Users/isang/OneDrive/Desktop/DL/deep-learning-from-scratch-master")

import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet

# Optimizer classes  
# Gradient Descent class
class GD:
    """Plain gradient-descent update rule.

    The rule itself is batch-agnostic; in this notebook the optimizer is
    driven with the gradient computed on the whole training set.
    """

    def __init__(self, lr=0.01):
        # Learning rate: the factor every gradient is scaled by in update().
        self.lr = lr

    def update(self, params, grads):
        """Move every parameter one step against its gradient.

        Args:
            params: Mapping from parameter name to value. Modified in place.
            grads: Mapping holding a gradient for every key of ``params``.
        """
        step = self.lr
        for name in params:
            params[name] -= step * grads[name]

# Stochastic Gradient Descent class
class SGD:
    """Stochastic gradient-descent update rule.

    The arithmetic is an ordinary gradient step; in this notebook the
    gradient handed to it comes from a single randomly drawn example.
    """

    def __init__(self, lr=0.01):
        # Learning rate: the factor every gradient is scaled by in update().
        self.lr = lr

    def update(self, params, grads):
        """Move every parameter one step against its gradient.

        Args:
            params: Mapping from parameter name to value. Modified in place.
            grads: Mapping holding a gradient for every key of ``params``.
        """
        step = self.lr
        for name in params:
            params[name] -= step * grads[name]

# Mini-batch Stochastic Gradient Descent class
class mini_SGD:
    """Mini-batch stochastic gradient-descent update rule.

    The arithmetic is an ordinary gradient step; in this notebook the
    gradient handed to it comes from a randomly drawn mini-batch.
    """

    def __init__(self, lr=0.64):
        # Learning rate: the factor every gradient is scaled by in update().
        self.lr = lr

    def update(self, params, grads):
        """Move every parameter one step against its gradient.

        Args:
            params: Mapping from parameter name to value. Modified in place.
            grads: Mapping holding a gradient for every key of ``params``.
        """
        step = self.lr
        for name in params:
            params[name] -= step * grads[name]

In [None]:
# Reproducibility: the training cell samples batches with np.random.choice,
# so seed NumPy's global RNG once, before anything random happens.
# NOTE(review): MultiLayerNet presumably draws its initial weights from the
# same global RNG, in which case this also fixes the initialisation - confirm.
SEED = 42
np.random.seed(SEED)

# Load the MNIST dataset.
# NOTE(review): normalize=True presumably rescales pixel values - confirm
# against dataset.mnist.load_mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size1 = 1    # examples per update for 'SGD'
batch_size2 = 64   # examples per update for 'mini_SGD'

# Experiment setup: one optimizer per strategy being compared.
# mini_SGD's learning rate equals 0.01 * 64, i.e. the base rate multiplied by
# its batch size (presumably a deliberate linear scaling).
optimizers = {
    'SGD': SGD(lr=0.01),
    'GD': GD(lr=0.01),
    'mini_SGD': mini_SGD(lr=0.64),
}

# Each optimizer trains its own network with the same architecture and gets
# its own, initially empty, loss history.
networks = {
    key: MultiLayerNet(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100],
        output_size=10,
    )
    for key in optimizers
}
train_loss = {key: [] for key in optimizers}

In [8]:
# Training loop
NUM_ITERATIONS = 2000  # parameter updates applied to every network
LOG_INTERVAL = 100     # print the current losses every this many iterations

# NOTE: `epoch` counts single parameter updates (iterations), not full passes
# over the training set; the name is kept so the printed log stays unchanged.
# NOTE: 'GD' runs gradient() and loss() on the entire training set on every
# iteration, which dominates the run time of this cell.
for epoch in range(NUM_ITERATIONS):
    # Draw a single random example for SGD.
    batch_mask1 = np.random.choice(train_size, batch_size1)
    x_batch1 = x_train[batch_mask1]
    t_batch1 = t_train[batch_mask1]

    # Draw a random mini-batch for mini_SGD.
    batch_mask2 = np.random.choice(train_size, batch_size2)
    x_batch2 = x_train[batch_mask2]
    t_batch2 = t_train[batch_mask2]

    # Inputs each optimizer trains on this iteration: the full dataset for
    # GD, one example for SGD, and the mini-batch for anything else.
    inputs_by_optimizer = {
        'GD': (x_train, t_train),
        'SGD': (x_batch1, t_batch1),
    }
    default_inputs = (x_batch2, t_batch2)

    for key in optimizers.keys():
        x_input, t_input = inputs_by_optimizer.get(key, default_inputs)

        # Compute gradients and update parameters.
        grads = networks[key].gradient(x_input, t_input)
        optimizers[key].update(networks[key].params, grads)

        # Record the post-update loss on the same inputs.
        loss = networks[key].loss(x_input, t_input)
        train_loss[key].append(loss)

    # Print the losses every LOG_INTERVAL iterations.
    if epoch % LOG_INTERVAL == 0:
        print("========== epoch: " + str(epoch) + " ==========")
        for key in optimizers.keys():
            # The loss for these inputs and parameters was recorded just
            # above, so reuse it instead of running another forward pass
            # (assumes loss() is deterministic - it is called with identical
            # arguments and no update in between).
            print(f"{key}: {train_loss[key][-1]}")

SGD: 1.3661220968693282
GD: 2.3184861723813155
mini_SGD: 2.4405026959527785


KeyboardInterrupt: 