In [4]:
import numpy as np
#from common.gradient import numerical_gradient
from collections import OrderedDict
from src.functions import *
from src.layers import *
from src.VGGnet import *

In [5]:
#load MNIST dataset
import sys, os
sys.path.append(os.pardir) 
from dataset.mnist import load_mnist

# Only a leading slice of each split is kept so the experiment stays small.
train_subset = slice(None, 5000)
test_subset = slice(None, 1000)

# flatten=False presumably keeps each image in its multi-dimensional layout
# rather than a flat vector -- confirm against dataset.mnist.load_mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

x_train = x_train[train_subset]
t_train = t_train[train_subset]
x_test = x_test[test_subset]
t_test = t_test[test_subset]

In [6]:
# Build the network for single-channel 28x28 inputs and 10 output classes.
network = VGGNet(
    input_dim=(1, 28, 28),
    conv_params={'filter_size': 3, 'pad': 1, 'stride': 1},
    output_size=10,
    weight_init_std=0.01,
)

# Training hyperparameters.
iter_num = 20                    # total number of mini-batch updates
train_size = x_train.shape[0]    # number of training samples available
batch_size = 100
learning_rate = 0.1

# Parameters that receive a plain SGD update on every iteration.
param_keys = (
    'conv1_W', 'conv1_b', 'conv2_W', 'conv2_b', 'conv3_W', 'conv3_b',
    'conv4_W', 'conv4_b', 'conv5_W', 'conv5_b',
    'fcl1_W', 'fcl1_b', 'fcl2_W', 'fcl2_b',
)

# History collected during training.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of mini-batch iterations that make up one pass over the training data.
# This must be derived from train_size, not iter_num: the previous
# iter_num / batch_size evaluated to 0.2, was clamped to 1, and therefore
# triggered the full-dataset accuracy evaluation after every single update.
# Integer division keeps the modulo check below exact.
# With the current settings (50 iterations per epoch, 20 iterations in total)
# accuracy is evaluated only at iteration 0; raise iter_num to see more epochs.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iter_num):
    # Draw a random mini-batch (indices are sampled with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradients of the loss w.r.t. every parameter for this mini-batch.
    grad = network.gradient(x_batch, t_batch)

    # Vanilla SGD step, applied in place to the network's parameters.
    for key in param_keys:
        network.params[key] -= grad[key] * learning_rate

    # Record the mini-batch loss measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate accuracy on the full train and test subsets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print("train acc, test acc-> " , train_acc, test_acc, "\n")