# From Scratch
Sean Wade

In [2]:
from __future__ import division, print_function, absolute_import

import numpy as np
from pynet.layers import *
from pynet.utils.data import load_data
from pynet.utils import gradient_check
from pynet.optim import sgd

In [3]:
# Hyperparameters and network dimensions shared by the cells below.

# Network shape
INPUT_DIM = 784        # 28 * 28 pixels per image once flattened
HIDDEN_DIM = 100       # width of the hidden layer
NUM_CLASSES = 10       # number of output classes

# Optimisation
BATCH_SIZE = 32        # examples per mini-batch
EPOCHS = 25000         # iteration count of the outer training loop
REG = 0                # L2 regularisation strength (0 disables it)
WEIGHT_SCALE = 0.01    # std-dev of the normal used to initialise weights

In [4]:
# Load the MNIST train/test splits.
# Shapes as noted by the original author:
#   x_train : (60000, 28, 28, 1)
#   y_train : (60000, 10)
#   x_test  : (10000, 28, 28, 1)
#   y_test  : (10000, 10)
# y_train_num / y_test_num: presumably the integer class labels matching the
# y_* arrays -- TODO confirm against load_data.
(x_train, y_train,
 x_test, y_test,
 y_train_num, y_test_num) = load_data('mnist')

In [5]:
# Trainable parameters of the two-layer network.
# Weights are drawn from a zero-mean normal with std-dev WEIGHT_SCALE;
# biases start at zero. Entries are listed W1, W2, b1, b2 so the random
# draws happen in the same order as before.
params = {
    'W1': np.random.normal(loc=0.0, scale=WEIGHT_SCALE,
                           size=(INPUT_DIM, HIDDEN_DIM)),
    'W2': np.random.normal(loc=0.0, scale=WEIGHT_SCALE,
                           size=(HIDDEN_DIM, NUM_CLASSES)),
    'b1': np.zeros(HIDDEN_DIM),
    'b2': np.zeros(NUM_CLASSES),
}

In [6]:
# Report the shape of every parameter tensor as a quick sanity check.
print('Initialized Paramaters:')
# dict.items() works on both Python 2 and 3; dict.iteritems() exists only on
# Python 2 and raises AttributeError on Python 3.
for name, tensor in params.items():
    print('%s: ' % name, tensor.shape)

Initialized Paramaters:
b2:  (10,)
W2:  (100, 10)
W1:  (784, 100)
b1:  (100,)


In [None]:
# Training
#
# Mini-batch SGD on the two-layer network:
#   linear -> ReLU -> linear -> ReLU -> softmax loss
# Loss and batch accuracy are recorded every 10th iteration.
loss_hist = []
acc_hist = []
print('Starting training...')

# Loop invariants: dataset size and number of full mini-batches per pass.
# (Previously this was EPOCHS // d, which is 0 whenever EPOCHS < d, so the
# inner loop never ran and nothing was trained.)
d = len(y_train)
num_iters_per_epoch = d // BATCH_SIZE

# NOTE(review): each outer iteration is one full pass over the training set,
# so the total number of updates is EPOCHS * num_iters_per_epoch. Confirm the
# value of EPOCHS is what you intend.
for i in range(EPOCHS):
    # Shuffle the data for batch. The same permutation must be applied to
    # every array indexed by example, including the integer labels that feed
    # the loss and accuracy below; otherwise inputs and labels get misaligned.
    shuffle = np.random.permutation(d)
    x_train = x_train[shuffle]
    y_train = y_train[shuffle]
    y_train_num = y_train_num[shuffle]

    for j in range(1, num_iters_per_epoch + 1):
        # Make minibatch
        batch_start = (j - 1) * BATCH_SIZE
        batch_end = j * BATCH_SIZE
        x_batch = x_train[batch_start:batch_end]
        y_batch = y_train[batch_start:batch_end]  # not used by the loss below
        y_batch_num = y_train_num[batch_start:batch_end]

        # Forward Pass
        h1, linear_cache_1 = linear_forward(x_batch, params['W1'], params['b1'])
        a1, relu_cache_1 = relu_forward(h1)
        h2, linear_cache_2 = linear_forward(a1, params['W2'], params['b2'])
        # NOTE(review): a ReLU is applied to the output scores before the
        # softmax loss; kept as in the original -- confirm this is intended.
        a2, relu_cache_2 = relu_forward(h2)

        # Loss: data term plus L2 penalty on both weight matrices.
        probs, loss, dx = softmax_loss(a2, y_batch_num)
        loss += .5 * REG * np.sum(params['W1']**2) + .5 * REG * np.sum(params['W2']**2)

        # Backwards Pass (reverse order of the forward pass)
        grads = {}
        da = relu_backward(dx, relu_cache_2)
        dx_2, grads['W2'], grads['b2'] = linear_backward(da, linear_cache_2)
        da_2 = relu_backward(dx_2, relu_cache_1)
        dx, grads['W1'], grads['b1'] = linear_backward(da_2, linear_cache_1)

        # Gradient of the L2 penalty (a no-op when REG == 0).
        grads['W2'] += REG * params['W2']
        grads['W1'] += REG * params['W1']

        # Parameter update. Only existing keys are reassigned, so mutating
        # the dict while iterating over it is safe. items() is used instead
        # of the Python-2-only iteritems().
        for p, w in params.items():
            next_w, _ = sgd(w, grads[p])
            params[p] = next_w  # Update weights

        if j % 10 == 0:
            # Calculate the accuracy on the current mini-batch.
            y_pred = np.argmax(probs, axis=1)  # Get the highest prob index
            train_acc = np.mean(y_pred == y_batch_num)
            acc_hist.append(train_acc)
            loss_hist.append(loss)
            print("[{}] loss: {}, Acc: {}".format(i, loss, train_acc))

print('\n---Completed Training---')