In [1]:
import numpy as np
from load_mnist import load_mnist
from init_convnet import init_convnet
from conv_net import conv_net
from utils import sgd_momentum, get_lr, get_lenet
import copy
from scipy.io import savemat

# Seed NumPy's global RNG first so the np.random.permutation call below
# (and any np.random use inside the imported helpers) is repeatable.
np.random.seed(100000)

# Hyper-parameters. None of these is consumed by code visible in this
# notebook; presumably they feed sgd_momentum / get_lr -- TODO confirm.
mu, epsilon = 0.9, 0.01
gamma, power = 0.0001, 0.75
weight_decay = 0.0005
w_lr, b_lr = 1, 2

# Iteration schedule and mini-batch width (columns taken per batch).
test_interval, display_interval = 500, 50
snapshot, max_iter = 500, 2000
batch_size = 100

# Network description as returned by the project helper.
layers = get_lenet()

# Load the data, then fold the validation split into the training split.
# Samples are indexed along axis 1 (see the column indexing below).
fullset = False
xtrain, ytrain, xvalidate, yvalidate, xtest, ytest = load_mnist(fullset)
xtrain, ytrain = np.hstack((xtrain, xvalidate)), np.hstack((ytrain, yvalidate))
m_train = xtrain.shape[1]

# Fresh parameters (training from scratch) plus an independent deep copy.
params = init_convnet(layers)
params_winc = copy.deepcopy(params)

# Shuffle the training columns once and rewind the batch cursor.
new_order = np.random.permutation(m_train)
xtrain, ytrain = xtrain[:, new_order], ytrain[:, new_order]
curr_batch = 0

In [2]:
# When the batch cursor has reached or passed the number of training columns,
# draw a new permutation, reorder both arrays with it and rewind the cursor.
if m_train <= curr_batch:
    new_order = np.random.permutation(m_train)
    xtrain, ytrain = xtrain[:, new_order], ytrain[:, new_order]
    curr_batch = 0

In [3]:
# Take the next `batch_size` columns from both arrays, then advance the cursor.
x_batch, y_batch = (
    arr[:, curr_batch:curr_batch + batch_size] for arr in (xtrain, ytrain)
)
curr_batch += batch_size

In [34]:
import numpy as np
from conv_layer import conv_layer_forward, conv_layer_backward
from pooling_layer import pooling_layer_forward, pooling_layer_backward
from inner_product import inner_product_forward, inner_product_backward
from relu import relu_forward, relu_backward
from mlrloss import mlrloss
from conv_net import convnet_forward

In [33]:
# Stand-in for the (disabled) call
#   cp, param_grad = conv_net(params, layers, x_batch, y_batch)
# The current mini-batch is bound to the names `data` / `labels` that the
# following cells read.
data, labels = x_batch, y_batch

In [35]:
# Forward pass
# Calls the imported convnet_forward helper on the current batch (presumably
# it runs every non-loss layer -- confirm against conv_net.py).
# NOTE(review): a later cell rebinds `output` with `output = [{}]` and rebuilds
# it layer by layer, so this result is replaced when the notebook is run top
# to bottom.
output = convnet_forward(params, layers, data)

In [8]:
# Hand-written layer specification: one dict per layer, tagged by 'type'.
# Stack: DATA -> CONV -> RELU -> POOLING -> CONV -> RELU -> POOLING
#        -> IP -> RELU -> LOSS
# NOTE(review): this rebinds `layers`, which the first cell obtained from
# get_lenet() and used to initialise `params`; confirm the two definitions
# agree.
layers = [
    # 1: DATA -- 28x28 input with 1 channel, `batch_size` samples per batch
    {'type': 'DATA', 'height': 28, 'width': 28, 'channel': 1, 'batch_size': batch_size},
    # 2: CONV -- num=20, k=5, stride 1, no padding
    {'type': 'CONV', 'num': 20, 'k': 5, 'stride': 1, 'pad': 0, 'group': 1},
    # 3: RELU
    {'type': 'RELU'},
    # 4: POOLING -- k=2, stride 2, no padding
    {'type': 'POOLING', 'k': 2, 'stride': 2, 'pad': 0},
    # 5: CONV -- num=50, k=5, stride 1, no padding
    {'type': 'CONV', 'k': 5, 'stride': 1, 'pad': 0, 'group': 1, 'num': 50},
    # 6: RELU
    {'type': 'RELU'},
    # 7: POOLING -- k=2, stride 2, no padding
    {'type': 'POOLING', 'k': 2, 'stride': 2, 'pad': 0},
    # 8: IP -- num=500, init_type 'uniform'
    {'type': 'IP', 'num': 500, 'init_type': 'uniform'},
    # 9: RELU
    {'type': 'RELU'},
    # 10: LOSS -- num=10
    {'type': 'LOSS', 'num': 10},
]

In [9]:
# Read the layer count and the batch size back out of the layer list, so both
# follow whatever `layers` currently holds.
l, batch_size = len(layers), layers[0]['batch_size']

In [12]:
l = len(layers)
assert layers[0]['type'] == 'DATA', 'first layer must be data layer'

# Start the per-layer result list with a single entry for the DATA layer:
# the raw batch, the geometry fields copied from layers[0], and a zero 'diff'.
output = [{
    'data': data,
    **{field: layers[0][field] for field in ('height', 'width', 'channel', 'batch_size')},
    'diff': 0,
}]

In [13]:
# Forward sweep over layers 1 .. l-2 (everything between the DATA layer and
# the last layer). Each iteration appends one result, so output[i] belongs to
# layer i provided `output` held only the DATA entry when the cell started.
for i in range(1, l - 1):
    layer_type = layers[i]['type']

    # Reject unknown layer kinds before touching `output`.
    if layer_type not in ('CONV', 'POOLING', 'IP', 'RELU'):
        raise Exception('Invalid layer type: %s' % layer_type)

    # CONV and IP read params[i-1]; RELU and POOLING take no parameters.
    if layer_type == 'RELU':
        layer_out = relu_forward(output[i-1])
    elif layer_type == 'POOLING':
        layer_out = pooling_layer_forward(output[i-1], layers[i])
    elif layer_type == 'CONV':
        layer_out = conv_layer_forward(output[i-1], layers[i], params[i-1])
    else:  # 'IP'
        layer_out = inner_product_forward(output[i-1], layers[i], params[i-1])

    output.append(layer_out)

In [57]:
# Loss layer: always the last entry of `layers`.
i = l - 1
assert layers[i]['type'] == 'LOSS', 'last layer must be loss layer'

# Pack the loss layer's parameters into one flat vector: all of 'w', then 'b'.
wb = np.concatenate([params[i-1][key].ravel() for key in ('w', 'b')])

# mlrloss returns four values, unpacked here as cost, grad, input_od, percent.
# The trailing `0, 1` arguments are passed through as-is; their meaning is
# defined by mlrloss (not visible in this notebook).
cost, grad, input_od, percent = mlrloss(
    wb, output[i-1]['data'], labels, layers[i]['num'], 0, 1
)

In [58]:
# Backward pass: collect one {'w', 'b'} gradient dict per parameterised slot,
# walking from the loss layer back towards the data layer. Every gradient is
# divided by `batch_size`.
#
# NOTE(review): `test` is not assigned in any visible cell; it must be defined
# (falsy for training) before this cell runs, otherwise it raises NameError.
param_grad = []
if not test:
    # Index the loss layer explicitly instead of relying on an `i` left over
    # from another cell: the loop below leaves i == 1, so re-running this cell
    # would otherwise slice `grad` with the wrong layer's shapes.
    loss_idx = l - 1
    loss_params = params[loss_idx - 1]
    n_w = loss_params['w'].size
    n_b = loss_params['b'].size

    # `grad` is laid out like `wb` in the loss cell: weights first, biases last.
    param_grad.append({
        'w': np.reshape(grad[:n_w], loss_params['w'].shape) / batch_size,
        'b': np.reshape(grad[-n_b:], loss_params['b'].shape) / batch_size,
    })

    for i in range(l - 2, 0, -1):
        layer_type = layers[i]['type']
        # Hand this layer the derivative produced by the layer after it
        # (mlrloss on the first iteration, the previous backward call after).
        output[i]['diff'] = input_od

        if layer_type == 'CONV':
            pg, input_od = conv_layer_backward(output[i], output[i-1], layers[i], params[i-1])
        elif layer_type == 'POOLING':
            input_od = pooling_layer_backward(output[i], output[i-1], layers[i])
            pg = {'w': [], 'b': []}  # no parameters: empty placeholders
        elif layer_type == 'IP':
            pg, input_od = inner_product_backward(output[i], output[i-1], layers[i], params[i-1])
        elif layer_type == 'RELU':
            # Equality, not `in 'RELU'`: the latter is a substring test that
            # also accepts '', 'R', 'LU', ...
            input_od = relu_backward(output[i], output[i-1], layers[i])
            pg = {'w': [], 'b': []}  # no parameters: empty placeholders
        else:
            # Same error as the forward loop, instead of a KeyError on pg['w'].
            raise Exception('Invalid layer type: %s' % layer_type)

        pg['w'] = np.array(pg['w']) / batch_size
        pg['b'] = np.array(pg['b']) / batch_size
        param_grad.append(pg)

# Entries were appended last-layer-first; flip so index k lines up with params[k].
param_grad = param_grad[::-1]

In [59]:
# Summary for this batch: cost divided by batch_size, plus the `percent`
# value returned by mlrloss unchanged.
cp = dict(cost=cost / batch_size, percent=percent)

In [60]:
# Sanity check: number of gradient entries collected by the backward-pass cell.
# NOTE(review): the recorded output is 0, i.e. that cell's `test` guard skipped
# gradient collection on the run that was saved; with the guard taken it
# appends one entry for the loss layer plus one per layer in range(l-2, 0, -1).
len(param_grad)

0