In [9]:
"""In this assignment, you should train your own network for CIFAR-10 classification with the deep learning framework MXNet.
   With MXNet, you only need to define the network by connecting symbols, then set the hyperparameters to train the
   network. You can also save your model and load a pretrained model to fine-tune the network. Make sure to use GPU
   mode. You should achieve at least 80% accuracy on the validation set."""

"""Visit http://mxnet.io/get_started/index.html to get familiar with MXNet!"""
   
import sys
import os
import numpy as np
import mxnet as mx
import logging

# download data if necessary
def _download(data_dir):
    if not os.path.isdir(data_dir):
        os.system("mkdir " + data_dir)
    os.chdir(data_dir)
    if (not os.path.exists('train.rec')) or \
       (not os.path.exists('test.rec')) :
        os.system("wget http://data.dmlc.ml/mxnet/data/cifar10.zip")
        os.system("unzip -u cifar10.zip")
        os.system("mv cifar/* .; rm -rf cifar; rm cifar10.zip")
    os.chdir("..")


# data
def get_iterator(data_shape=(3, 28, 28)):
    """Create the training and validation ``ImageRecordIter`` objects.

    Reads the module-level ``data_dir`` and ``batch_size``. Unless ``data_dir``
    contains ``'://'``, ``_download(data_dir)`` is called first. Both iterators
    use ``mean.bin`` from ``data_dir`` as the mean image; random cropping and
    mirroring are enabled for the training iterator only.

    Parameters
    ----------
    data_shape : tuple
        Value forwarded as ``data_shape`` to both iterators.

    Returns
    -------
    tuple
        ``(train, val)`` iterators over ``train.rec`` and ``test.rec``.
    """
    points_to_url = '://' in data_dir
    if not points_to_url:
        _download(data_dir)

    # Settings that are identical for both iterators.
    shared_kwargs = dict(
        mean_img=os.path.join(data_dir, "mean.bin"),
        data_shape=data_shape,
        batch_size=batch_size,
    )

    train_records = os.path.join(data_dir, "train.rec")
    train_iter = mx.io.ImageRecordIter(
        path_imgrec=train_records,
        rand_crop=True,
        rand_mirror=True,
        **shared_kwargs)

    val_records = os.path.join(data_dir, "test.rec")
    val_iter = mx.io.ImageRecordIter(
        path_imgrec=val_records,
        rand_crop=False,
        rand_mirror=False,
        **shared_kwargs)

    return (train_iter, val_iter)


def get_net(num_classes=10):
    """Build the symbolic convolutional network used for classification.

    Layout: three convolution groups (3x3 convolutions with padding 1 and
    ReLU, each group closed by a 2x2 max pooling with stride 2), then two
    1024-unit fully connected layers with ReLU and dropout (p=0.5) chained
    one after the other, then a final fully connected layer with
    ``num_classes`` units feeding a softmax output.

    Parameters
    ----------
    num_classes : int
        Number of units of the last fully connected layer, i.e. the number of
        classes the softmax distinguishes.

    Returns
    -------
    The ``SoftmaxOutput`` symbol (named ``"softmax"``) that ends the network.
    """
    #####################################################################################
    # TODO: define your net                                                             #
    # Define symbols that using convolution and max pooling to extract better features  #
    # from input image.                                                                 #
    #####################################################################################
    data = mx.symbol.Variable(name="data")

    # group 1
    conv1_1 = mx.symbol.Convolution(data=data, kernel=(3,3), pad=(1,1), num_filter=128, name="conv1_1")
    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
    pool1 = mx.symbol.Pooling(data=relu1_1, pool_type="max", kernel=(2,2), stride=(2,2), name="pool1")
    # group 2
    conv2_1 = mx.symbol.Convolution(data=pool1, kernel=(3,3), pad=(1,1), num_filter=128, name="conv2_1")
    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
    pool2 = mx.symbol.Pooling(data=relu2_1, pool_type="max", kernel=(2,2), stride=(2,2), name="pool2")
    # group 3
    conv3_1 = mx.symbol.Convolution(data=pool2, kernel=(3,3), pad=(1,1), num_filter=256, name="conv3_1")
    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
    conv3_2 = mx.symbol.Convolution(data=relu3_1, kernel=(3,3), pad=(1,1), num_filter=256, name="conv3_2")
    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
    pool_3 = mx.symbol.Pooling(data=relu3_2, pool_type="max", kernel=(2,2), stride=(2,2), name="pool_3")
    # group 4
    flatten = mx.symbol.Flatten(data=pool_3, name="flatten")
    fc4 = mx.symbol.FullyConnected(data=flatten, num_hidden=1024, name="fc4")
    relu4 = mx.symbol.Activation(data=fc4, act_type="relu", name="relu4")
    drop4 = mx.symbol.Dropout(data=relu4, p=0.5, name="drop4")
    # group 5
    # Feed from drop4 (not flatten) so that group 4 is actually part of the
    # graph instead of being a dead branch.
    fc5 = mx.symbol.FullyConnected(data=drop4, num_hidden=1024, name="fc5")
    relu5 = mx.symbol.Activation(data=fc5, act_type="relu", name="relu5")
    drop5 = mx.symbol.Dropout(data=relu5, p=0.5, name="drop5")
    # output
    # One unit per class: the softmax must range over num_classes, not over
    # the width of the hidden layers.
    fc6 = mx.symbol.FullyConnected(data=drop5, num_hidden=num_classes, name="fc6")
    softmax = mx.symbol.SoftmaxOutput(data=fc6, name="softmax")

    #####################################################################################
    #                              END OF YOUR CODE                                     #
    #####################################################################################
    return softmax

In [10]:
network = get_net()

################################################################################
# TODO: this is similar as solver                                              #
################################################################################

############################ set hyperparameters ###############################
batch_size = 100
weight_decay = 1e-4  # same as weight reg
num_epoch = 10       # number of epochs to train in this run
learning_rate = 1e-2
# NOTE(review): the assignment text at the top of the notebook asks for GPU
# mode, but training is placed on the CPU here. Switch to mx.gpu(0) when a
# GPU-enabled MXNet build is available -- confirm before changing.
devs = mx.cpu(1)     # set device id

################################  path #########################################
data_dir = 'cifar10/'
chk_dir = 'model/'
chk_prefix = chk_dir + 'net1'
load_model = False   ## set true if you want to load a pretrained model and finetune with lower learning rate

# Create the checkpoint directory without going through the shell.
if not os.path.isdir(chk_dir):
    os.makedirs(chk_dir)

# `reload` is a builtin only on Python 2; fall back to importlib elsewhere.
try:
    reload
except NameError:
    from importlib import reload

# Presumably reloaded so that basicConfig below takes effect even when the
# kernel has already configured logging -- TODO confirm.
reload(logging)
head = '%(asctime)-15s %(message)s'
logging.basicConfig(level=logging.DEBUG, format=head)

eval_metrics = ['accuracy']

## TopKAccuracy only allows top_k > 1
#eval_metrics.append(mx.metric.create('top_k_accuracy', top_k = 5))

if load_model:
    # Load from the same prefix the checkpoints are written to, instead of
    # spelling the path out a second time.
    model_prefix = chk_prefix
    model_iter = 30  # which model to load

    # Only the argument parameters are reused; the loaded symbol and auxiliary
    # states are discarded. Named throwaways avoid shadowing IPython's `_`/`__`.
    _loaded_symbol, arg_params, _loaded_aux = mx.model.load_checkpoint(model_prefix, model_iter)
else:
    arg_params = None
    model_iter = 0

model = mx.model.FeedForward(
    ctx           = devs,
    symbol        = network,
    arg_params    = arg_params,
    begin_epoch   = model_iter,
    # FeedForward trains over range(begin_epoch, num_epoch), so the end epoch
    # has to be offset by the epoch we resume from; otherwise resuming from
    # epoch 30 with num_epoch = 10 would train for zero epochs.
    num_epoch     = model_iter + num_epoch,
    learning_rate = learning_rate,
    momentum      = 0.9,
    wd            = weight_decay,
    initializer   = mx.init.Xavier(factor_type='in', magnitude=2.34)    ## weight initialization
    )

train_ite, val_ite = get_iterator()
model.fit(
    X                  = train_ite,
    eval_data          = val_ite,
    eval_metric        = eval_metrics,
    batch_end_callback = mx.callback.Speedometer(batch_size, 50),        ## log speed/metric every 50 batches
    epoch_end_callback = mx.callback.do_checkpoint(chk_prefix, 10)       ## save your model after each 10 epochs
    )

################################################################################
#                              END OF YOUR CODE                                #
################################################################################

2016-10-30 00:17:54,835 Start training with [cpu(1)]
2016-10-30 00:30:55,465 Epoch[0] Batch [50]	Speed: 6.93 samples/sec	Train-accuracy=0.095000
2016-10-30 00:43:03,891 Epoch[0] Batch [100]	Speed: 6.86 samples/sec	Train-accuracy=0.098400


KeyboardInterrupt: 