In [None]:
from __future__ import print_function

import logging
import random

import mxnet as mx
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.decomposition import PCA
import pickle
import numpy as np

# Root logger, switched to DEBUG verbosity; kept available as `logger`.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Seed NumPy, MXNet and the stdlib RNG with one shared value so that runs
# are deterministic.
SEED = 1234
for seed_rng in (np.random.seed, mx.random.seed, random.seed):
    seed_rng(SEED)

In [None]:
# Read the four pickled training artifacts in a fixed order, then bind them
# to their notebook-level names.
# NOTE(review): pickle.load can run arbitrary code while deserializing, so
# these files should only ever come from a trusted source.
train_payloads = []
for train_path in ("data/train_x", "data/train_ys", "data/train_yp", "data/train_yn"):
    with open(train_path, "rb") as handle:
        train_payloads.append(pickle.load(handle))
train_x, train_ys, train_yp, train_yn = train_payloads

In [None]:
# Read the four pickled test artifacts in a fixed order, then bind them to
# their notebook-level names.
# NOTE(review): pickle.load can run arbitrary code while deserializing, so
# these files should only ever come from a trusted source.
test_payloads = []
for test_path in ("data/test_x", "data/test_ys", "data/test_yp", "data/test_yn"):
    with open(test_path, "rb") as handle:
        test_payloads.append(pickle.load(handle))
test_x, test_ys, test_yp, test_yn = test_payloads

In [None]:
# Build the training feature matrix: take the first entry of every element of
# train_x, convert it to NumPy, and join all pieces along axis 0.
# A single np.concatenate call copies each piece once; concatenating inside a
# loop re-copies the whole accumulated array on every iteration.
# Presumably train_x is a list of batches whose first entry is an MXNet
# NDArray of features -- confirm against the code that produced the pickle.
# NOTE(review): an empty train_x raises ValueError from np.concatenate here
# (the loop-based version raised IndexError on train_x[0]).
X_train = np.concatenate([batch[0].asnumpy() for batch in train_x], axis=0)

In [None]:
# Build the test feature matrix: take the first entry of every element of
# test_x, convert it to NumPy, and join all pieces along axis 0.
# A single np.concatenate call copies each piece once; concatenating inside a
# loop re-copies the whole accumulated array on every iteration.
# Presumably test_x is a list of batches whose first entry is an MXNet
# NDArray of features -- confirm against the code that produced the pickle.
# NOTE(review): an empty test_x raises ValueError from np.concatenate here
# (the loop-based version raised IndexError on test_x[0]).
X_test = np.concatenate([batch[0].asnumpy() for batch in test_x], axis=0)

In [None]:
# Stack each loaded label collection into a single NumPy array.
# train_ys / test_ys become Y_train / Y_test; the *_yp and *_yn names are
# rebound to their stacked versions.
Y_train, train_yp, train_yn = (
    np.stack(labels) for labels in (train_ys, train_yp, train_yn)
)
Y_test, test_yp, test_yn = (
    np.stack(labels) for labels in (test_ys, test_yp, test_yn)
)

In [None]:
# Symbolic network definition: two fully connected hidden layers with ReLU
# activations, followed by a final fully connected layer. The layout follows
# the article; the layer width and the activation type are free to tune.
HIDDEN_UNITS = 512  # width of each hidden layer
NUM_OUTPUTS = 2     # width of the final layer

data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=HIDDEN_UNITS)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=HIDDEN_UNITS)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=NUM_OUTPUTS)

# Three alternative output layers, all attached to the same fc3 symbol.

# Final layer with the L2-SVM objective
mlp_svm_l2 = mx.symbol.SVMOutput(data=fc3, name='svm_l2')

# Final layer with the L1-SVM objective
mlp_svm_l1 = mx.symbol.SVMOutput(data=fc3, name='svm_l1', use_linear=True)

# Softmax cross-entropy loss, for comparison
mlp_softmax = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')

In [None]:
# Mini-batch size shared by the training and test iterators.
batch_size = 200

# Run on a GPU when MXNet reports at least one, otherwise on the CPU.
if mx.context.num_gpus() > 0:
    ctx = mx.gpu()
else:
    ctx = mx.cpu()

In [None]:
# Train and evaluate the same network once per output layer, recording each
# variant's test-set accuracy (in percent) keyed by the output symbol's name.
results = {}
for output in [mlp_svm_l2, mlp_svm_l1, mlp_softmax]:

    print("\nTesting with %s \n" % output.name)

    # The same label name is handed to both iterators and to the module;
    # it is derived from the output symbol's name.
    label = output.name + "_label"

    train_iter = mx.io.NDArrayIter(X_train, Y_train, batch_size=batch_size, label_name=label)
    test_iter = mx.io.NDArrayIter(X_test, Y_test, batch_size=batch_size, label_name=label)

    # Here we instantiate and fit the model for our data.
    # The article actually suggests using 400 epochs,
    # but this is reduced to 10 for convenience.
    mod = mx.mod.Module(
        context=ctx,
        symbol=output,         # Use the network we just defined
        label_names=[label],
    )
    mod.fit(
        train_data=train_iter,
        eval_data=test_iter,  # Testing data set. MXNet computes scores on test set every epoch
        batch_end_callback=mx.callback.Speedometer(batch_size, 200),  # Logging module to print out progress
        num_epoch=10,         # Train for 10 epochs
        optimizer_params={
            'learning_rate': 0.1,  # Learning rate
            'momentum': 0.9,       # Momentum for SGD with momentum
            'wd': 0.00001,         # Weight decay for regularization
        })

    # Evaluate on the test set once and reuse the number for both the summary
    # dict and the printout, instead of scoring the whole test set twice.
    accuracy = mod.score(test_iter, mx.metric.Accuracy())[0][1] * 100
    results[output.name] = accuracy
    print('Accuracy for %s:' % output.name, accuracy, '%\n')

# Final summary: one line per output layer, with a percent sign after the
# value (the unformatted "%s" placeholder used to be printed literally).
for key, value in results.items():
    print(key, value, "%")