In [75]:
from mxnet import gluon, nd, autograd
import mxnet as mx
import numpy as np
import csv
import pickle
import random
import sys

In [2]:
# Compute device handle (CPU); passed as `ctx` when the network parameters
# are initialized further down.
context = mx.cpu()

In [66]:
# Locations of the EMNIST "balanced" CSV splits.
# NOTE(review): these are absolute, machine-specific paths; consider moving
# them to a configurable data directory.
TRAIN_CSV_PATH = "/Users/root02/Downloads/emnist-balanced-train.csv"
TEST_CSV_PATH = "/Users/root02/Downloads/emnist-balanced-test.csv"

# Number of (shuffled) training samples held out for validation.
VALIDATION_SIZE = 10000


def _load_emnist_csv(path):
    """Read one EMNIST CSV file into a list of ``(image, label)`` tuples.

    Column 0 of each row is taken as the label and the remaining columns as
    the pixel values, which are reshaped to a 28x28 array.  Both the pixels
    and the label are kept as the strings produced by ``csv.reader``;
    numeric conversion is left to the dataset transform.

    The file is opened in a ``with`` block so the handle is closed once the
    rows have been read.  Blank rows are skipped.
    """
    with open(path) as csv_file:
        return [
            (np.array(row[1:]).reshape((28, 28)), row[0])
            for row in csv.reader(csv_file)
            if row
        ]


X_train = _load_emnist_csv(TRAIN_CSV_PATH)
X_test = _load_emnist_csv(TEST_CSV_PATH)

# Shuffle before splitting so the validation split is a random sample of the
# training file rather than its first rows.
random.shuffle(X_train)
X_validate = X_train[:VALIDATION_SIZE]
X_train = X_train[VALIDATION_SIZE:]

### Define EMNIST Dataset Class

In [57]:
class EMNIST(gluon.data.Dataset):
    """Dataset wrapper around an in-memory list of samples.

    Parameters
    ----------
    list_data : list
        The samples; ``list_data[idx]`` is what ``__getitem__`` starts from.
    transform : callable, optional
        Function applied to a sample before it is returned.  When omitted
        (``None``) the raw sample is returned unchanged.
    """

    def __init__(self, list_data, transform=None):
        self._data = list_data
        self._transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self._data)

    def __getitem__(self, idx):
        """Return sample ``idx``, transformed if a transform was supplied."""
        sample = self._data[idx]
        # `transform` defaults to None; calling it unconditionally would raise
        # TypeError for datasets built without a transform.
        if self._transform is None:
            return sample
        return self._transform(sample)

### Create transform function

In [95]:
def transform_tuple(tuple_data):
    """Turn one ``(image, label)`` sample into an ``(NDArray, int)`` pair.

    The image (``tuple_data[0]``) is reshaped to ``(-1, 28, 28)``, cast to
    float32 and divided by 255; the label (``tuple_data[1]``) is converted
    with ``int``.
    """
    # Presumably the pixel values are 0-255 so this scales them to [0, 1] —
    # verify against the data source.
    pixels = tuple_data[0].reshape(-1, 28, 28).astype(np.float32) / 255
    image = nd.array(pixels)
    label = int(tuple_data[1])
    return image, label

### Define data loaders

In [96]:
# Wrap each split in an EMNIST dataset that applies `transform_tuple` per sample.
train_set = EMNIST(X_train, transform=transform_tuple)
validation_set = EMNIST(X_validate, transform=transform_tuple)
test_set = EMNIST(X_test, transform=transform_tuple)

# Batches of 64 for every split; only the training loader shuffles.
training_data = gluon.data.DataLoader(train_set, batch_size=64, shuffle=True)
validation_data = gluon.data.DataLoader(validation_set, batch_size=64, shuffle=False)
test_data = gluon.data.DataLoader(test_set, batch_size=64, shuffle=False)

### Define the neural network

In [68]:
net = gluon.nn.Sequential()

# Layers are created inside the name scope, in order, then appended one by one:
# two ReLU convolutions, a max-pool, two 64-unit ReLU dense layers and a
# 47-unit output layer.
with net.name_scope():
    layers = [
        gluon.nn.Conv2D(channels=64, kernel_size=(5, 5), activation="relu"),
        gluon.nn.Conv2D(channels=64, kernel_size=(3, 3), activation="relu"),
        gluon.nn.MaxPool2D(),
        gluon.nn.Dense(64, activation="relu"),
        gluon.nn.Dense(64, activation="relu"),
        gluon.nn.Dense(47),
    ]
    for layer in layers:
        net.add(layer)

In [70]:
# Loss used by the training loop.
cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# Xavier-initialize the network parameters on `context`, then hand the same
# parameters to an Adam trainer.
params = net.collect_params()
params.initialize(mx.init.Xavier(), ctx=context)
trainer = gluon.Trainer(params, "adam")

In [118]:
def eval_acc(net, dataset):
    """Return the accuracy of ``net`` over every batch yielded by ``dataset``.

    ``dataset`` is iterated as ``(data, label)`` batches; the predicted class
    of each sample is the argmax of the network output along axis 1.
    """
    metric = mx.metric.Accuracy()
    for data, label in dataset:
        predictions = nd.argmax(net(data), axis=1)
        metric.update(labels=label, preds=predictions)
    # get() yields (name, value); only the value is of interest here.
    _, accuracy = metric.get()
    return accuracy

In [116]:
def training_loop(net, dataset, num_epochs = 1):
    """Train ``net`` on ``dataset`` for ``num_epochs`` epochs and report accuracy.

    Relies on the module-level ``cross_entropy`` loss, ``trainer``,
    ``validation_data`` loader and ``eval_acc`` helper.

    Parameters
    ----------
    net : the network to train (called as ``net(data)``).
    dataset : iterable of ``(data, label)`` batches supporting ``len()``.
        Also used to measure the training accuracy.
    num_epochs : int
        Number of passes over ``dataset``.  With 0 no training happens and
        only the final accuracy line is printed.

    Note: the figure printed as "Test Accuracy" is computed on
    ``validation_data``, not on the held-out test loader.
    """
    train_acc = None
    val_acc = None
    for i in range(num_epochs):
        num_batches = len(dataset)
        for idx, (data, label) in enumerate(dataset):
            with autograd.record():
                output = net(data)
                loss = cross_entropy(output, label)
            loss.backward()
            # Normalize the gradient by the actual size of this batch (the
            # last batch may be smaller than the others).
            trainer.step(data.shape[0])
            # Float arithmetic: integer division would pin the progress at 0
            # until the final batch on Python 2.
            percent = 100.0 * (idx + 1) / num_batches
            sys.stdout.write("\rPercentage completed: " + str(percent)[:10])
            sys.stdout.flush()
        # Measure training accuracy on the data that was actually trained on.
        train_acc = eval_acc(net, dataset)
        val_acc = eval_acc(net, validation_data)
        print ("\nEpoch %s / Training Accuracy: %s, Test Accuracy: %s" % (str(i+1), str(train_acc), str(val_acc)))
    if train_acc is None:
        # No epoch ran (num_epochs == 0): evaluate the network as it stands.
        train_acc = eval_acc(net, dataset)
        val_acc = eval_acc(net, validation_data)
    # Otherwise reuse the last epoch's figures; the parameters have not
    # changed since, so re-evaluating would only repeat two full passes.
    print ("Training completed! / Training Accuracy: %s, Test Accuracy: %s" % (str(train_acc), str(val_acc)))

In [119]:
# num_epochs=0: the epoch loop does not execute, so this call only evaluates
# `net` with its current parameters and prints the final accuracy line.
# NOTE(review): the recorded accuracy below presumably reflects training done
# in earlier executions of this cell (execution counts are non-sequential);
# on a fresh kernel this call would report an untrained network — confirm and
# set the intended number of epochs.
training_loop(net, training_data, 0)

Training completed! / Training Accuracy: 0.849873540856, Test Accuracy: 0.8396


There are 2 measures, avoidable bias and variance, both defined relative to the Bayes error:

1) Bayes Error: Bayes Error is the minimum possible error that is achievable. In this use-case, we can assume ~0% error.

2) Avoidable Bias: Training Error - Bayes Error

3) Variance: Test Error - Training Error

Bayes Error = 0

Training Error = 15%

Test Error = 16%

Avoidable Bias = 15%
Variance = 1%

Since avoidable bias (15%) is far larger than variance (1%), here are 2-3 techniques to reduce the bias:

1) Train for more epochs

2) Train a more complex network

3) Use batch normalization to speed up convergence (sometimes this helps accuracy)