In [None]:
from __future__ import print_function
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import sys
import os

import cntk as C
import cntk.tests.test_utils
# One-time runtime setup: pick the compute device from the test environment and
# pin CNTK's random seed before any model or reader is created.
cntk.tests.test_utils.set_device_from_pytest_env() # (only needed for our build system)
C.cntk_py.set_fixed_random_seed(1) # fix the random seed so that LR examples are repeatable

%matplotlib inline

In [2]:
# Length of one feature vector fed to the network
# (presumably a flattened 28x28 image, going by the data file names: 28 * 28 = 784).
input_dim = 784
# Number of label classes; also used as the width of the model's final Dense layer.
num_output_classes = 10

In [3]:
def create_reader(path, is_training, input_dim, num_label_classes):
    """Create a CNTK minibatch source that reads a CTF text file.

    Args:
        path: location of the CTF file to deserialize.
        is_training: when true, the source randomizes the data and repeats
            it indefinitely; otherwise it makes a single, non-randomized sweep.
        input_dim: shape of the dense 'features' stream.
        num_label_classes: shape of the dense 'labels' stream.

    Returns:
        A ``C.io.MinibatchSource`` exposing ``labels`` and ``features`` streams.
    """
    stream_defs = C.io.StreamDefs(
        labels=C.io.StreamDef(field='labels', shape=num_label_classes, is_sparse=False),
        features=C.io.StreamDef(field='features', shape=input_dim, is_sparse=False)
    )
    ctf_deserializer = C.io.CTFDeserializer(path, stream_defs)

    # Training loops forever over the data; evaluation stops after one sweep.
    if is_training:
        sweep_limit = C.io.INFINITELY_REPEAT
    else:
        sweep_limit = 1

    return C.io.MinibatchSource(ctf_deserializer, randomize=is_training, max_sweeps=sweep_limit)

In [4]:
# Directory holding the MNIST files, given relative to the working directory.
data_dir = os.path.join("DataSets", "MNIST")

# Training and test split files, both located directly inside data_dir.
train_file, test_file = (
    os.path.join(data_dir, file_name)
    for file_name in ("Train-28x28_cntk_text.txt", "Test-28x28_cntk_text.txt")
)

print("Data directory is {0}".format(data_dir))

Data directory is DataSets\MNIST


In [5]:
# Network input variables: one sized for a feature vector, one sized for a label vector.
# NOTE(review): `input` shadows Python's builtin of the same name; it is left as-is
# because other cells in this notebook refer to it by this name.
input = C.input_variable(input_dim)
label = C.input_variable(num_output_classes)

In [6]:
def create_model(features, no_of_hidden_neurons = 1024):
    """Stack two Dense layers on top of ``features`` and return the result.

    Both layers are created with ``activation = None``, so no nonlinearity is
    applied between or after them. The output width comes from the
    notebook-level ``num_output_classes``.

    Args:
        features: CNTK expression used as the input to the first layer.
        no_of_hidden_neurons: width of the first (hidden) Dense layer.

    Returns:
        The CNTK function produced by the second Dense layer.
    """
    with C.layers.default_options(init=C.glorot_uniform()):
        # Each layer is built and applied before the next one is built.
        hidden_layer = C.layers.Dense(no_of_hidden_neurons, activation=None)
        hidden_output = hidden_layer(features)

        output_layer = C.layers.Dense(num_output_classes, activation=None)
        return output_layer(hidden_output)

In [7]:
# Reader over the training file; is_training=True makes it randomized and endlessly repeating.
reader_train = create_reader(
    train_file,
    is_training=True,
    input_dim=input_dim,
    num_label_classes=num_output_classes
)

# Associate each network input variable with the reader stream that supplies it.
_training_streams = reader_train.streams
input_map = {label: _training_streams.labels, input: _training_streams.features}

In [8]:
def train_network(model, learning_rate = 0.1, momentum = 0.9, max_epochs = 50, dataset_multiplier = 1):
    """Train ``model`` with momentum SGD and collect per-epoch averages.

    Uses the notebook-level ``label`` variable, ``reader_train`` minibatch
    source and ``input_map`` defined in earlier cells.

    Args:
        model: CNTK function whose output is fed to
            ``C.cross_entropy_with_softmax`` and ``C.classification_error``.
        learning_rate: value handed to ``C.learning_parameter_schedule``.
        momentum: value handed to ``C.momentum_schedule``.
        max_epochs: number of passes of the outer loop; one data point is
            recorded per pass.
        dataset_multiplier: scales how many minibatches are processed in each
            epoch (for example 0.25 processes a quarter as many).

    Returns:
        dict with keys ``"epoch"``, ``"loss"`` and ``"error"``; each maps to a
        list with one entry per epoch. ``"loss"`` and ``"error"`` hold the mean
        of the per-minibatch averages reported by the trainer in that epoch.
    """
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10
    # NOTE(review): one "epoch" here covers num_sweeps_to_train_with sweeps'
    # worth of minibatches (before dataset_multiplier is applied) -- confirm
    # this is intended.
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    minibatches_per_epoch = int(num_minibatches_to_train * dataset_multiplier)

    loss = C.cross_entropy_with_softmax(model, label)
    label_error = C.classification_error(model, label)

    # Training parameters
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    momentums = C.momentum_schedule(momentum, minibatch_size = minibatch_size)

    learner = C.momentum_sgd(model.parameters, lr_schedule, momentum = momentums)
    trainer = C.Trainer(model, (loss, label_error), [learner], [progress_printer])

    plotdata = {"epoch":[], "loss":[], "error":[]}

    # Average over the minibatches actually processed. Dividing by the
    # unscaled count (as before) mis-scaled the results whenever
    # dataset_multiplier != 1. max(..., 1) keeps the zero-minibatch case
    # returning 0 instead of raising ZeroDivisionError.
    averaging_count = max(minibatches_per_epoch, 1)

    for epoch in range(max_epochs):       # loop over epochs
        loss_sum = 0
        error_sum = 0
        for _ in range(minibatches_per_epoch):
            # Read a minibatch from the training data and take one training step.
            data = reader_train.next_minibatch(minibatch_size, input_map = input_map)
            trainer.train_minibatch(data)

            loss_sum += trainer.previous_minibatch_loss_average
            error_sum += trainer.previous_minibatch_evaluation_average

        plotdata["epoch"].append(epoch)
        plotdata["loss"].append(loss_sum / averaging_count)
        plotdata["error"].append(error_sum / averaging_count)
        trainer.summarize_training_progress()
    return plotdata

In [9]:
def plot_data(plotdata = None):
    """Plot training loss and label prediction error against epoch number.

    Both curves are drawn in a single figure with two stacked panels: loss on
    top (blue dashed) and error below (red dashed).

    Args:
        plotdata: dict with keys ``"epoch"``, ``"loss"`` and ``"error"``, each
            a sequence with one entry per epoch (the structure returned by
            ``train_network``).

    Raises:
        KeyError: if any of the three keys is missing (including when the
            function is called without an argument).
    """
    # Avoid a shared mutable default; an omitted argument behaves like {}.
    if plotdata is None:
        plotdata = {}

    # Look everything up before creating the figure so that a missing key does
    # not leave an empty figure behind.
    epochs = plotdata["epoch"]
    losses = plotdata["loss"]
    errors = plotdata["error"]

    fig, (loss_ax, error_ax) = plt.subplots(2, 1)

    loss_ax.plot(epochs, losses, 'b--')
    loss_ax.set_xlabel('Epoch number')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_title('Epoch run vs. Training loss')

    error_ax.plot(epochs, errors, 'r--')
    error_ax.set_xlabel('Epoch number')
    error_ax.set_ylabel('Label Prediction Error')
    error_ax.set_title('Epoch run vs. Label Prediction Error')

    # Show once, after both panels are drawn. Calling plt.show() between the
    # panels finished the figure early and pushed the second panel into a new,
    # half-empty figure.
    fig.tight_layout()  # keep the upper x-label clear of the lower title
    plt.show()

# Experiment 1

## Vary number of hidden units

### n = 20

In [None]:
# Hidden layer of 20 units; training uses train_network's default settings.
# The input is divided by 255.0 before the first layer
# (presumably to scale 8-bit pixel values into [0, 1] -- TODO confirm).
z1_1 = create_model(input/255.0, no_of_hidden_neurons=20)

data1_1 = train_network(z1_1)

plot_data(data1_1)

### n = 50

In [None]:
# Hidden layer of 50 units; training uses train_network's default settings.
# The input is divided by 255.0 before the first layer.
z1_2 = create_model(input/255.0, no_of_hidden_neurons=50)

data1_2 = train_network(z1_2)

plot_data(data1_2)

### n = 100

In [None]:
# Hidden layer of 100 units; training uses train_network's default settings.
# The input is divided by 255.0 before the first layer.
z1_3 = create_model(input/255.0, no_of_hidden_neurons=100)

data1_3 = train_network(z1_3)

plot_data(data1_3)

# Experiment 2

## Vary Momentums

### momentum = 0

In [None]:
# Fresh 100-hidden-unit model trained with momentum 0 for 20 epochs.
# NOTE(review): this model is named z2, while the sibling cells use z2_2 / z2_3.
z2 = create_model(input/255.0, no_of_hidden_neurons=100)

data2_1 = train_network(z2, momentum = 0, max_epochs = 20)

plot_data(data2_1)

### momentum = 0.25

In [None]:
# Fresh 100-hidden-unit model trained with momentum 0.25 for 20 epochs.
z2_2 = create_model(input/255.0, no_of_hidden_neurons=100)

data2_2 = train_network(z2_2, momentum = 0.25, max_epochs = 20)

plot_data(data2_2)

### momentum = 0.5

In [None]:
# Fresh 100-hidden-unit model trained with momentum 0.5 for 20 epochs.
z2_3 = create_model(input/255.0, no_of_hidden_neurons=100)

data2_3 = train_network(z2_3, momentum = 0.5, max_epochs = 20)

plot_data(data2_3)

# Experiment 3

## Vary Dataset Size

### dataset_size = 1/4