# High-level CNTK Example

In [1]:
# Parameters
import os  # imported here because this cell runs before the notebook's import cell

# Training hyper-parameters.
EPOCHS = 10
N_CLASSES = 10
BATCHSIZE = 64
LR = 0.01
MOMENTUM = 0.9
GPU = True

# InfluxDB logging settings. Every value may be overridden through an
# environment variable so that real hosts/credentials do not have to be saved
# inside the notebook; the fallbacks are the notebook's original values.
LOGGER_URL = os.getenv('LOGGER_URL', 'msdlvm.southcentralus.cloudapp.azure.com')
# The constant keeps its existing spelling because other cells refer to it by
# this exact name; the environment variable uses the conventional spelling.
LOGGER_USRENAME = os.getenv('LOGGER_USERNAME', 'admin')
LOGGER_PASSWORD = os.getenv('LOGGER_PASSWORD', 'password')
LOGGER_DB = os.getenv('LOGGER_DB', 'gpudata')
LOGGER_SERIES = os.getenv('LOGGER_SERIES', 'gpu')

In [2]:
import numpy as np
import os
import sys
import cntk
from cntk.layers import Convolution2D, MaxPooling, Dense, Dropout
from os import path
from utils import cifar_for_library, yield_mb, create_logger, Timer
from nb_logging import NotebookLogger, output_to, error_to
from gpumon.influxdb import log_context
import codecs

from influxdb import InfluxDBClient

In [None]:
# InfluxDB client built from the logging settings of the parameters cell;
# it is handed to create_logger further down.
client = InfluxDBClient(
    host=LOGGER_URL,
    port=8086,
    username=LOGGER_USRENAME,
    password=LOGGER_PASSWORD,
    database=LOGGER_DB,
)

In [None]:
# Azure Batch identifiers read from the environment; each one falls back to a
# fixed placeholder when its variable is not set.
node_id, task_id, job_id = (
    os.environ.get(variable, fallback)
    for variable, fallback in (
        ('AZ_BATCH_NODE_ID', 'node'),
        ('AZ_BATCH_TASK_ID', 'cntk'),
        ('AZ_BATCH_JOB_ID', 'cntk'),
    )
)

In [None]:
# Callable used later as logger(<measurement name>, value=...) to record the
# training duration and the final accuracy. It is bound to the InfluxDB client
# and the node/task/job identifiers defined above.
# NOTE(review): exact write semantics live in utils.create_logger — confirm there.
logger = create_logger(client, node_id=node_id, task_id=task_id, job_id=job_id)

In [3]:
# Build a NotebookLogger from the kernel's stdout stream attributes (session,
# pub_thread, name) plus the interpreter's original stdout (sys.__stdout__).
# NOTE(review): presumably this mirrors notebook output to the terminal —
# confirm against nb_logging. Requires sys.stdout to be the Jupyter kernel's
# stream object, since plain file objects have no session/pub_thread.
nb_teminal_logger = NotebookLogger(sys.stdout.session, sys.stdout.pub_thread, sys.stdout.name, sys.__stdout__)

In [4]:
# Route stdout and then stderr through the notebook logger, keeping whatever
# each helper returns.
# NOTE(review): the returned objects look like "restore" handles — confirm
# against nb_logging.
rst_out, rst_err = (
    output_to(nb_teminal_logger),
    error_to(nb_teminal_logger),
)

In [5]:
# Report the platform and the versions of the libraries this run depends on.
for caption, value in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("Numpy: ", np.__version__),
    ("CNTK: ", cntk.__version__),
):
    print(caption, value)

OS:  linux
Python:  3.5.2 |Continuum Analytics, Inc.| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.11.2
CNTK:  2.1


In [6]:
# Location of the extracted CIFAR-10 python batches inside the input dataset
# directory named by the AZ_BATCHAI_INPUT_DATASET environment variable.
_dataset_root = os.getenv('AZ_BATCHAI_INPUT_DATASET')
if _dataset_root is None:
    # Without this check path.join(None, ...) fails with an opaque TypeError.
    raise RuntimeError(
        "Environment variable AZ_BATCHAI_INPUT_DATASET is not set; "
        "it must point to the directory that contains 'cifar-10-batches-py'."
    )
data_path = path.join(_dataset_root, 'cifar-10-batches-py')

In [7]:
def create_network():
    """Build the convolutional network on top of the global ``features`` input.

    Architecture: two stages of (3x3 conv, 3x3 conv, 2x2 max-pool with stride 2,
    dropout 0.25) using 50 and then 100 filters, followed by Dense(512),
    dropout 0.5 and a final Dense layer with ``N_CLASSES`` outputs.

    Inside the ``default_options`` scope layers are initialised with Glorot
    uniform weights and use ReLU as their default activation. The final layer
    overrides the activation with ``None``, so the returned function produces
    raw (un-normalised) class scores.

    Returns:
        The CNTK function for the output of the last Dense layer.
    """
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.relu):
        net = features
        # Convolutional feature extractor: one stage per filter count.
        for n_filters in (50, 100):
            for _ in range(2):
                net = Convolution2D(filter_shape=(3, 3), num_filters=n_filters, pad=True)(net)
            net = MaxPooling((2, 2), strides=(2, 2), pad=False)(net)
            net = Dropout(0.25)(net)

        # Classifier head.
        net = Dense(512)(net)
        net = Dropout(0.5)(net)
        return Dense(N_CLASSES, activation=None)(net)

In [8]:
def init_model(m):
    """Create a CNTK trainer for network ``m`` using momentum SGD.

    The training criterion is softmax cross-entropy against the global
    ``labels`` input (dense labels); the evaluation metric is the
    classification error against the same input. Learning rate and momentum
    come from the global ``LR`` and ``MOMENTUM`` constants.

    Args:
        m: CNTK function producing the raw class scores.

    Returns:
        A ``cntk.Trainer`` wired with the loss, the metric and the learner.
    """
    criterion = cntk.cross_entropy_with_softmax(m, labels)

    # Momentum SGD, see
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    # With unit_gain=False the update direction is
    #   momentum * old_direction + gradient
    # whereas unit_gain=True would scale the gradient by (1 - momentum).
    lr_schedule = cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch)
    momentum_schedule = cntk.momentum_schedule(MOMENTUM)
    sgd = cntk.momentum_sgd(m.parameters,
                            lr=lr_schedule,
                            momentum=momentum_schedule,
                            unit_gain=False)

    metric = cntk.classification_error(m, labels)
    return cntk.Trainer(m, (criterion, metric), [sgd])

In [9]:
%%time
# Load CIFAR-10 in the layout requested here: channels-first images and
# one-hot encoded labels.
x_train, x_test, y_train, y_test = cifar_for_library(data_path, channel_first=True, one_hot=True)
# CNTK format: the labels are fed as float32.
y_train, y_test = (targets.astype(np.float32) for targets in (y_train, y_test))
# Sanity check: shapes first, then dtypes, of all four arrays.
print(*(array.shape for array in (x_train, x_test, y_train, y_test)))
print(*(array.dtype for array in (x_train, x_test, y_train, y_test)))

Preparing train set...
Preparing test set...
Done.
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000, 10) (10000, 10)
float32 float32 float32 float32
CPU times: user 1.09 s, sys: 696 ms, total: 1.79 s
Wall time: 1.87 s


In [10]:
%%time
# Input placeholders: one for the 3x32x32 image tensor, one for the
# N_CLASSES-wide label vector.
features = cntk.input_variable(shape=(3, 32, 32), dtype=np.float32)
labels = cntk.input_variable(shape=N_CLASSES, dtype=np.float32)
# Network symbol built on top of the `features` placeholder.
sym = create_network()

CPU times: user 52 ms, sys: 1.66 s, total: 1.71 s
Wall time: 3.18 s


In [11]:
%%time
# Attach the loss, the error metric and the momentum-SGD learner to the network.
trainer = init_model(sym)

CPU times: user 68 ms, sys: 660 ms, total: 728 ms
Wall time: 1.78 s


In [12]:
# Time the whole training run; log_context stays active for its full duration.
with Timer() as t, log_context(LOGGER_URL, LOGGER_USRENAME, LOGGER_PASSWORD, LOGGER_DB, LOGGER_SERIES,
                               node_id=node_id, task_id=task_id, job_id=job_id):
    for epoch in range(1, EPOCHS + 1):
        # One pass over the shuffled training set, one minibatch at a time.
        for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
            trainer.train_minibatch({features: data, labels: label})
        # The reported accuracy comes from the final minibatch of the epoch
        # only; it is not an average over all minibatches.
        last_batch_error = trainer.previous_minibatch_evaluation_average
        print("Epoch %d  |  Accuracy: %.6f" % (epoch, 1 - last_batch_error))
print('Training took %.03f sec.' % t.interval)
logger('training duration', value=t.interval)

Epoch 1  |  Accuracy: 0.390625
Epoch 2  |  Accuracy: 0.562500
Epoch 3  |  Accuracy: 0.609375
Epoch 4  |  Accuracy: 0.781250
Epoch 5  |  Accuracy: 0.578125
Epoch 6  |  Accuracy: 0.812500
Epoch 7  |  Accuracy: 0.828125
Epoch 8  |  Accuracy: 0.765625
Epoch 9  |  Accuracy: 0.750000
Epoch 10  |  Accuracy: 0.640625
CPU times: user 1min 2s, sys: 50.5 s, total: 1min 52s
Wall time: 4min 33s


In [13]:
%%time
# Predict on the test set and collect the predicted class per sample.
# The network itself outputs raw scores (softmax is only applied inside the
# cross-entropy training loss), so a softmax is added on top for inference.
z = cntk.softmax(sym)
# Only whole minibatches are scored, so the sample count is rounded down to a
# multiple of BATCHSIZE.
# NOTE(review): this assumes yield_mb drops a trailing partial batch — confirm
# against utils.yield_mb.
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# Builtin int instead of the np.int alias, which newer NumPy releases removed;
# the resulting dtype is the same.
y_guess = np.zeros(n_samples, dtype=int)
y_truth = np.argmax(y_test[:n_samples], axis=-1)
for batch_idx, (data, _) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):
    predicted_label_probs = z.eval({features : data})
    # Write this batch's predicted classes into its slot of the result vector.
    y_guess[batch_idx*BATCHSIZE:(batch_idx+1)*BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)

CPU times: user 372 ms, sys: 416 ms, total: 788 ms
Wall time: 3 s


In [14]:
# Fraction of scored test samples whose predicted class equals the true class.
# np.mean over the boolean match array is computed in NumPy instead of
# iterating the array element by element with the builtin sum.
acc = np.mean(y_guess == y_truth)
print("Accuracy: ", acc)
logger('accuracy', value=acc)

Accuracy:  0.776241987179
