In [45]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import gzip
import os
import pickle
import json as json

import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)

In [46]:
class JSONEnc(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(JSONEnc, self).default(obj)

def one_hot(labels):
    """this creates a one hot encoding from a flat vector:
    i.e. given y = [0,2,1]
     it creates y_one_hot = [[1,0,0], [0,0,1], [0,1,0]]
    """
    classes = np.unique(labels)
    n_classes = classes.size
    one_hot_labels = np.zeros(labels.shape + (n_classes,))
    for c in classes:
        one_hot_labels[labels == c, c] = 1
    return one_hot_labels

def unhot(one_hot_labels):
    """ Invert a one hot encoding, creating a flat vector """
    return np.argmax(one_hot_labels, axis=-1)

def mnist(datasets_dir='./data'):
    if not os.path.exists(datasets_dir):
        os.mkdir(datasets_dir)
    data_file = os.path.join(datasets_dir, 'mnist.pkl.gz')
    if not os.path.exists(data_file):
        print('... downloading MNIST from the web')
        try:
            import urllib
            urllib.urlretrieve('http://google.com')
        except AttributeError:
            import urllib.request as urllib
        url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        urllib.urlretrieve(url, data_file)

    print('... loading data')
    # Load the dataset
    f = gzip.open(data_file, 'rb')
    try:
        train_set, valid_set, test_set = pickle.load(f, encoding="latin1")
    except TypeError:
        train_set, valid_set, test_set = pickle.load(f)
    f.close()

    test_x, test_y = test_set
    test_x = test_x.astype('float32')
    test_x = test_x.astype('float32').reshape(test_x.shape[0], 28, 28, 1)
    test_y = test_y.astype('int32')
    valid_x, valid_y = valid_set
    valid_x = valid_x.astype('float32')
    valid_x = valid_x.astype('float32').reshape(valid_x.shape[0], 28, 28, 1)
    valid_y = valid_y.astype('int32')
    train_x, train_y = train_set
    train_x = train_x.astype('float32').reshape(train_x.shape[0], 28, 28, 1)
    train_y = train_y.astype('int32')
    print('... done loading data')
#     return train_x, one_hot(train_y), valid_x, one_hot(valid_y), test_x, one_hot(test_y)
    return train_x, train_y, valid_x, valid_y, test_x, test_y

# Using tf Estimators
# Conv Net Function
def build_cnn_model(features, labels, mode, params):
    # STEP 1 : BUILD CONVNET
    # GRAPH : I/P --> CL1 --> RELU --> POOL1 --> CL2 --> RELU --> POOL2 --> FC --> SOFTMAX
    
    # Hyperparameters
    lr = params['lr']
    num_filters = params['num_filters']
    filter_size = params['filter_size']
    
    # Convolutional Layer #1
    # Computes num_filters features using a filter_size*filter_size filter with ReLU activation.
    # Padding is SAME
    # Input Tensor Shape: [batch_size, 28, 28, 1]
    # Output Tensor Shape: [batch_size, 28, 28, num_filters]
    x_input = tf.reshape(features["x_ip"], [-1, 28, 28, 1])
    convlayer1 = tf.layers.conv2d(inputs=x_input, filters=num_filters, kernel_size=filter_size,\
                                  padding="same", activation=tf.nn.relu)
    # Pooling Layer #1
    # First max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 28, 28, num_filters]
    # Output Tensor Shape: [batch_size, 14, 14, num_filters]
    maxpool1 = tf.layers.max_pooling2d(inputs=convlayer1, pool_size=2, strides=2)
    
    # Convolutional Layer #2
    # Computes num_filters features using a filter_size*filter_size filter with ReLU activation.
    # Padding is SAME
    # Input Tensor Shape: [batch_size, 14, 14, num_filters]
    # Output Tensor Shape: [batch_size, 14, 14, num_filters]
    convlayer2 = tf.layers.conv2d(inputs=maxpool1, filters=num_filters, kernel_size=filter_size,\
                                  padding="same", activation=tf.nn.relu)
    # Pooling Layer #2
    # Second max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 14, 14, num_filters]
    # Output Tensor Shape: [batch_size, 7, 7, num_filters]
    maxpool2 = tf.layers.max_pooling2d(inputs=convlayer2, pool_size=2, strides=2)
    
    # Flatten tensor into a batch of vectors
    # Input Tensor Shape: [batch_size, 7, 7, num_filters]
    # Output Tensor Shape: [batch_size, 7 * 7 * num_filters]
#     maxpool2_vec = tf.reshape(maxpool2, [-1, 7 * 7 * num_filters])
    maxpool2_vec = tf.reshape(maxpool2, [-1, maxpool2.shape[1] * maxpool2.shape[2] * num_filters])
    
    # Dense Layer
    # Densely connected layer with 128 neurons
    # Input Tensor Shape: [batch_size, 7 * 7 * num_filters]
    # Output Tensor Shape: [batch_size, 128]
    denselayer = tf.layers.dense(inputs=maxpool2_vec, units=128)
    
    # Logits Layer
    logitslayer = tf.layers.dense(inputs=denselayer, units=10, activation=None)
#     logits = tf.layers.dense(inputs=dropout, units=10)#size: [batch_size,10]

    # Generate predictions (for PREDICT and EVAL mode)
    predictions = { "classes": tf.argmax(input=logitslayer, axis=1),\
                   "probabilities": tf.nn.softmax(logitslayer, name="softmax_tensor") }
    
    # Calculate Loss (for both TRAIN and EVAL modes)
    sceloss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logitslayer)

    ## SHAPE CHECK
    print("x_input.shape", x_input.shape)
    print("convlayer1.shape", convlayer1.shape)
    print("maxpool1.shape", maxpool1.shape)
    print("convlayer2.shape", convlayer2.shape)
    print("maxpool2.shape", maxpool2.shape)
    print("maxpool2_vec.shape", maxpool2_vec.shape)
    print("denselayer.shape", denselayer.shape)
    print("logitslayer.shape", logitslayer.shape)
    print("labels.shape", labels.shape)
    print("sceloss.shape", sceloss.shape)
    
    # Generate estimator object for TRAIN
    if mode == tf.estimator.ModeKeys.TRAIN:
        print("tf.estimator.ModeKeys.TRAIN...")
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        train_op = optimizer.minimize(loss=sceloss,global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(tf.estimator.ModeKeys.TRAIN,loss=sceloss, train_op=train_op)
    
    # Add evaluation metrics (for EVAL mode)
    if mode == tf.estimator.ModeKeys.EVAL: 
        print("tf.estimator.ModeKeys.EVAL...")
        eval_metric_ops = { "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
        return tf.estimator.EstimatorSpec(mode=mode, loss=sceloss, eval_metric_ops=eval_metric_ops)

def train_and_validate(x_train, y_train, x_valid, y_valid, num_epochs, lr, num_filters, batch_size, filter_size):
    # TODO: train and validate your convolutional neural networks with the provided data and hyperparameters
    learning_curve = []
    param_dict = {'lr': lr, 'num_filters': num_filters, 'filter_size': filter_size,}
    dir_name = "./mnist_convnet_modellearningrate_" + str(lr)
    print(dir_name)
    
    # Logging Hook
    tensors_to_log = {"probabilities": "softmax_tensor"}
    log_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=10)
    
    # Create the Estimator
    MNIST_estimator = tf.estimator.Estimator(model_fn=build_cnn_model, params=param_dict, model_dir=dir_name)
    
    # Training Param Func
    train_func = tf.estimator.inputs.numpy_input_fn(x={"x_ip": x_train}, y=y_train, batch_size=batch_size,\
                                                    num_epochs=None, shuffle=True)
    # Val Param Func
    val_func = tf.estimator.inputs.numpy_input_fn(x={"x_ip": x_valid}, y=y_valid, num_epochs=1, shuffle=False)
    
    # Train & Validate the model/estimator
    for epoch in range(1, num_epochs + 1):
        te = MNIST_estimator.train(input_fn=train_func, hooks=[log_hook], steps=1)
        print("TRAINING EPOCH...", epoch)
        print(te)

        ve = MNIST_estimator.evaluate(input_fn=val_func)
        print("EVAL EPOCH...", epoch)
        print(ve)
        learning_curve.append(ve)
    
    return learning_curve, MNIST_estimator  # TODO: Return the validation error after each epoch (i.e learning curve) and your model


def test(x_test, y_test, model):
    # TODO: test your network here by evaluating it on the test data
    test_func = tf.estimator.inputs.numpy_input_fn(x={"x_ip": x_test}, y=y_test, num_epochs=1, shuffle=False)
    test_err = model.evaluate(input_fn=test_func)
    return test_err


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_path", default="./", type=str, nargs="?",
                        help="Path where the results will be stored")
    parser.add_argument("--input_path", default="./", type=str, nargs="?",
                        help="Path where the data is located. If the data is not available it will be downloaded first")
    parser.add_argument("--learning_rate", default=1e-3, type=float, nargs="?", help="Learning rate for SGD")
    parser.add_argument("--num_filters", default=32, type=int, nargs="?",
                        help="The number of filters for each convolution layer")
    parser.add_argument("--batch_size", default=128, type=int, nargs="?", help="Batch size for SGD")
    parser.add_argument("--epochs", default=12, type=int, nargs="?",
                        help="Determines how many epochs the network will be trained")
    parser.add_argument("--run_id", default=0, type=int, nargs="?",
                        help="Helps to identify different runs of an experiments")
    parser.add_argument("--filter_size", default=3, type=int, nargs="?",
                        help="Filter width and height")
#     args = parser.parse_args()

    # hyperparameters
#     lr = args.learning_rate
#     num_filters = args.num_filters
#     batch_size = args.batch_size
#     epochs = args.epochs
#     filter_size = args.filter_size

    base_path = "./"
    
    # hyperparameters
    lrs = [0.1, 0.01, 0.001, 0.0001] #args.learning_rate
    lr = lrs[0]
    num_filters = 16 #args.num_filters
    batch_size = 128 #args.batch_size
    epochs = 5 #args.epochs
    filter_size = 3 #args.filter_size

    # train and test convolutional neural network
#     x_train, y_train, x_valid, y_valid, x_test, y_test = mnist(args.input_path)
    x_train, y_train, x_valid, y_valid, x_test, y_test = mnist(base_path)
    
    # SHAPE TEST
    print(x_train.shape)
    print(y_train.shape)
    print(x_valid.shape)
    print(y_valid.shape)
    print(x_test.shape)
    print(y_test.shape)
    
    learning_curve, model = train_and_validate(x_train, y_train, x_valid, y_valid, epochs, lr, num_filters, \
                                               batch_size, filter_size)
    test_error = test(x_test, y_test, model)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["lr"] = lr
    results["num_filters"] = num_filters
    results["batch_size"] = batch_size
    results["filter_size"] = filter_size
    results["learning_curve"] = learning_curve
    results["test_error"] = test_error

    print(results)
#     path = os.path.join(args.output_path, "results")
    path = os.path.join(base_path, "results")
    os.makedirs(path, exist_ok=True)
    
#     fname = os.path.join(path, "results_run_%d.json" % args.run_id)
    fname = os.path.join(path, "results_run_lr_%2f.json" % lr)
    fh = open(fname, "w")
    json.dump(results, fh, cls=JSONEnc)
    fh.close()

... loading data
... done loading data
(50000, 28, 28, 1)
(50000,)
(10000, 28, 28, 1)
(10000,)
(10000, 28, 28, 1)
(10000,)
./mnist_convnet_modellearningrate_0.1
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './mnist_convnet_modellearningrate_0.1', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000017900C398D0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num

INFO:tensorflow:Finished evaluation at 2018-11-10-14:50:44
INFO:tensorflow:Saving dict for global step 39: accuracy = 0.787, global_step = 39, loss = 0.7673814
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 39: ./mnist_convnet_modellearningrate_0.1\model.ckpt-39
EVAL EPOCH... 3
{'accuracy': 0.787, 'loss': 0.7673814, 'global_step': 39}
INFO:tensorflow:Calling model_fn.
x_input.shape (128, 28, 28, 1)
convlayer1.shape (128, 28, 28, 16)
maxpool1.shape (128, 14, 14, 16)
convlayer2.shape (128, 14, 14, 16)
maxpool2.shape (128, 7, 7, 16)
maxpool2_vec.shape (128, 784)
denselayer.shape (128, 128)
logitslayer.shape (128, 10)
labels.shape (128,)
sceloss.shape ()
tf.estimator.ModeKeys.TRAIN...
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./mnist_convnet_modellearningrate_0.1\model.ckpt-39
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init