In [None]:
from __future__ import print_function

import argparse
import gzip
import json
import os
import pickle

import numpy as np
import tensorflow as tf

# Set TensorFlow's logger verbosity to INFO for the whole notebook/script.
# NOTE(review): presumably this is what makes the estimator's progress messages and
# the LoggingTensorHook output visible -- confirm against the TensorFlow 1.x docs.
tf.logging.set_verbosity(tf.logging.INFO)


def one_hot(labels):
    """Create a one-hot encoding from an array of class labels.

    Example: given y = [0, 2, 1]
    it creates y_one_hot = [[1, 0, 0], [0, 0, 1], [0, 1, 0]].

    Every distinct label value gets exactly one column, and the columns follow
    the sorted order of the distinct values (as returned by ``np.unique``).
    The column of a label is its *position* among the distinct values rather
    than the label value itself, so label sets that are not exactly
    ``0 .. n_classes - 1`` (e.g. ``[1, 3]`` or negative ids) are encoded
    correctly instead of raising ``IndexError`` or hitting the wrong column.
    For labels that already are ``0 .. n_classes - 1`` the result is unchanged.

    Args:
        labels: array-like of class labels, of any shape.

    Returns:
        Float array of shape ``labels.shape + (n_classes,)`` holding a single
        1 per label along the last axis and 0 everywhere else.
    """
    labels = np.asarray(labels)
    classes = np.unique(labels)
    one_hot_labels = np.zeros(labels.shape + (classes.size,))
    # enumerate() yields the column position; `c` is only used for matching.
    for column, c in enumerate(classes):
        one_hot_labels[labels == c, column] = 1
    return one_hot_labels


def _prepare_split(split):
    """Convert one (images, labels) pair to float32 (N, 28, 28, 1) images and int32 labels."""
    images, targets = split
    images = images.astype('float32').reshape(images.shape[0], 28, 28, 1)
    targets = targets.astype('int32')
    return images, targets


def mnist(datasets_dir='./data'):
    """Load MNIST from ``<datasets_dir>/mnist.pkl.gz``, downloading it first if missing.

    Args:
        datasets_dir: directory that holds (or will hold) ``mnist.pkl.gz``.
            It is created, including missing parents, when it does not exist.

    Returns:
        Tuple ``(train_x, train_y, valid_x, valid_y, test_x, test_y)``.
        Every ``*_x`` is a float32 array reshaped to ``(N, 28, 28, 1)`` and
        every ``*_y`` is an int32 array.

    Raises:
        Whatever the download, ``gzip`` or ``pickle`` raise on failure; a
        failed download does not leave a truncated ``mnist.pkl.gz`` behind.
    """
    if not os.path.exists(datasets_dir):
        os.makedirs(datasets_dir)
    data_file = os.path.join(datasets_dir, 'mnist.pkl.gz')
    if not os.path.exists(data_file):
        print('... downloading MNIST from the web')
        # Pick the right urlretrieve by import location instead of probing
        # with a real network request.
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        # Download next to the target and rename on success, so an interrupted
        # transfer is never mistaken for a complete cache file on the next run.
        partial_file = data_file + '.part'
        try:
            urlretrieve(url, partial_file)
            os.rename(partial_file, data_file)
        finally:
            if os.path.exists(partial_file):
                os.remove(partial_file)

    print('... loading data')
    # Load the dataset.
    # NOTE(review): pickle.load can execute arbitrary code and the archive is
    # fetched over plain HTTP -- only use this with a source you trust.
    with gzip.open(data_file, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding="latin1")
        except TypeError:
            # Python 2's pickle.load does not accept `encoding`.
            f.seek(0)
            train_set, valid_set, test_set = pickle.load(f)

    test_x, test_y = _prepare_split(test_set)
    valid_x, valid_y = _prepare_split(valid_set)
    train_x, train_y = _prepare_split(train_set)
    print('... done loading data')
    return train_x, train_y, valid_x, valid_y, test_x, test_y

def unhot(one_hot_labels):
    """Collapse a one-hot encoding into a flat vector of class indices.

    The index of the largest entry along the last axis is taken for every
    encoded label, so the result has the input's shape minus its last axis.
    """
    class_indices = np.argmax(one_hot_labels, -1)
    return class_indices

def cnn_network(features, labels, mode):
    """Estimator ``model_fn``: two conv/pool stages, a dense layer and a 10-way logits layer.

    Architecture, as built below:
        input  -> reshape of ``features["x"]`` to ``[-1, 28, 28, 1]``
        conv1  -> 16 filters, 5x5, "same" padding, ReLU; then 2x2 max-pool, stride 2
        conv2  -> 16 filters, 5x5, "same" padding, ReLU; then 2x2 max-pool, stride 2
        dense  -> flatten to 7 * 7 * 16, 128 units, ReLU, dropout (rate 0.4)
        logits -> 10 units

    Dropout is only active in TRAIN mode, so evaluation and prediction are
    deterministic. The filter count (16) and the learning rate (0.0001) are
    fixed inside this function.

    Args:
        features: dict whose ``"x"`` entry holds the input images.
        labels: class ids fed to ``sparse_softmax_cross_entropy``; not used in
            PREDICT mode.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec``:
          * PREDICT: carries ``predictions`` (``"classes"``, ``"probabilities"``)
          * TRAIN:   carries ``loss`` and ``train_op``
          * EVAL:    carries ``loss`` and an ``"accuracy"`` metric
        ``None`` for any other mode.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Input layer
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Convolutional layer #1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=16,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer #1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional layer #2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=16,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer #2
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Dense layer: flatten the pooled feature maps into one vector per example.
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 16])
    dense = tf.layers.dense(inputs=pool2_flat, units=128, activation=tf.nn.relu)
    # Dropout must be switched off outside training, otherwise validation/test
    # metrics are computed on a randomly thinned network.
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=is_training)

    # Logits layer
    logits = tf.layers.dense(inputs=dropout, units=10)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`, which looks the tensor up by this exact name.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    # PREDICT has no labels, so it has to return before the loss is built.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        print("in training mode............")
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    if mode == tf.estimator.ModeKeys.EVAL:
        print("in validation mode............")
        eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
    
   
    
def train_and_validate(x_train, y_train, x_valid, y_valid, num_epochs, lr, num_filters, batch_size):
    """Train the CNN estimator and evaluate it on the validation data after every iteration.

    Args:
        x_train, y_train: training inputs and labels.
        x_valid, y_valid: validation inputs and labels.
        num_epochs: number of train/evaluate iterations to run; exactly this
            many entries end up in the returned learning curve.
        lr: accepted for interface compatibility but not used here; the model
            function sets its own learning rate.
        num_filters: accepted for interface compatibility but not used here;
            the model function sets its own filter count.
        batch_size: mini-batch size used for training.

    Returns:
        ``(learning_curve, model)`` where ``learning_curve`` is the list of
        result dicts returned by ``Estimator.evaluate`` (one per iteration) and
        ``model`` is the trained ``tf.estimator.Estimator``.
    """
    # Set up logging for predictions ("softmax_tensor" is the name given to the
    # softmax op inside the model function).
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=20)

    # Create the Estimator.
    # NOTE(review): model_dir is fixed, so a later run presumably resumes from
    # the checkpoints an earlier run left there -- confirm this is intended.
    mnist_classifier = tf.estimator.Estimator(
        model_fn=cnn_network,
        model_dir="./mnist_convnet_modellearningrate0001-5x5")

    # Training input: shuffled and repeated indefinitely; `steps` below bounds each call.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": x_train},
        y=y_train,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)

    # Validation input: a single ordered pass over the validation data.
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": x_valid},
        y=y_valid,
        num_epochs=1,
        shuffle=False)

    learning_curve = []

    # range(num_epochs), not range(1, num_epochs): the latter silently ran one
    # iteration fewer than requested.
    # NOTE(review): steps=1 means every iteration trains on a single mini-batch,
    # not on a full pass over the training data.
    for _ in range(num_epochs):
        train_results = mnist_classifier.train(input_fn=train_input_fn, hooks=[logging_hook], steps=1)
        print("training results")
        print(train_results)

        eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
        print("eval results")
        print(eval_results)
        learning_curve.append(eval_results)

    return learning_curve, mnist_classifier


def test(x_test, y_test, model):
    """Evaluate a trained estimator on the test data.

    A single, unshuffled pass over ``x_test``/``y_test`` is fed to
    ``model.evaluate`` and whatever that call returns is handed back.
    """
    input_fn_options = dict(x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False)
    test_input_fn = tf.estimator.inputs.numpy_input_fn(**input_fn_options)
    return model.evaluate(input_fn=test_input_fn)


if __name__ == "__main__":
    # Entry point: train the CNN on MNIST, evaluate it on the test split and
    # write the hyperparameters and metrics to <output_path>/results/results_run_<id>.json.
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_path", default="./", type=str, nargs="?",
                        help="Path where the results will be stored")
    parser.add_argument("--input_path", default="./", type=str, nargs="?",
                        help="Path where the data is located. If the data is not available it will be downloaded first")
    parser.add_argument("--learning_rate", default=1e-3, type=float, nargs="?", help="Learning rate for SGD")
    parser.add_argument("--num_filters", default=32, type=int, nargs="?",
                        help="The number of filters for each convolution layer")
    parser.add_argument("--batch_size", default=128, type=int, nargs="?", help="Batch size for SGD")
    parser.add_argument("--epochs", default=12, type=int, nargs="?",
                        help="Determines how many epochs the network will be trained")
    parser.add_argument("--run_id", default=0, type=int, nargs="?",
                        help="Helps to identify different runs of an experiments")

    # parse_known_args() ignores argv entries the parser does not define instead of
    # exiting, so this also works when the host process adds its own arguments
    # (presumably the case when the cell runs inside a notebook kernel).
    args, _unknown_args = parser.parse_known_args()

    # hyperparameters
    # These stay fixed to the values of this experiment rather than being read
    # from `args`, whose defaults differ; only the paths and the run id come
    # from the command line.
    lr = [0.1, 0.01, 0.001, 0.0001]  # args.learning_rate

    def get_learning_rates():
        return lr

    num_filters = 16  # args.num_filters
    batch_size = 128  # args.batch_size
    epochs = 2000  # args.epochs

    # train and test convolutional neural network
    x_train, y_train, x_valid, y_valid, x_test, y_test = mnist(args.input_path)

    # for filter 5x5
    learning_curve, model = train_and_validate(x_train, y_train, x_valid, y_valid, epochs, lr, num_filters, batch_size)
    test_error = test(x_test, y_test, model)
    print("test results:", test_error)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["lr"] = lr
    results["num_filters"] = num_filters
    results["batch_size"] = batch_size
    results["learning_curve"] = learning_curve
    results["test_error"] = test_error

    path = os.path.join(args.output_path, "results")
    os.makedirs(path, exist_ok=True)

    fname = os.path.join(path, "results_run_%d.json" % args.run_id)

    def _to_builtin(value):
        """json.dump fallback: turn numpy scalars/arrays into plain Python values."""
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        raise TypeError("Object of type %s is not JSON serializable" % type(value).__name__)

    # `with` closes the file even if serialisation fails.
    with open(fname, "w") as fh:
        json.dump(results, fh, default=_to_builtin)


In [None]:
#plot learning curve
import matplotlib.pyplot as plt

def plot(accuracy,max_epochs):
    """Draw the validation-accuracy curve on a new single-axes figure.

    ``accuracy`` is the sequence of values to plot and ``max_epochs`` sets the
    right end of the x axis (``max_epochs + 10``). The legend entry also shows
    the test accuracy, which is read from the module-level ``test_error`` dict.
    """
    figure, axes = plt.subplots(1, 1)

    # Titles and axis labels.
    axes.set_title('Validation accuracy')
    axes.set_xlabel('Epoch')
    axes.set_ylabel('Accuracy')

    # Axis ranges: accuracy is shown on [0, 1], the x axis gets 10 units of slack.
    x_range = [0, max_epochs+10]
    y_range = [0, 1]
    axes.set_xlim(x_range)
    axes.set_ylim(y_range)

    # The curve itself, with the run description as its legend entry.
    legend_text = ' learning rate=0.0001 \n 16 5x5 filters\n test accuracy={:.4f}'.format(test_error['accuracy'])
    axes.plot(accuracy, c='b', label=legend_text)
    axes.legend(loc='upper right')

    figure.canvas.draw()
    
# Pull the validation accuracy out of every evaluation record collected during
# training and draw the resulting curve.
accuracy = [entry['accuracy'] for entry in learning_curve]
plot(accuracy, len(learning_curve))
    