In [None]:
import os

import numpy as np
import tensorflow as tf


# Start from an empty default graph (matters when this cell is re-run in a
# live kernel) and surface Estimator progress messages.
tf.reset_default_graph()
tf.logging.set_verbosity(tf.logging.INFO)

# Directory for checkpoints and summaries. The default is the original
# machine-specific location; set CNN_MNIST_MODEL_DIR to relocate it without
# editing the notebook. The directory name spells out the layer stack.
model_dir = os.environ.get(
    "CNN_MNIST_MODEL_DIR",
    "G:\\cnn-mnist-3x3-valid-C64_P2_C256_C324_C400_C905_D452_D1024_D144_D36_D10")

# Passed as `rate` to tf.layers.dropout in deep(), i.e. the fraction of units
# that is dropped during training.
dropout_rate = 0.666


In [None]:
# Running layer counter shared by convolution(), pool() and deep(): each call
# increments it and uses the new value in the printed layer label (and, for
# conv/pool layers, in the op name).
idx = 0 # The layer index

def normalize(mode, input):
    """Apply batch normalization to `input`.

    Args:
        mode: a tf.estimator.ModeKeys value. Batch statistics are used only
            in TRAIN mode; in EVAL/PREDICT the layer's moving averages are
            used, so outputs no longer depend on what else is in the batch.
        input: tensor to normalize.

    Returns:
        The normalized tensor. In TRAIN mode it carries a control dependency
        on the layer's moving-average updates, so those averages are
        refreshed on every step that evaluates the output.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Default-named layer object: yields the same variable scope names
    # ("batch_normalization", "batch_normalization_1", ...) as the functional
    # tf.layers.batch_normalization call, but exposes `.updates`.
    batch_norm = tf.layers.BatchNormalization()
    output = batch_norm(input, training=is_training)

    if is_training:
        # The moving mean/variance assignments are not part of the forward
        # pass; tie them to the output so they run without relying on the
        # caller to execute tf.GraphKeys.UPDATE_OPS.
        with tf.control_dependencies(batch_norm.updates):
            output = tf.identity(output)
    return output

def convolution(mode, input_layer, filters, kernel_size, padding="VALID"):
    """Add a ReLU separable 2-D convolution followed by normalize().

    Bumps the global layer counter, prints the layer label, and names the
    convolution op "conv<idx>_".

    Args:
        mode: forwarded unchanged to normalize().
        input_layer: input tensor for the convolution.
        filters: number of output filters.
        kernel_size: kernel size passed to tf.layers.separable_conv2d.
        padding: padding scheme passed to tf.layers.separable_conv2d.

    Returns:
        The normalized convolution output.
    """
    global idx
    idx += 1
    print("Layer: conv" + str(idx))

    conv_out = tf.layers.separable_conv2d(
        inputs=input_layer,
        filters=filters,
        kernel_size=kernel_size,
        padding=padding,
        activation=tf.nn.relu,
        name="conv" + str(idx) + "_")
    return normalize(mode, conv_out)
                     
def pool(mode, input_layer, pool_size=[2,2], strides=[2,2]):
    """Add a 2-D max-pooling layer.

    Bumps the global layer counter and prints the layer label. Note the
    printed label ends in "_" while the op itself is named "pool<idx>".

    Args:
        mode: accepted for signature parity with the other layer helpers;
            not used here.
        input_layer: tensor to pool.
        pool_size: pooling window size.
        strides: pooling strides.

    Returns:
        The pooled tensor.
    """
    global idx
    idx += 1
    print("Layer: pool" + str(idx) + "_")

    pooled = tf.layers.max_pooling2d(
        name="pool" + str(idx),
        inputs=input_layer,
        pool_size=pool_size,
        strides=strides)
    return pooled

def deep(mode, layer, units, reshape=None):
    """Add a dropout -> dense(ReLU) -> batch-norm stage.

    Bumps the global layer counter and prints the layer label.

    Args:
        mode: a tf.estimator.ModeKeys value; dropout is active only in TRAIN.
        layer: input tensor.
        units: number of units in the dense layer.
        reshape: optional target shape; when given, `layer` is reshaped to it
            first (used to flatten convolutional feature maps).

    Returns:
        The output tensor of the stage.
    """
    global idx
    idx = idx + 1
    print("Layer: deep" + str(idx) + "_")

    # Identity check rather than `!= None`: an array-like shape would turn
    # `!=` into an element-wise comparison with no single truth value.
    if reshape is not None:
        layer = tf.reshape(layer, reshape)

    layer = tf.layers.dropout(
        inputs=layer,
        rate=dropout_rate,
        training=(mode == tf.estimator.ModeKeys.TRAIN))
    layer = tf.layers.dense(inputs=layer, units=units, activation=tf.nn.relu)

    # Go through the shared helper so dense and convolutional stages always
    # apply the same batch-normalization policy.
    return normalize(mode, layer)


In [None]:


                     
def cnn_model_fn(features, labels, mode):
    """Model function for CNN.

    Builds five 3x3 VALID separable convolutions (with one 2x2 max-pool after
    the first), four dense stages and a 10-way logits layer.

    Args:
        features: dict with key "x"; its value is reshaped to
            [-1, 28, 28, 1].
        labels: integer class labels; not used in PREDICT mode.
        mode: a tf.estimator.ModeKeys value.

    Returns:
        A tf.estimator.EstimatorSpec: predictions only for PREDICT, loss and
        train op for TRAIN, loss and accuracy metric for EVAL.
    """
    # The layer helpers read and increment the module-level counter. Without
    # this declaration the reset below would only create a dead local and
    # layer names would keep drifting between model_fn invocations.
    global idx

    with tf.device("/gpu:0"):
        # Input Layer
        initial = tf.reshape(features["x"], [-1, 28, 28, 1])

        # Restart layer numbering so every graph build yields the same
        # "conv<n>_" / "pool<n>" names (needed to restore checkpoints).
        idx = 0
        layer = initial

        x3 = [3, 3]
        k = [64, 256, 324, 400, 905]   # Increased from 484 to balance out the parameters that were contributed by earlier layers

        # Level 0 (28x28 input, VALID padding shrinks each side by 2 per conv)
        layer = pool(mode, convolution(mode, layer, k[0], x3)) # 13x13
        layer = convolution(mode, layer, k[1], x3) # 11x11
        layer = convolution(mode, layer, k[2], x3) # 9x9
        layer = convolution(mode, layer, k[3], x3) # 7x7
        layer = convolution(mode, layer, k[4], x3) # 5x5

        # Deep encode each leaf of the hierarchy
        layer = deep(mode, layer, int(k[4]/2), [-1, 5*5*k[4]])
        layer = deep(mode, layer, 1024)
        layer = deep(mode, layer, 144)
        last = deep(mode, layer, 36)

        # Logits Layer
        # Feed the 36-unit stage into the logits. Previously the logits were
        # built from the 144-unit tensor, leaving the 36-unit stage dangling.
        # NOTE: this changes the shape of last_layer's kernel, so checkpoints
        # written by the old graph cannot be restored into this one.
        logits = tf.layers.dense(inputs=last, units=10, name="last_layer")

        predictions = {
          # Generate predictions (for PREDICT and EVAL mode)
          "classes": tf.argmax(input=logits, axis=1),
          # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
          # `logging_hook`.
          "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

        # Calculate Loss (for both TRAIN and EVAL modes)
        onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
        print(onehot_labels)
        print(logits)

        loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)

        # The Estimator collects and writes summaries itself, so no explicit
        # merge_all() call is needed here.
        tf.summary.scalar('loss', loss)

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.AdamOptimizer()
            # tf.layers batch normalization registers its moving-average
            # updates in UPDATE_OPS; they only run if the train op depends
            # on them.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(
                    loss=loss,
                    global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

        # Add evaluation metrics (for EVAL mode)
        eval_metric_ops = {
          "accuracy": tf.metrics.accuracy(
              labels=labels, predictions=predictions["classes"])}
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            eval_metric_ops=eval_metric_ops
        )

In [None]:
def trainTheModel(train_data, train_labels, eval_data, eval_labels,
                  epochs=200, steps_per_epoch=500, batch_size=100):
    """Train cnn_model_fn with an Estimator, evaluating after every epoch.

    Args:
        train_data: training inputs, fed to the model as features["x"].
        train_labels: training labels.
        eval_data: evaluation inputs, fed to the model as features["x"].
        eval_labels: evaluation labels.
        epochs: number of train/evaluate rounds (default 200).
        steps_per_epoch: training steps per round (default 500).
        batch_size: batch size for both input functions (default 100).

    Returns:
        The tf.estimator.Estimator that was trained.
    """
    global idx

    # Create the Estimator
    session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.75

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(
        save_checkpoints_steps=1000,
        session_config=session_config,
        keep_checkpoint_max=1000)

    estimator = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir=model_dir, config=run_config)

    # Training input: shuffled and repeated indefinitely; the number of steps
    # passed to train() bounds each round.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)

    # Evaluation input: exactly one ordered pass, so each example is scored
    # once and the reported metrics are reproducible. (A shuffled, endlessly
    # repeating input scores a random sample with possible repeats/omissions.)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data},
        y=eval_labels,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)

    # NOTE(review): the Estimator already saves summaries on its own
    # schedule; confirm this extra hook (with a bare Scaffold) is still needed.
    summary_hook = tf.train.SummarySaverHook(
        100,
        output_dir=model_dir,
        scaffold=tf.train.Scaffold())

    for _ in range(epochs):
        # Reset the layer counter before each graph build so layer names
        # match the ones stored in the checkpoints.
        idx = 0
        estimator.train(
            input_fn=train_input_fn,
            steps=steps_per_epoch, hooks=[summary_hook])
        tf.reset_default_graph()
        idx = 0
        # No `steps`: run until the single-pass eval input is exhausted.
        estimator.evaluate(input_fn=eval_input_fn)

    return estimator
    
    

In [None]:

def main(args):
    """Load MNIST and hand the train/test splits to trainTheModel().

    Args:
        args: argument list supplied by tf.app.run; not used.
    """
    mnist = tf.contrib.learn.datasets.load_dataset("mnist")
    train_split = mnist.train
    test_split = mnist.test

    # Labels are converted to int32 arrays; images are passed through as-is.
    trainTheModel(
        train_split.images,
        np.asarray(train_split.labels, dtype=np.int32),
        test_split.images,
        np.asarray(test_split.labels, dtype=np.int32))

if __name__ == "__main__":
    # tf.app.run finishes by raising SystemExit; catch it so running this
    # cell reports completion instead of surfacing the exit as an error.
    try:
        tf.app.run(main=main)
    except SystemExit:
        print('System Exited')