reference: https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/06_CIFAR-10.ipynb

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import os

## Load the data

In [2]:
import cifar10

In [3]:
# Make sure the CIFAR-10 archive is available locally before loading it.
# NOTE(review): judging by the name and the recorded output, this is a no-op
# when the data is already downloaded and unpacked -- confirm in cifar10.py.
cifar10.maybe_download_and_extract()

Data has apparently already been downloaded and unpacked.


In [4]:
# Load the training and test splits; each loader's result is unpacked into
# an (images, cls, labels) triple.
# NOTE(review): presumably `cls_*` holds integer class ids and `labels_*` the
# matching one-hot rows (the labels are later fed to a [None, 10] placeholder)
# -- confirm against cifar10.py.
images_train, cls_train, labels_train = cifar10.load_training_data()
images_test, cls_test, labels_test = cifar10.load_test_data()

Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_1
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_2
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_3
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_4
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_5
Loading data: data/CIFAR-10/cifar-10-batches-py/test_batch


In [5]:
# Hold out the first 5000 training examples for validation and keep the
# remainder for training; images and one-hot labels are split at the same index.
X_val, X_train = images_train[:5000, :, :, :], images_train[5000:, :, :, :]
y_onehot_val, y_onehot = labels_train[:5000, :], labels_train[5000:, :]

In [6]:
# evaluate performance on some data 
def perf_eval(logit_pred, y_true):
    """Print a quick comparison of predicted vs. true classes.

    Args:
        logit_pred: 2-D array of per-class scores, one row per example.
        y_true: 2-D array of one-hot true labels with the same row order.

    Prints the predicted and true class ids of the first 20 examples, followed
    by the column-wise sum of the `y_true` rows that were classified correctly
    (i.e. the number of correct predictions per class for one-hot labels).
    Returns nothing.
    """
    predicted_cls = np.argmax(logit_pred, 1)
    true_cls = np.argmax(y_true, 1)

    # a peek at the first few examples
    print('    sample pred: {0}\n    sample true: {1}'.format(predicted_cls[0:20], true_cls[0:20]))

    # where did we get it right?  summing the one-hot rows of the hits gives
    # a per-class count of correct predictions
    hit_mask = np.equal(predicted_cls, true_cls)
    hits_per_class = y_true[hit_mask, :].sum(axis=0)
    print('    correct predictions by class: {0}'.format(hits_per_class))

In [7]:
# cnn conv stuff
def conv(x, W):
    """Convolve `x` with filter bank `W` via tf.nn.conv2d, unit strides, 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def maxpool(x):
    """Apply tf.nn.max_pool with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

def norm(x):
    """Local response normalization via tf.nn.lrn (cf. section 3.3 of Krizhevsky 2012).

    NOTE(review): tf.nn.lrn documents `depth_radius` as the half-width of the
    normalization window, so 5 would span 11 channels, whereas the cited paper
    uses a window of n=5 -- confirm the wider window is intended.
    """
    lrn_params = dict(depth_radius=5, bias=2, alpha=1e-4, beta=0.75)
    return tf.nn.lrn(x, **lrn_params)

In [8]:
# elaborate the compute_logits code to include a variety of models
def compute_logits(x, model_type, pkeep):
    """Build the graph of the selected classifier and return its logits tensor.

    The model names spell out the layer sequence: c = 5x5 conv + ReLU,
    p = max pool, n = local response norm, f = fully connected, d = dropout.

    Args:
        x: input batch; the 'lr' model multiplies it directly by a
            [32*32*3, 10] weight matrix, every other model first reshapes it
            to [-1, 32, 32, 3].
        model_type: one of 'lr', 'cnn_cf', 'cnn_cnf', 'cnn_cpncpnff',
            'cnn_cpncpnfdf' or 'vgg16'.
        pkeep: dropout keep probability; only used by 'cnn_cpncpnfdf'.

    Returns:
        The logits tensor (10 columns), named 'logits_<model_type>'.

    Raises:
        ValueError: if `model_type` is not one of the names above.
    """
    if model_type == 'lr':
        # multinomial logistic regression straight on the flattened pixels
        logits = _affine(x, 32*32*3, 10, '', name='logits_lr')
    elif model_type in ('cnn_cf', 'cnn_cnf'):
        # 1 layer cnn, optionally followed by a normalization layer
        n1 = 64
        x_image = tf.reshape(x, [-1, 32, 32, 3])  # batch, then width, height, channels
        h = _conv_relu(x_image, '_conv1', [5, 5, 3, n1])
        if model_type == 'cnn_cnf':
            h = norm(h)
        # fc layer to logits
        h_flat = tf.reshape(h, [-1, 32*32*n1])
        logits = _affine(h_flat, 32*32*n1, 10, '_fc1', name='logits_' + model_type)
    elif model_type in ('cnn_cpncpnff', 'cnn_cpncpnfdf'):
        # 2 layer cnn; the '...fdf' variant adds dropout before the readout
        n1, n2, n3 = 32, 64, 1024
        x_image = tf.reshape(x, [-1, 32, 32, 3])  # batch, then width, height, channels
        # conv -> pool -> norm, twice
        h = norm(maxpool(_conv_relu(x_image, '_conv1', [5, 5, 3, n1])))
        h = norm(maxpool(_conv_relu(h, '_conv2', [5, 5, n1, n2])))
        # fc layer (8x8 since 2 rounds of maxpool)
        h_flat = tf.reshape(h, [-1, 8*8*n2])
        h_fc1 = tf.nn.relu(_affine(h_flat, 8*8*n2, n3, '_fc1'))
        if model_type == 'cnn_cpncpnfdf':
            h_fc1 = tf.nn.dropout(h_fc1, pkeep)
        # one more fc layer: the logistic layer with softmax readout
        logits = _affine(h_fc1, n3, 10, '_fc2', name='logits_' + model_type)
    elif model_type == 'vgg16':
        # output widths of the 3x3 conv layers in each block; every block ends
        # with a max pool.  Variables are named W_conv<block><layer>.
        block_widths = [
            [64, 64],
            [128, 128],
            [256, 256, 256],
            [512, 512, 512],
            [512, 512, 512],
        ]
        h = tf.reshape(x, [-1, 32, 32, 3])
        n_in = 3
        for block_idx, widths in enumerate(block_widths, start=1):
            for layer_idx, n_out in enumerate(widths, start=1):
                suffix = '_conv{0}{1}'.format(block_idx, layer_idx)
                # only the very first layer prints its variables/activation
                is_first = (block_idx == 1 and layer_idx == 1)
                h = _conv_relu(h, suffix, [3, 3, n_in, n_out], debug=is_first)
                n_in = n_out
            h = maxpool(h)

        # fc1-4096, fc2-4096, fc3-1000
        n_flat = int(np.prod(h.get_shape()[1:]))
        h = tf.reshape(h, [-1, n_flat])
        h = tf.nn.relu(_affine(h, n_flat, 4096, '_fc1'))
        h = tf.nn.relu(_affine(h, 4096, 4096, '_fc2'))
        h = tf.nn.relu(_affine(h, 4096, 1000, '_fc3'))

        # final readout to the 10 class logits
        logits = _affine(h, 1000, 10, '_sf', name='logits_vgg16')
    else:
        # fail loudly instead of printing and then crashing on an unbound `logits`
        raise ValueError('error not a valid model type: {0!r}'.format(model_type))
    print(logits)
    return logits


def _conv_relu(x, suffix, filter_shape, debug=False):
    """'SAME' convolution + bias + ReLU using variables 'W<suffix>' and 'b<suffix>'.

    `filter_shape` is [height, width, in_channels, out_channels]; the bias has
    one entry per output channel.  With `debug` set, the variables and the
    activation tensor are printed as they are created.
    """
    W = tf.get_variable('W' + suffix, shape=filter_shape)
    b = tf.get_variable('b' + suffix, shape=[filter_shape[-1]])
    if debug:
        print(W, b)
    h = tf.nn.relu(tf.add(conv(x, W), b))
    if debug:
        print(h)
    return h


def _affine(x, n_in, n_out, suffix, name=None):
    """Return x @ W + b (no activation) using variables 'W<suffix>' and 'b<suffix>'.

    `name`, when given, names the resulting add op.
    """
    W = tf.get_variable('W' + suffix, shape=[n_in, n_out])
    b = tf.get_variable('b' + suffix, shape=[n_out])
    return tf.add(tf.matmul(x, W), b, name=name)

def compute_cross_entropy(logits, y, numerical_instability_example=False):
    """Return the softmax cross-entropy between `logits` and `y`, averaged over examples.

    Args:
        logits: unnormalized class scores, one row per example.
        y: target rows with the same shape as `logits` (the notebook feeds
            one-hot labels).
        numerical_instability_example: when true, use the naive
            -sum(y * log(softmax(logits))) formula instead of the fused op.
            Kept only to demonstrate its numerical problems; defaults to off.

    Returns:
        A scalar tensor holding the mean cross-entropy.
    """
    if numerical_instability_example:
        y_pred = tf.nn.softmax(logits, name='y_pred') # the predicted probability for each example.
        cross_ent = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_pred), reduction_indices=[1]))
    else:
        print(logits,y)
        # The non-v2 op is deprecated.  The v2 op lets gradients flow into the
        # labels, so stop them explicitly to keep the previous behaviour.
        sm_ce = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=tf.stop_gradient(y), logits=logits, name='cross_ent_terms')
        cross_ent = tf.reduce_mean(sm_ce, name='cross_ent')
    return cross_ent

def compute_accuracy(logits, y):
    """Return the fraction of rows whose argmax over `logits` matches the argmax over `y`."""
    predicted_class = tf.argmax(logits, 1, name='pred_class')
    actual_class = tf.argmax(y, 1, name='true_class')
    # 1.0 for a hit, 0.0 for a miss; the mean of that is the accuracy
    hits = tf.cast(tf.equal(predicted_class, actual_class), tf.float32)
    return tf.reduce_mean(hits)

# VGG

In [9]:
# Run configuration: which model/optimizer to use and where summaries go.
model_type = 'vgg16'            # a model name understood by compute_logits
opt_method = 'sgd'              # 'sgd', 'rms' or 'adam'
dir_name = 'logs/scratch04x/'   # root directory for TensorBoard summaries

# Data sizes: mini-batch size, and the sizes used to index the data splits
# (n - n_val examples are sampled for training, n_val are used for validation).
batch_size = 50
n, n_val = 50000, 5000

In [10]:
# Build the graph for `model_type`, train it with `opt_method` for 301 steps,
# and every 100 steps write TensorBoard summaries and print diagnostics.
with tf.Graph().as_default():
    # We build the model here as before
    x = tf.placeholder(tf.float32, [None, 32*32*3], name='x')
    y = tf.placeholder(tf.float32, [None, 10], name='y')
    pkeep = tf.placeholder(tf.float32, name='pkeep')

    with tf.name_scope('model'):
        logits = compute_logits(x, model_type, pkeep)
    with tf.name_scope('loss'):
        loss = compute_cross_entropy(logits=logits, y=y)
    with tf.name_scope('accuracy'):
        accuracy = compute_accuracy(logits, y)

    with tf.name_scope('opt'):
        if opt_method == 'sgd':
            # NOTE(review): 0.5 is a large step size for plain SGD -- confirm it
            # actually converges for the selected model before long runs.
            opt = tf.train.GradientDescentOptimizer(0.5)
        elif opt_method == 'rms':
            opt = tf.train.RMSPropOptimizer(.001)
        elif opt_method == 'adam':
            opt = tf.train.AdamOptimizer(1e-4)
        else:
            # fail fast; otherwise this surfaces as a NameError on `opt` below
            raise ValueError('unknown opt_method: {0!r}'.format(opt_method))
        train_step = opt.minimize(loss)

    with tf.name_scope('summaries'):
        # create summary for loss and accuracy
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        # create summary for logits
        tf.summary.histogram('logits', logits)
        # create summary for input image
        tf.summary.image('input', tf.reshape(x, [-1, 32, 32, 3]))

        summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(dir_name, sess.graph)
        summary_writer_train = tf.summary.FileWriter(dir_name+'/train', sess.graph)
        summary_writer_val = tf.summary.FileWriter(dir_name+'/val')

        try:
            sess.run(tf.global_variables_initializer())

            for i in range(301):
                # sample a mini-batch (with replacement) from the training split
                batch = np.random.randint(X_train.shape[0], size=batch_size)
                X_batch = X_train[batch,:,:,:].reshape([batch_size,-1])
                y_batch = y_onehot[batch]
                train_feed = {x: X_batch, y: y_batch, pkeep:0.85}

                if i%100 != 0:
                    # ordinary step: summaries are not written, so don't compute them
                    sess.run(train_step, feed_dict=train_feed)
                    continue

                # logging step: train and record the summary of this mini-batch
                _ , summary = sess.run((train_step, summary_op), feed_dict=train_feed)
                summary_writer_train.add_summary(summary, i)

                # print diagnostics on the first 1000 training examples
                # (`train_error` actually holds the accuracy)
                X_batch = X_train[0:1000,:,:,:].reshape([1000,-1])
                y_batch = y_onehot[0:1000]
                (train_error,train_logits) = sess.run((accuracy,logits), {x: X_batch, y: y_batch, pkeep:1.0})
                print("\rStep {0:3d}: training accuracy {1:0.4f}".format(i, train_error), flush=True)
                # further diagnostics
                perf_eval(train_logits, y_batch)

                # accuracy and summaries on the whole validation split
                X_batch = X_val.reshape([n_val,-1])
                y_batch = y_onehot_val
                (val_error, summary) = sess.run((accuracy,summary_op), {x:X_batch, y:y_batch, pkeep:1.0})
                print("\rStep {0:3d}: val accuracy {1:0.4f}".format(i, val_error), flush=True)
                summary_writer_val.add_summary(summary, i)
        finally:
            # flush and release the event files even if training is interrupted
            for writer in (summary_writer, summary_writer_train, summary_writer_val):
                writer.close()


<tf.Variable 'W_conv11:0' shape=(3, 3, 3, 64) dtype=float32_ref> <tf.Variable 'b_conv11:0' shape=(64,) dtype=float32_ref>
Tensor("model/Relu:0", shape=(?, 32, 32, 64), dtype=float32)
Tensor("model/logits_vgg16:0", shape=(?, 10), dtype=float32)
Tensor("model/logits_vgg16:0", shape=(?, 10), dtype=float32) Tensor("y:0", shape=(?, 10), dtype=float32)
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Step   0: training accuracy 0.0910
    sample pred: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 91.  0.  0.  0.  0.  0.  0.  0.  0.]
Step   0: val accuracy 0.0920


KeyboardInterrupt: 

In [11]:
# Switch to logistic regression, build its graph, train it with `opt_method`
# for 301 steps, and every 100 steps write TensorBoard summaries and print
# diagnostics.
model_type =  'lr'
with tf.Graph().as_default():
    # We build the model here as before
    x = tf.placeholder(tf.float32, [None, 32*32*3], name='x')
    y = tf.placeholder(tf.float32, [None, 10], name='y')
    pkeep = tf.placeholder(tf.float32, name='pkeep')

    with tf.name_scope('model'):
        logits = compute_logits(x, model_type, pkeep)
    with tf.name_scope('loss'):
        loss = compute_cross_entropy(logits=logits, y=y)
    with tf.name_scope('accuracy'):
        accuracy = compute_accuracy(logits, y)

    with tf.name_scope('opt'):
        if opt_method == 'sgd':
            # NOTE(review): 0.5 is a large step size for plain SGD -- confirm it
            # actually converges for the selected model before long runs.
            opt = tf.train.GradientDescentOptimizer(0.5)
        elif opt_method == 'rms':
            opt = tf.train.RMSPropOptimizer(.001)
        elif opt_method == 'adam':
            opt = tf.train.AdamOptimizer(1e-4)
        else:
            # fail fast; otherwise this surfaces as a NameError on `opt` below
            raise ValueError('unknown opt_method: {0!r}'.format(opt_method))
        train_step = opt.minimize(loss)

    with tf.name_scope('summaries'):
        # create summary for loss and accuracy
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        # create summary for logits
        tf.summary.histogram('logits', logits)
        # create summary for input image
        tf.summary.image('input', tf.reshape(x, [-1, 32, 32, 3]))

        summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(dir_name, sess.graph)
        summary_writer_train = tf.summary.FileWriter(dir_name+'/train', sess.graph)
        summary_writer_val = tf.summary.FileWriter(dir_name+'/val')

        try:
            sess.run(tf.global_variables_initializer())

            for i in range(301):
                # sample a mini-batch (with replacement) from the training split
                batch = np.random.randint(X_train.shape[0], size=batch_size)
                X_batch = X_train[batch,:,:,:].reshape([batch_size,-1])
                y_batch = y_onehot[batch]
                train_feed = {x: X_batch, y: y_batch, pkeep:0.85}

                if i%100 != 0:
                    # ordinary step: summaries are not written, so don't compute them
                    sess.run(train_step, feed_dict=train_feed)
                    continue

                # logging step: train and record the summary of this mini-batch
                _ , summary = sess.run((train_step, summary_op), feed_dict=train_feed)
                summary_writer_train.add_summary(summary, i)

                # print diagnostics on the first 1000 training examples
                # (`train_error` actually holds the accuracy)
                X_batch = X_train[0:1000,:,:,:].reshape([1000,-1])
                y_batch = y_onehot[0:1000]
                (train_error,train_logits) = sess.run((accuracy,logits), {x: X_batch, y: y_batch, pkeep:1.0})
                print("\rStep {0:3d}: training accuracy {1:0.4f}".format(i, train_error), flush=True)
                # further diagnostics
                perf_eval(train_logits, y_batch)

                # accuracy and summaries on the whole validation split
                X_batch = X_val.reshape([n_val,-1])
                y_batch = y_onehot_val
                (val_error, summary) = sess.run((accuracy,summary_op), {x:X_batch, y:y_batch, pkeep:1.0})
                print("\rStep {0:3d}: val accuracy {1:0.4f}".format(i, val_error), flush=True)
                summary_writer_val.add_summary(summary, i)
        finally:
            # flush and release the event files even if training is interrupted
            for writer in (summary_writer, summary_writer_train, summary_writer_val):
                writer.close()


Tensor("model/logits_lr:0", shape=(?, 10), dtype=float32)
Tensor("model/logits_lr:0", shape=(?, 10), dtype=float32) Tensor("y:0", shape=(?, 10), dtype=float32)
Step   0: training accuracy 0.0970
    sample pred: [6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0.  0.  0.  0.  0.  0. 97.  0.  0.  0.]
Step   0: val accuracy 0.1052
Step 100: training accuracy 0.1640
    sample pred: [1 9 1 1 1 1 1 1 9 1 0 9 1 1 1 0 1 1 1 1]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [60. 71.  9.  0.  0.  0.  5.  0.  0. 19.]
Step 100: val accuracy 0.1702
Step 200: training accuracy 0.1900
    sample pred: [7 8 1 1 3 3 3 3 8 8 8 8 3 1 1 3 8 3 1 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 27.  0. 64.  0.  0.  0. 15. 84.  0.]
Step 200: val accuracy 0.1940
Step 300: training accuracy 0.1770
    sample pred: [7 8 8 5 5 

In [12]:
# Long run (40001 steps): build the graph for the current `model_type`, train
# it with `opt_method`, and every 100 steps write TensorBoard summaries and
# print diagnostics.
# NOTE: `model_type` and `opt_method` are whatever earlier cells last set.
with tf.Graph().as_default():
    # We build the model here as before
    x = tf.placeholder(tf.float32, [None, 32*32*3], name='x')
    y = tf.placeholder(tf.float32, [None, 10], name='y')
    pkeep = tf.placeholder(tf.float32, name='pkeep')

    with tf.name_scope('model'):
        logits = compute_logits(x, model_type, pkeep)
    with tf.name_scope('loss'):
        loss = compute_cross_entropy(logits=logits, y=y)
    with tf.name_scope('accuracy'):
        accuracy = compute_accuracy(logits, y)

    with tf.name_scope('opt'):
        if opt_method == 'sgd':
            # NOTE(review): 0.5 is a large step size for plain SGD -- confirm it
            # actually converges for the selected model before long runs.
            opt = tf.train.GradientDescentOptimizer(0.5)
        elif opt_method == 'rms':
            opt = tf.train.RMSPropOptimizer(.001)
        elif opt_method == 'adam':
            opt = tf.train.AdamOptimizer(1e-4)
        else:
            # fail fast; otherwise this surfaces as a NameError on `opt` below
            raise ValueError('unknown opt_method: {0!r}'.format(opt_method))
        train_step = opt.minimize(loss)

    with tf.name_scope('summaries'):
        # create summary for loss and accuracy
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        # create summary for logits
        tf.summary.histogram('logits', logits)
        # create summary for input image
        tf.summary.image('input', tf.reshape(x, [-1, 32, 32, 3]))

        summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(dir_name, sess.graph)
        summary_writer_train = tf.summary.FileWriter(dir_name+'/train', sess.graph)
        summary_writer_val = tf.summary.FileWriter(dir_name+'/val')

        try:
            sess.run(tf.global_variables_initializer())

            for i in range(40001):
                # sample a mini-batch (with replacement) from the training split
                batch = np.random.randint(X_train.shape[0], size=batch_size)
                X_batch = X_train[batch,:,:,:].reshape([batch_size,-1])
                y_batch = y_onehot[batch]
                train_feed = {x: X_batch, y: y_batch, pkeep:0.85}

                if i%100 != 0:
                    # ordinary step: summaries are not written, so don't compute them
                    sess.run(train_step, feed_dict=train_feed)
                    continue

                # logging step: train and record the summary of this mini-batch
                _ , summary = sess.run((train_step, summary_op), feed_dict=train_feed)
                summary_writer_train.add_summary(summary, i)

                # print diagnostics on the first 1000 training examples
                # (`train_error` actually holds the accuracy)
                X_batch = X_train[0:1000,:,:,:].reshape([1000,-1])
                y_batch = y_onehot[0:1000]
                (train_error,train_logits) = sess.run((accuracy,logits), {x: X_batch, y: y_batch, pkeep:1.0})
                print("\rStep {0:3d}: training accuracy {1:0.4f}".format(i, train_error), flush=True)
                # further diagnostics
                perf_eval(train_logits, y_batch)

                # accuracy and summaries on the whole validation split
                X_batch = X_val.reshape([n_val,-1])
                y_batch = y_onehot_val
                (val_error, summary) = sess.run((accuracy,summary_op), {x:X_batch, y:y_batch, pkeep:1.0})
                print("\rStep {0:3d}: val accuracy {1:0.4f}".format(i, val_error), flush=True)
                summary_writer_val.add_summary(summary, i)
        finally:
            # flush and release the event files even if training is interrupted
            for writer in (summary_writer, summary_writer_train, summary_writer_val):
                writer.close()


Tensor("model/logits_lr:0", shape=(?, 10), dtype=float32)
Tensor("model/logits_lr:0", shape=(?, 10), dtype=float32) Tensor("y:0", shape=(?, 10), dtype=float32)
Step   0: training accuracy 0.1050
    sample pred: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [105.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
Step   0: val accuracy 0.1016
Step 100: training accuracy 0.1250
    sample pred: [6 6 9 6 6 6 6 6 6 6 0 6 6 6 6 6 6 6 6 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [10.  0.  2.  0.  0.  0. 97.  0.  8.  8.]
Step 100: val accuracy 0.1364
Step 200: training accuracy 0.1420
    sample pred: [6 6 1 6 6 6 6 6 6 6 0 6 6 1 1 4 6 6 6 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 5. 19.  0.  0. 23.  1. 93.  1.  0.  0.]
Step 200: val accuracy 0.1516
Step 300: training accuracy 0.1570
    sample pred: [

Step 3100: training accuracy 0.3070
    sample pred: [7 4 1 1 6 4 4 4 8 1 0 9 4 9 1 4 4 7 1 4]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [31. 41.  0.  3. 77.  0. 21. 42. 61. 31.]
Step 3100: val accuracy 0.3080
Step 3200: training accuracy 0.2160
    sample pred: [7 4 8 8 5 4 4 5 8 8 8 4 4 4 8 4 3 5 3 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0.  1.  0. 15. 84. 26.  0. 19. 71.  0.]
Step 3200: val accuracy 0.2246
Step 3300: training accuracy 0.1720
    sample pred: [8 8 1 1 5 8 8 8 8 8 8 8 8 1 1 8 8 5 1 8]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 37.  0.  0. 12. 18.  6.  2. 97.  0.]
Step 3300: val accuracy 0.1814
Step 3400: training accuracy 0.3250
    sample pred: [7 0 9 9 3 6 3 3 9 9 0 9 7 9 9 7 0 7 9 7]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [54.  0.  9. 24. 18.  3. 32. 

Step 6300: training accuracy 0.2530
    sample pred: [5 9 1 1 5 5 5 5 1 9 8 9 5 9 1 5 5 5 9 5]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 31.  0.  0.  0. 87.  1. 18. 34. 82.]
Step 6300: val accuracy 0.2446
Step 6400: training accuracy 0.2190
    sample pred: [6 9 9 1 6 6 6 6 9 9 8 9 6 9 1 6 3 6 3 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 13.  0.  9.  1.  2. 91.  0. 33. 70.]
Step 6400: val accuracy 0.2196
Step 6500: training accuracy 0.2200
    sample pred: [6 0 0 3 3 6 3 3 3 9 0 3 0 9 0 0 0 3 3 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [98.  0.  0. 35.  0.  0. 55.  7. 13. 12.]
Step 6500: val accuracy 0.2146
Step 6600: training accuracy 0.2470
    sample pred: [7 1 1 1 5 4 5 5 1 1 1 1 1 1 1 4 1 5 1 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 76.  0.  9. 45. 42. 26. 

Step 9500: training accuracy 0.3110
    sample pred: [7 0 9 1 6 4 0 4 9 9 0 9 0 9 9 0 0 7 9 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [90. 19.  0.  3. 40.  0. 27. 46.  2. 84.]
Step 9500: val accuracy 0.2746
Step 9600: training accuracy 0.2780
    sample pred: [7 1 1 1 6 6 7 1 1 1 8 1 1 1 1 7 1 7 1 7]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 63. 29.  0.  1.  0. 57. 86. 42.  0.]
Step 9600: val accuracy 0.2772
Step 9700: training accuracy 0.2890
    sample pred: [7 3 1 1 3 6 3 3 7 1 0 9 3 1 1 7 3 7 3 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 3. 52. 36. 66.  0.  5. 20. 76.  0. 31.]
Step 9700: val accuracy 0.2810
Step 9800: training accuracy 0.1930
    sample pred: [7 5 1 5 5 5 5 5 1 5 8 5 5 1 1 5 5 5 5 5]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 41.  0.  0.  0. 90. 18. 

    sample pred: [7 9 9 9 3 4 9 9 9 9 8 9 9 9 9 9 9 7 9 9]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [  0.  22.   1.   6.  32.   0.   0.  35.   1. 112.]
Step 12600: val accuracy 0.1810
Step 12700: training accuracy 0.3080
    sample pred: [2 2 1 1 5 2 5 5 2 1 8 9 2 9 1 2 2 5 1 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 37. 72. 14.  0. 48. 25. 20. 54. 38.]
Step 12700: val accuracy 0.3096
Step 12800: training accuracy 0.2150
    sample pred: [6 6 1 1 6 6 6 6 6 6 0 9 6 9 1 6 6 6 6 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [13. 21.  0.  0.  0.  0. 95. 14. 38. 34.]
Step 12800: val accuracy 0.2150
Step 12900: training accuracy 0.3180
    sample pred: [7 8 9 5 5 4 5 5 9 9 8 9 7 9 9 0 5 7 9 7]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [19.  2.  0.  0. 40. 41.  0. 74. 85. 57.]
Step 12

Step 15800: training accuracy 0.3170
    sample pred: [7 0 9 1 6 6 6 6 9 9 0 9 0 9 9 0 0 6 9 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [86. 21.  0.  0.  0.  0. 74. 54. 11. 71.]
Step 15800: val accuracy 0.2938
Step 15900: training accuracy 0.2670
    sample pred: [2 0 1 1 5 1 2 1 2 1 0 9 0 9 1 0 1 5 1 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [89. 57. 50.  1.  0. 27.  5.  0. 21. 17.]
Step 15900: val accuracy 0.2588
Step 16000: training accuracy 0.3220
    sample pred: [7 0 1 1 5 5 3 5 1 9 0 9 0 9 1 0 3 5 1 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [95. 44.  0. 28.  0. 47. 26. 34.  2. 46.]
Step 16000: val accuracy 0.2970
Step 16100: training accuracy 0.2870
    sample pred: [7 3 1 1 3 4 3 3 1 1 0 9 3 9 1 7 3 3 3 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 9. 57.  1. 66. 50.  

    sample pred: [7 0 9 1 5 4 4 4 4 9 0 9 4 9 1 0 4 5 1 4]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [83. 37.  0. 14. 74. 43.  0. 16.  2. 35.]
Step 18900: val accuracy 0.2916
Step 19000: training accuracy 0.2330
    sample pred: [7 0 0 1 5 5 7 5 7 7 0 7 7 9 1 0 0 7 1 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [98. 24.  0.  0.  0. 32.  0. 75.  2.  2.]
Step 19000: val accuracy 0.2180
Step 19100: training accuracy 0.3510
    sample pred: [7 2 9 1 5 2 2 5 2 9 0 9 2 9 1 0 1 7 1 2]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [39. 52. 67.  1.  1. 34. 25. 73.  0. 59.]
Step 19100: val accuracy 0.3274
Step 19200: training accuracy 0.2450
    sample pred: [4 1 1 1 3 4 4 4 1 1 8 1 1 1 1 4 1 4 1 1]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 1. 73.  5. 12. 65. 14. 21.  2. 42. 10.]
Step 19200: val a

    sample pred: [2 0 1 1 3 4 2 4 2 1 0 9 0 1 1 0 0 7 1 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [97. 38. 33.  3. 43.  0.  0. 36. 12.  7.]
Step 22000: val accuracy 0.2668
Step 22100: training accuracy 0.2910
    sample pred: [7 0 9 0 6 6 6 6 2 9 0 9 0 9 9 0 0 6 9 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [92.  5.  3.  0.  4.  3. 77. 26. 37. 44.]
Step 22100: val accuracy 0.2794
Step 22200: training accuracy 0.2680
    sample pred: [7 9 1 1 3 1 3 1 1 1 0 9 1 1 1 7 1 7 1 1]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 4. 77. 36. 17.  0.  0. 43. 57.  2. 32.]
Step 22200: val accuracy 0.2586
Step 22300: training accuracy 0.2730
    sample pred: [4 8 8 1 3 4 4 4 1 8 8 8 4 9 1 8 4 5 1 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 4. 26.  0. 18. 73. 27. 18.  5. 92. 10.]
Step 22300: val a

Step 25200: training accuracy 0.2910
    sample pred: [7 9 9 9 5 8 5 8 9 9 8 9 9 9 9 8 9 5 9 8]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [  2.   3.   0.   0.  24.  30.   3.  52.  71. 106.]
Step 25200: val accuracy 0.2582
Step 25300: training accuracy 0.2400
    sample pred: [7 5 1 5 5 4 5 5 2 9 0 5 5 9 1 5 5 5 5 5]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [12. 36.  6.  0. 29. 88.  7. 33.  0. 29.]
Step 25300: val accuracy 0.2340
Step 25400: training accuracy 0.2650
    sample pred: [7 0 0 1 5 1 0 1 1 9 0 9 0 9 1 0 0 7 1 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [102.  34.   1.   0.   0.  12.  30.  56.   3.  27.]
Step 25400: val accuracy 0.2456
Step 25500: training accuracy 0.3680
    sample pred: [2 9 9 1 3 4 3 3 2 9 0 9 3 9 1 0 3 3 3 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [3

    sample pred: [6 8 9 8 3 8 3 8 3 9 8 9 3 9 1 8 3 3 3 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 19.  0. 49. 10. 17. 43. 11. 94. 31.]
Step 28300: val accuracy 0.2578
Step 28400: training accuracy 0.2390
    sample pred: [2 4 1 1 6 4 4 4 2 1 0 5 4 1 1 4 2 4 1 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 7. 49. 27.  2. 76. 18. 50.  4.  6.  0.]
Step 28400: val accuracy 0.2414
Step 28500: training accuracy 0.2860
    sample pred: [7 0 1 1 3 4 3 3 1 9 0 3 0 9 1 0 3 3 1 0]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [98. 50.  1. 37. 41. 25.  0. 16.  0. 18.]
Step 28500: val accuracy 0.2742
Step 28600: training accuracy 0.2900
    sample pred: [7 8 1 1 5 4 4 4 1 9 8 9 4 9 1 4 4 4 1 4]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 39.  0.  0. 85. 14. 11. 34. 77. 30.]
Step 28600: val a

Step 31500: training accuracy 0.2880
    sample pred: [5 9 9 5 5 5 5 5 9 9 0 9 5 9 9 0 5 5 5 5]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [39. 26.  0.  3.  3. 82.  1. 45.  5. 84.]
Step 31500: val accuracy 0.2676
Step 31600: training accuracy 0.3500
    sample pred: [7 9 9 5 5 4 5 5 9 9 0 9 7 9 9 7 3 7 9 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [27.  2.  0. 12. 22. 49. 35. 83. 34. 86.]
Step 31600: val accuracy 0.3236
Step 31700: training accuracy 0.1990
    sample pred: [6 6 9 1 6 6 6 6 3 9 8 9 6 9 1 6 6 6 3 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 37.  0. 11.  0.  1. 92.  0. 16. 42.]
Step 31700: val accuracy 0.2040
Step 31800: training accuracy 0.2640
    sample pred: [2 4 9 1 5 4 4 4 2 9 0 9 4 9 1 4 4 5 1 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 4. 52.  9.  0. 68. 4

Step 34600: training accuracy 0.2740
    sample pred: [6 6 1 1 6 6 6 6 3 1 0 6 6 9 1 0 1 6 1 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [63. 52.  1. 16. 22.  0. 89. 11.  0. 20.]
Step 34600: val accuracy 0.2678
Step 34700: training accuracy 0.3030
    sample pred: [5 5 1 1 3 4 3 3 3 1 8 3 3 1 1 0 3 3 1 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [23. 46. 14. 54. 54. 25.  0.  1. 79.  7.]
Step 34700: val accuracy 0.2988
Step 34800: training accuracy 0.2970
    sample pred: [2 3 9 1 3 2 3 5 2 1 0 3 3 1 1 0 2 5 1 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [77. 41. 58. 48.  0. 23.  0. 27.  5. 18.]
Step 34800: val accuracy 0.2984
Step 34900: training accuracy 0.3060
    sample pred: [7 8 9 1 4 4 4 4 2 9 8 9 7 9 1 7 4 7 1 4]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 42.  3.  0. 71.  

Step 37700: training accuracy 0.2210
    sample pred: [4 9 9 1 4 4 4 4 1 9 8 9 1 9 1 4 1 4 9 4]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 62.  0.  0. 72.  0.  0.  4.  0. 83.]
Step 37700: val accuracy 0.2076
Step 37800: training accuracy 0.2990
    sample pred: [2 9 9 1 5 5 3 3 2 9 8 9 3 9 1 2 3 5 1 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0. 52. 45. 38. 12. 54. 26.  0. 11. 61.]
Step 37800: val accuracy 0.2862
Step 37900: training accuracy 0.2860
    sample pred: [2 9 9 9 3 3 3 3 2 9 8 9 9 9 9 9 3 3 9 3]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [  6.   0.  52.  42.   2.  30.  10.  22.  16. 106.]
Step 37900: val accuracy 0.2606
Step 38000: training accuracy 0.2870
    sample pred: [2 9 9 1 6 6 2 6 2 9 8 9 2 9 9 2 2 2 9 6]
    sample true: [6 7 9 0 5 2 3 3 3 9 0 9 2 9 1 0 2 3 9 6]
    correct predictions by class: [ 0.  9. 69.