In [0]:
import tensorflow as tf
import numpy as np

In [0]:
num_class = 10
image_height = 32
image_width = 32
num_training_instance = 50000
num_test_instance = 10000

In [0]:
training_data, test_data = tf.keras.datasets.cifar10.load_data()

In [0]:
x_train = training_data[0]
y_train = training_data[1]
x_test = test_data[0]
y_test = test_data[1]

In [5]:
x_train.shape

(50000, 32, 32, 3)

In [6]:
y_train.shape

(50000, 1)

In [0]:
# need to convert to one hot vectors
def get_one_hot_labels(normal_label, num_class):
  num_labels = normal_label.shape[0]
  one_hot_label = np.zeros([num_labels, num_class])
  for i in range(num_labels):
    current_index = normal_label[i]
    one_hot_label[i][current_index] += 1
  return one_hot_label

In [0]:
y_train_one_hot = get_one_hot_labels(y_train, num_class)

In [9]:
y_train_one_hot.shape

(50000, 10)

In [0]:
y_test_one_hot = get_one_hot_labels(y_test, num_class)

In [11]:
y_test_one_hot.shape

(10000, 10)

In [0]:
# Prepare tf data for input into model

## setting some training variables

batch_size = 64

num_train_iteration_per_epoch = num_training_instance // batch_size
if num_training_instance % batch_size != 0:
  num_train_iteration_per_epoch += 1
  
num_test_iteration_per_epoch = num_test_instance // batch_size
if num_test_instance % batch_size != 0:
  num_test_iteration_per_epoch += 1

In [0]:
# dataset_training = tf.data.Dataset.from_tensor_slices((x_train, y_train_one_hot)).batch(batch_size).repeat()
dataset_training = tf.data.Dataset.from_tensor_slices((x_train, y_train_one_hot)).shuffle(1000).batch(batch_size).repeat()

In [0]:
# going to create one more set of training dataset that is not batched but for testing training accuracy
# dataset_training_non_batch = tf.data.Dataset.from_tensor_slices((x_train, y_train_one_hot)).batch(num_training_instance).repeat()

# going to do a running metric instead of 1 shot evaluation of metrics.
# dataset_training_non_batch = tf.data.Dataset.from_tensor_slices((x_train, y_train_one_hot)).shuffle(1000).batch(num_training_instance).repeat()

In [0]:
# might change this one to a running test metric too
dataset_test = tf.data.Dataset.from_tensor_slices((x_test, y_test_one_hot)).batch(batch_size).repeat()

In [0]:
iterator = tf.data.Iterator.from_structure(dataset_training.output_types, dataset_training.output_shapes)

In [0]:
train_init = iterator.make_initializer(dataset_training)
test_init = iterator.make_initializer(dataset_test)

# train_whole_init = iterator.make_initializer(dataset_training_non_batch)

In [0]:
image_batch, label_batch = iterator.get_next()

In [0]:
# setting up the metrics --- accuracy and mean(for running cost)
# tf_accuracy, tf_accuracy_update = tf.metrics.accuracy(y_predicted, label_batch_stopped)

# tf_cost, tf_cost_update = tf.metrics.mean(cost)

In [0]:
# dropouts suppose to be 0.3, 0.3, 0.6, 0.6

######Need to use a placeholder to control dropout during training and turn off during evaluation, by default dropout is disabled, thats why overfitting
is_training_mode = tf.placeholder_with_default(True, shape=())

# write model

input_cast_layer = tf.cast(image_batch, tf.float32)

conv1 = tf.layers.conv2d(input_cast_layer, filters=64, kernel_size=[3, 3], strides=1, activation='relu', padding='same', name='conv_layer_1')

# somehow things got from stagnanting at 50% accuracy for test set to close to 68 percent accuracy when i change stride from 1 to 2??? why??
pool1 = tf.layers.max_pooling2d(conv1, pool_size=[2, 2], strides=2, padding='same', name='max_pool_layer_1')

batch_norm1 = tf.layers.batch_normalization(pool1, name='batch_norm_1')
# batch_norm1 = pool1

conv_drop1 = tf.layers.dropout(batch_norm1, rate=0.3, seed=22, training=is_training_mode, name='conv_drop_1')

conv2 = tf.layers.conv2d(conv_drop1, filters=64, kernel_size=[3, 3], strides=1, activation='relu', padding='same', name='conv_layer_2')

pool2 = tf.layers.max_pooling2d(conv2, pool_size=[2, 2], strides=[2, 2], padding='same', name='max_pool_layer_2')

batch_norm2 = tf.layers.batch_normalization(pool2, name='batch_norm_2')
# batch_norm2 = pool2
conv_drop2 = tf.layers.dropout(batch_norm2, rate=0.4, seed=25, training=is_training_mode, name='conv_drop_2')

flat_layer = tf.layers.flatten(batch_norm2)

dense0 = tf.layers.dense(flat_layer, units=256, activation='relu', name='dense_layer_0')

dropout0 = tf.layers.dropout(dense0, rate=0.6, seed=1, training=is_training_mode, name='dropout_layer_0')

dense1 = tf.layers.dense(dropout0, units=128, activation='relu', name='dense_layer_1')

dropout = tf.layers.dropout(dense1, rate=0.6, seed=42, training=is_training_mode, name='dropout_layer_1')
######## before this dense2 is a softmax, which hindered the learning, because softmax cross entropy applied softmax again, breaking the learning
######## https://stackoverflow.com/questions/50032197/tensorflow-gradients-are-0-weights-are-not-updating

dense2 = tf.layers.dense(dropout, units=num_class, activation=None, name='dense_layer_2_softmax_output')

# dense2 = tf.layers.dense(dropout, units=num_class, activation='relu', name='dense_layer_2_softmax_output')

# defining the output of the model

y_predicted = dense2

y_predicted_class = tf.argmax(y_predicted, axis=1)

# stopping backprop to gradients
label_batch_stopped = tf.stop_gradient(label_batch)

# getting ground truth label class indices
label_class = tf.argmax(label_batch_stopped, axis=1)

cost_function = tf.nn.softmax_cross_entropy_with_logits_v2(labels=label_batch_stopped, logits=dense2)

cost = tf.reduce_mean(cost_function)

optimizer = tf.train.AdamOptimizer()

train = optimizer.minimize(cost)

# defining metrics

tf_accuracy, tf_accuracy_update = tf.metrics.accuracy(y_predicted_class, label_class, name='accuracy_metrics')

tf_cost, tf_cost_update = tf.metrics.mean(cost, name='cost_metrics')


# Not using the ones below
# correct_prediction = tf.equal(y_predicted_class, tf.argmax(label_batch, axis=1))

# accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# accuracy = tf.metrics.accuracy(y_predicted, label_batch)


In [0]:
# getting the running variables for accuracy and cost metrics
accuracy_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope='accuracy_metrics')
cost_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope='cost_metrics')

In [0]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())

In [0]:
# sess.run(tf.variables_initializer(accuracy_vars))

In [0]:
training_feed_dict = {is_training_mode : True}
testing_feed_dict = {is_training_mode : False}

In [25]:
# training with evaluation after every epoch

num_epoch = 200

for i in range(num_epoch):
  print('\n')
  # print the current number of epoch
  print('Epoch: ', i)
  
  # reset the metrics
  sess.run(tf.variables_initializer(accuracy_vars))
  sess.run(tf.variables_initializer(cost_vars))
  
  # initialize the training dataset
  sess.run(train_init)
  
  # run the training iterations for 1 epoch
  for j in range(num_train_iteration_per_epoch):
    # for every batch
    sess.run(train, feed_dict=training_feed_dict)
    # keep track of accuracy and loss
    sess.run([tf_accuracy_update, tf_cost_update])
  
  print('Training data')
  print('Training loss: ', sess.run(tf_cost))
  print('Training accuracy: ', sess.run(tf_accuracy))
  
  
  # reset the metrics
  sess.run(tf.variables_initializer(accuracy_vars))
  sess.run(tf.variables_initializer(cost_vars))
    
  # initialize the test dataset
  sess.run(test_init)
  
  for _ in range(num_test_iteration_per_epoch):
    sess.run([cost, tf_cost_update, tf_accuracy_update], feed_dict=testing_feed_dict)
  
  print('\n')
  print('Test data')
  # evaluate the current training progress over the test data and print them
  ## take in input data
#   sess.run([iterator.get_next()])
  
#   sess.run(tf.local_variables_initializer())
  
  print('Loss: ', sess.run(tf_cost))
  
  print('Accuracy: ', sess.run(tf_accuracy))
  
  ####### going to test the model on training to understand the reduction in accuracy as training proceeds
  
#   print('Training data')
  
#   sess.run(train_whole_init)
  




Epoch:  0
Training data
Training loss:  3.0012012
Training accuracy:  0.12592088


Test data
Loss:  2.2685301
Accuracy:  0.1466


Epoch:  1
Training data
Training loss:  2.2129273
Training accuracy:  0.16768098


Test data
Loss:  2.1460645
Accuracy:  0.2181


Epoch:  2
Training data
Training loss:  2.026087
Training accuracy:  0.23182255


Test data
Loss:  1.9039735
Accuracy:  0.2868


Epoch:  3
Training data
Training loss:  1.8665513
Training accuracy:  0.29204035


Test data
Loss:  1.7555264
Accuracy:  0.3407


Epoch:  4
Training data
Training loss:  1.763046
Training accuracy:  0.33924568


Test data
Loss:  1.6406157
Accuracy:  0.3669


Epoch:  5
Training data
Training loss:  1.6755065
Training accuracy:  0.38298768


Test data
Loss:  1.5239698
Accuracy:  0.43


Epoch:  6
Training data
Training loss:  1.6036227
Training accuracy:  0.41493833


Test data
Loss:  1.4554404
Accuracy:  0.4659


Epoch:  7
Training data
Training loss:  1.5367266
Training accuracy:  0.4424247


Test data


# Ignore what is below, only notes to self and trying stuff while debugging

In [0]:
# will stop the session here so that we can start a new session with dataset shuffling later

# sess.close()

In [23]:
# need to debug why the loss keeps increasing - loss is over test set, did we overfit training set by not shuffling?

'''The model has overfitted to the training data, perhaps we will shuffle the order of the data at every epoch'''

'The model has overfitted to the training data, perhaps we will shuffle the order of the data at every epoch'

Now we will shuffle the dataset by calling the built in function

Need to go in this order, dataset.shuffle(no seed here).batch.repeat() to get 1 shuffle for 1 epoch, then batched, then next. repeat before shuffe will result in some appearing more than once in one epoch and some none at all.

If batch comes before the shuffle, then the elements within the batch is in the original order, only the batch order is shuffled.

In [0]:
dataset_training = tf.data.Dataset.from_tensor_slices((x_train, y_train_one_hot)).shuffle(1000).batch(batch_size).repeat()

In [0]:
# going to create one more set of training dataset that is not batched but for testing training accuracy
dataset_training_non_batch = tf.data.Dataset.from_tensor_slices((x_train, y_train_one_hot)).shuffle(1000).batch(num_training_instance).repeat()

In [0]:
dataset_test = tf.data.Dataset.from_tensor_slices((x_test, y_test_one_hot)).shuffle(200).batch(num_test_instance).repeat()

In [0]:
iterator = tf.data.Iterator.from_structure(dataset_training.output_types, dataset_training.output_shapes)

In [0]:
train_init = iterator.make_initializer(dataset_training)
test_init = iterator.make_initializer(dataset_test)

train_whole_init = iterator.make_initializer(dataset_training_non_batch)

In [0]:
image_batch, label_batch = iterator.get_next()

In [0]:
# sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())

In [29]:
# training with evaluation after every epoch

num_epoch = 10

for i in range(num_epoch):
  # print the current number of epoch
  print('Epoch: ', i)
  
  # initialize the training dataset
  sess.run(train_init)
  
  # run the training iterations for 1 epoch
  for _ in range(num_train_iteration_per_epoch):
    sess.run(train)
    
  # initialize the test dataset
  sess.run(test_init)
  
  
  print('Test data')
  # evaluate the current training progress over the test data and print them
  ## take in input data
#   sess.run([iterator.get_next()])
  
#   sess.run(tf.local_variables_initializer())
  
  print('Loss: ', sess.run(cost))
  
  print('Accuracy: ', sess.run(accuracy))
  
  ####### going to test the model on training to understand the reduction in accuracy as training proceeds
  
#   num_correct = 0
  
#   print('Training data')
  
#   sess.run(train_init)
#   for _ in range(num_train_iteration_per_epoch):
    
#   print('Training loss: ', sess.run(cost))
#   print('Training accuracy: ', sess.run(accuracy))

Epoch:  0


ResourceExhaustedError: ignored

In [36]:
! nvidia-smi

Fri Aug  3 07:38:35 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.111                Driver Version: 384.111                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    70W / 149W |  10936MiB / 11439MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [0]:
import sys

In [0]:
sess.run(train_whole_init)

In [32]:
sys.getsizeof(iterator)

56

In [0]:
temp = sess.run(image_batch)

In [35]:
temp.shape

(50000, 32, 32, 3)