# Inception Modules

Since you already learned how to build a CNN using low-level TF libraries, we won't bother with that here. Let's use tf.layers to easily create our model. 

Let's first download our data. Also make sure you are running this notebook on a GPU node.

In [0]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [0]:
# Load the CIFAR-100 train/test splits through the Keras dataset helper.
load_cifar100 = tf.keras.datasets.cifar100.load_data
(X_train, y_train), (X_test, y_test) = load_cifar100()

In [0]:
# Convert images to float32 and divide by 255 (maps 8-bit pixel values,
# if that is what the loader returned, into the [0, 1] range).
X_train, X_test = (
    pixels.astype(np.float32) / 255 for pixels in (X_train, X_test))

# Flatten the label arrays to 1-D and store them as int64 class ids.
y_train, y_test = (
    class_ids.ravel().astype(np.int64) for class_ids in (y_train, y_test))

In [0]:
# Short aliases for the tf.layers / tf.nn building blocks used by the modules.
conv2d, max_pooling2d, average_pooling2d = (
    tf.layers.conv2d,
    tf.layers.max_pooling2d,
    tf.layers.average_pooling2d,
)
relu = tf.nn.relu

Take a look at what the high-level architecture of our CNN is going to look like.

In [0]:
def cnn_model_fn(features, labels, mode, params=None):
  """Model function for `tf.estimator.Estimator`: a small Inception-style CNN.

  This is an exercise template: every assignment whose right-hand side is
  `...` is intentionally left blank and must be filled in (following the
  comment above it) before the model can run.

  Args:
    features: dict whose "x" entry holds the input images; it is reshaped
      to [-1, 32, 32, 3].
    labels: integer class ids fed to sparse softmax cross-entropy. Not used
      in PREDICT mode, where the Estimator supplies no labels.
    mode: one of the `tf.estimator.ModeKeys` values.
    params: optional dict of hyperparameters. `params['lr']` is the Adam
      learning rate; 0.001 is used when `params` is None or has no 'lr'.

  Returns:
    A `tf.estimator.EstimatorSpec` for PREDICT, TRAIN or EVAL mode.
  """

  # Reshape X to 4-D tensor: [batch_size, height, width, channels]
  # (tf.layers defaults to the channels_last layout)
  input_layer = tf.reshape(features["x"], [-1, 32, 32, 3])

  # MODULE 1 - Output Tensor Shape: [batch_size, 32, 32, 96]
  mod1 = module1(input_layer)

  # do a 2x2 maxpool with 2 strides
  # Output Tensor Shape: [batch_size, 16, 16, 96]
  pool1 = ...

  # MODULE 2 - Output Tensor Shape: [batch_size, 16, 16, 96]
  mod2 = module2(pool1)

  # RESNET
  # don't worry about this until the very end

  # reduce dimensions - do a conv 1x1 kernel filter with relu
  # Output Tensor Shape: [batch_size, 16, 16, 24]
  conv = ...

  # do a 2x2 average pool with 2 strides
  # Output Tensor Shape: [batch_size, 8, 8, 24]
  pool2 = ...

  # Output Tensor Shape: [batch_size, 8 * 8 * 24]
  pool2_flat = tf.reshape(pool2, [-1, 8 * 8 * 24])

  # do a dense layer with 1000 units with relu
  # Output Tensor Shape: [batch_size, 1000]
  dense = ...

  # Dropout is only active while training.
  dropout = tf.layers.dropout(
      inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

  # Output Tensor Shape: [batch_size, 100]
  logits = tf.layers.dense(inputs=dropout, units=100)


  # Don't worry about anything below ------------------------------------------

  predictions = {
      "classes": tf.argmax(input=logits, axis=1),
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # The loss is built only after the PREDICT return: in PREDICT mode there
  # are no labels, and the cross-entropy op cannot be built without them.
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

  if mode == tf.estimator.ModeKeys.TRAIN:
    # Tolerate a missing params dict / missing 'lr' key instead of crashing.
    learning_rate = (params or {}).get('lr', 0.001)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(
        loss=loss,
        global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

  if mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

This is going to be a naive module. We want 3 branches - one for each kernel dimension (1x1, 3x3, 5x5). Then we're going to do a depth-wise concatenation and return the output. For each branch, use a convolution on the input with 32 output filters, stride=1, padding=same, and relu activation. Finally, do a depth-wise concatenation of all 3 branches (essentially stacking them on top of each other).

In [0]:
def module1(input_layer):
  """Naive inception module (exercise template).

  The `...` placeholders below are intentionally left for the reader to
  fill in. Going by the shape comments, each of the three branches should
  produce 32 channels at the input's spatial size, and the module output is
  their depth-wise (channel-axis) concatenation with 96 channels.

  Args:
    input_layer: input tensor; the shape comment below expects
      [batch_size, 32, 32, 3], but nothing in this function enforces it.

  Returns:
    The value bound to `module`. As written this is still the `...`
    placeholder.
  """
  # Input Tensor Shape: [batch_size, 32, 32, 3]
  
  # 1x1 convolution
  # Output Tensor Shape: [batch_size, 32, 32, 32]
  branch1 = ...

  # 3x3 convolution
  # Output Tensor Shape: [batch_size, 32, 32, 32]
  branch2 = ...

  # 5x5 convolution
  # Output Tensor Shape: [batch_size, 32, 32, 32]
  branch3 = ...
  
  # depth-wise concatenation of branch1, branch2 and branch3
  # Output Tensor Shape: [batch_size, 32, 32, 96]
  module = ...
  
  return module

Now we're going to build the improved inception module. This means adding a 1x1 convolution before the 3x3 and 5x5 kernels to reduce the number of feature maps (dimensionality reduction). Let's also add a 4th branch that does a 3x3 maxpool followed by a 1x1 dimensionality-reduction convolution. Then we can concatenate all four branches depth-wise.


In [0]:
def module2(input_layer):
  """Inception module with 1x1 dimensionality reduction (exercise template).

  The `...` placeholders below are intentionally left for the reader to
  fill in. Going by the shape comments, each of the four branches should
  produce 24 channels at the input's spatial size, and the module output is
  their depth-wise (channel-axis) concatenation with 96 channels.

  Args:
    input_layer: input tensor; the shape comment below expects
      [batch_size, 16, 16, 96], but nothing in this function enforces it.

  Returns:
    The value bound to `module`. As written this is still the `...`
    placeholder.
  """
  # Input Tensor Shape: [batch_size, 16, 16, 96]
  
  # 1x1 convolution
  # Output Tensor Shape: [batch_size, 16, 16, 24]
  branch1 = ...
  
  # 1x1 conv followed by 3x3 conv, same filter #s
  # (first line: the 1x1 reduction; second line: the 3x3 conv applied to it)
  # Output Tensor Shape: [batch_size, 16, 16, 24]
  branch2 = ...
  branch2 = ...
  
  # 1x1 conv followed by 5x5 conv, same filter #s
  # (first line: the 1x1 reduction; second line: the 5x5 conv applied to it)
  # Output Tensor Shape: [batch_size, 16, 16, 24]
  branch3 = ...
  branch3 = ...
  
  # 3x3 maxpool followed by 1x1 conv
  # NOTE(review): to keep the 16x16 size stated below, the pool presumably
  # needs strides=1 and padding='same' - confirm when filling this in.
  # Output Tensor Shape: [batch_size, 16, 16, 24]
  branch4 = ...
  branch4 = ...

  # depth-wise concatenation of all four branches
  # Output Tensor Shape: [batch_size, 16, 16, 96]
  module = ...
  
  return module

In [0]:
# Create the Estimator; `params` is handed to cnn_model_fn, which reads 'lr'.
classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, params={'lr': 0.001})

# Input function for training: shuffled batches of 250 that repeat
# indefinitely (num_epochs=None), so the `steps` argument passed to
# classifier.train() decides when training stops.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": X_train},
    y=y_train,
    batch_size=250,
    num_epochs=None,
    shuffle=True)


def eval_input_fn(X, y):
  """Return a single-pass, unshuffled input function over (X, y).

  Args:
    X: array of input images, fed to the model under the "x" feature key.
    y: array of labels aligned with X.

  Returns:
    The input function built by `tf.estimator.inputs.numpy_input_fn`,
    suitable for `classifier.evaluate(input_fn=...)`.
  """
  return tf.estimator.inputs.numpy_input_fn(
      x={"x": X},
      y=y,
      num_epochs=1,
      shuffle=False)

In [0]:
# Each entry is the full metrics dict returned by classifier.evaluate().
train_accs = []
val_accs = []

for i in range(5):
  # The first round trains for a single step, every later round for 500.
  num_steps = 1 if i == 0 else 500
  classifier.train(input_fn=train_input_fn, steps=num_steps)

  train_accs.append(
      classifier.evaluate(input_fn=eval_input_fn(X_train, y_train)))
  val_accs.append(
      classifier.evaluate(input_fn=eval_input_fn(X_test, y_test)))

  print('train acc:', train_accs[-1])
  print('val acc:', val_accs[-1])

In [0]:
# Plot the recorded evaluation loss per round for both splits.
for curve_label, history in (('train', train_accs), ('val', val_accs)):
  plt.plot([metrics['loss'] for metrics in history], label=curve_label)
plt.legend()
plt.show()