In [1]:
# Necessary imports
import tensorflow as tf
import numpy as np
from imageio import imread
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import time
from caffe_classes import class_names

%matplotlib inline

### AlexNet implementation

In [2]:
# Retrieve the pretrained AlexNet parameter values.
# The .npy file stores a pickled Python object (unwrapped by `.item()` and indexed
# by layer name further down), and NumPy >= 1.16.3 refuses to unpickle object
# arrays unless `allow_pickle=True` is passed explicitly.
# NOTE(review): unpickling can execute arbitrary code -- only load a weights file
# that comes from a trusted source.
net_data = np.load('bvlc-alexnet.npy', encoding='latin1', allow_pickle=True).item()

In [3]:
# Helper functions
def conv2d(input_vol, W, b, stride=1, padding='VALID', group=1):
    """Convolve `input_vol` with `W`, add the bias `b` and apply ReLU.

    With `group > 1` both the input and the kernel are split into `group`
    equal parts along axis 3; each input part is convolved with the matching
    kernel part and the partial results are concatenated back along axis 3.

    Args:
        input_vol: Input tensor (its last dimension must be divisible by `group`).
        W: Kernel tensor (its last dimension must be divisible by `group`).
        b: Bias tensor added with `tf.nn.bias_add`.
        stride: Step used for both middle entries of the `strides` argument.
        padding: Padding scheme forwarded to `tf.nn.conv2d`.
        group: Number of groups the convolution is split into.

    Returns:
        The ReLU-activated, bias-added convolution output.

    Raises:
        AssertionError: If the last dimension of `input_vol` or `W` is not a
            multiple of `group`.
    """
    in_channels = input_vol.get_shape().as_list()[-1]
    out_channels = W.get_shape().as_list()[-1]

    assert in_channels % group == 0
    assert out_channels % group == 0

    def _convolve(inputs, kernel):
        # Same stride along both spatial axes; batch/channel strides stay at 1.
        return tf.nn.conv2d(inputs, kernel,
                            strides=[1, stride, stride, 1],
                            padding=padding)

    if group == 1:
        pre_activation = _convolve(input_vol, W)
    else:
        input_slices = tf.split(input_vol, group, axis=3)
        kernel_slices = tf.split(W, group, axis=3)
        partial_outputs = []
        for inputs, kernel in zip(input_slices, kernel_slices):
            partial_outputs.append(_convolve(inputs, kernel))
        pre_activation = tf.concat(partial_outputs, axis=3)

    return tf.nn.relu(tf.nn.bias_add(pre_activation, b))

def maxpool2d(input_vol, k=2, stride=2):
    """Max-pool `input_vol` with a `k` x `k` window and 'VALID' padding.

    Args:
        input_vol: Tensor to pool.
        k: Window size used for both middle entries of `ksize`.
        stride: Step used for both middle entries of `strides`.

    Returns:
        The pooled tensor produced by `tf.nn.max_pool`.
    """
    window = [1, k, k, 1]
    step = [1, stride, stride, 1]
    return tf.nn.max_pool(input_vol, ksize=window, strides=step, padding='VALID')

def alexnet(X, feature_extraction=False):
    """Build the AlexNet graph on top of `X` using the weights in `net_data`.

    Convolution weights/biases are wrapped in `tf.Variable`; the fully
    connected weights/biases are handed to `tf.nn.xw_plus_b` as the raw
    loaded values.

    Args:
        X: Input tensor fed to the first convolution.
        feature_extraction: When True, stop after FC7 and return its
            ReLU activations instead of class probabilities.

    Returns:
        The FC7 activations if `feature_extraction` is True, otherwise the
        softmax of the FC8 outputs.
    """
    def _conv(inputs, layer, stride, group):
        # Weights first, then biases, so variables are created in a fixed order.
        weights = tf.Variable(net_data[layer][0])
        biases = tf.Variable(net_data[layer][1])
        return conv2d(inputs, weights, biases, stride=stride, padding='SAME', group=group)

    def _norm(inputs):
        # Local response normalization with the settings shared by CONV1 and CONV2.
        return tf.nn.local_response_normalization(inputs,
                                                  depth_radius=2,
                                                  alpha=2e-05,
                                                  beta=0.75,
                                                  bias=1.0)

    def _dense(inputs, layer):
        return tf.nn.xw_plus_b(inputs, net_data[layer][0], net_data[layer][1])

    # Kernel shapes are dictated by the loaded weights; the sizes quoted below
    # are carried over from the original author's notes.

    # CONV1 (noted kernel 11x11x96, stride 4, group 1) -> NORM -> POOL (3x3, stride 2)
    net = _conv(X, 'conv1', stride=4, group=1)
    net = maxpool2d(_norm(net), 3)

    # CONV2 (noted kernel 5x5x256, stride 1, group 2) -> NORM -> POOL (3x3, stride 2)
    net = _conv(net, 'conv2', stride=1, group=2)
    net = maxpool2d(_norm(net), 3)

    # CONV3 (noted kernel 3x3x384, stride 1, group 1)
    net = _conv(net, 'conv3', stride=1, group=1)

    # CONV4 (noted kernel 3x3x384, stride 1, group 2)
    net = _conv(net, 'conv4', stride=1, group=2)

    # CONV5 (noted kernel 3x3x256, stride 1, group 2) -> POOL (3x3, stride 2)
    net = _conv(net, 'conv5', stride=1, group=2)
    net = maxpool2d(net, 3)

    # FC6 (noted width 4096) operates on the flattened CONV5 output
    fc6 = tf.nn.relu(_dense(tf.contrib.layers.flatten(net), 'fc6'))

    # FC7 (noted width 4096)
    fc7 = tf.nn.relu(_dense(fc6, 'fc7'))

    if feature_extraction:
        return fc7

    # FC8 (noted width 1000) followed by softmax
    return tf.nn.softmax(_dense(fc7, 'fc8'))

### Inference on ImageNet

In [24]:
# Clear the default graph before building the model in this cell
tf.reset_default_graph()

# Load the sample images, keep only the first three channels and cast to float32
img1 = imread("poodle.png")[:, :, :3].astype(np.float32)
img2 = imread("weasel.png")[:, :, :3].astype(np.float32)

# Pre-processing: subtract each image's own mean value
img1 = img1 - img1.mean()
img2 = img2 - img2.mean()

# Placeholder for a batch of 227x227 three-channel images
X = tf.placeholder(tf.float32, shape=[None, 227, 227, 3])

# Build the model; `probs` holds the softmax output
probs = alexnet(X)

# Run both images through the network in a single batch
start = time.time()
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    output = session.run(probs, feed_dict={X: [img1, img2]})

# Report the five most probable classes per image, highest probability first
for img_idx, scores in enumerate(output):
    print('Image {}'.format(img_idx))

    top5 = np.argsort(scores)[::-1][:5]
    for class_idx in top5:
        print('{}: {:.4f}'.format(class_names[class_idx], scores[class_idx]))
    print()

# Elapsed time covers session creation, variable init, inference and the printing above
print('Time taken: {:.3f}'.format(time.time() - start))

Image 0
miniature poodle: 0.3895
toy poodle: 0.2231
Bedlington terrier: 0.1730
standard poodle: 0.1496
komondor: 0.0258

Image 1
weasel: 0.3313
polecat, fitch, foulmart, foumart, Mustela putorius: 0.2803
black-footed ferret, ferret, Mustela nigripes: 0.2105
mink: 0.0814
Arctic fox, white fox, Alopex lagopus: 0.0268

Time taken: 3.704


### Inference on Traffic Sign Dataset

### Feature extraction using AlexNet

### Training Feature Extractor for classification of images from the German Traffic Sign Dataset

In [None]:
# Hyperparameters
IMAGE_SIZE = 227       # Height and width used for the input placeholder
EPOCHS = 10
BATCH_SIZE = 128
LEARNING_RATE = 0.001
N_LABELS = 43          # Number of labels for the GTS data set

# Graph inputs: image batch, integer labels (plus their one-hot encoding)
# and a learning rate that is fed at run time
X = tf.placeholder(tf.float32, shape=(None, IMAGE_SIZE, IMAGE_SIZE, 3))
y = tf.placeholder(tf.int32, shape=(None,))
y_one_hot = tf.one_hot(y, depth=N_LABELS)
learning_rate = tf.placeholder(tf.float32)
