# Hands-on tutorial: Traffic sign classifier with Tensorflow

## Preparation steps
* You can download this Jupyter notebook: `git clone https://github.com/olesalscheider/traffic-sign-tutorial`
* Please install TensorFlow
 * `pip3 install tensorflow` for the CPU variant
 * `pip3 install tensorflow-gpu` if you have a GPU with CUDA and CUDNN support
 * More details on https://www.tensorflow.org/install/
* Download and extract the traffic sign dataset (GTSRB) for this tutorial:
 * http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Training_Images.zip
 * Unzip the dataset
 * Convert images to png
 * ... or execute `data/download.sh`

## Prepare the dataset

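Let's start by splitting the data into a training and a test dataset. We store the image paths and class labels in two tab-separated text files (`train` and `test`) and use approximately 80% of the data for training. All images of the same physical sign end up in the same set: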

In [1]:
import os
import numpy as np

DATA_PATH = os.path.join('data', 'GTSRB', 'Final_Training') # Path to the extracted data

# Split the images into a training list and a test list. Both lists are plain text files
# with one example per line in the form "<image path>\t<class label>\n".
with open(os.path.join(DATA_PATH, 'train'), 'w') as train_file, open(os.path.join(DATA_PATH, 'test'), 'w') as test_file:
    # Iterate over all image files in the training data directory and store the paths in the list files
    for dirpath, _, files in os.walk(DATA_PATH):
        # Maps a sign number to True (training set) or False (test set). The mapping is reset
        # for every directory, so sign numbers only have to be unique within one directory.
        is_train_example = {}
        for file in files:
            if not file.endswith('.png'):
                continue

            # The last directory name encodes the class of the training example. It is parsed
            # only after a PNG was found because os.walk also visits directories (such as
            # DATA_PATH itself) whose names are not numbers.
            label = int(os.path.basename(dirpath)) # int() strips the leading zeros

            # There are multiple images of each sign. The number before the '_' gives the sign number.
            # Make sure that different images of the same sign are only stored either in the training
            # or the test set.
            sign_no = int(file.split('_')[0])

            # Generate the line that should be stored in the list file. It is the image path and the class label.
            line = os.path.join(dirpath, file) + '\t' + str(label) + '\n'

            # Decide once per sign whether it belongs to the training or the test set
            if sign_no not in is_train_example:
                # randint(0, 10) draws from 0..9, so "> 1" is true in 8 of 10 cases (80% training)
                is_train_example[sign_no] = np.random.randint(0, 10) > 1

            # write() is the right call for a single string; writelines() expects an iterable of lines
            target_file = train_file if is_train_example[sign_no] else test_file
            target_file.write(line)

## Define a data reader

In [2]:
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():

    def read_data(line):
        """Turn one line of a list file into an (image, label) pair.

        Args:
            line: String tensor holding "<image path>\t<class label>".

        Returns:
            A tuple of the image, decoded with 3 channels and resized to
            48x48 pixels, and the class label parsed as an integer.
        """
        # Split the line at the tab into the path (string) and the label (integer)
        path, label = tf.decode_csv(line, [[''], [0]], field_delim='\t')

        # Load the raw bytes of the image file and decode them as a 3-channel PNG
        png_bytes = tf.read_file(path)
        decoded = tf.image.decode_png(png_bytes, 3)

        # Resize to 48x48 pixels. The resize is applied to a batch of one image,
        # so a leading axis is added before and removed after the call.
        as_batch = tf.expand_dims(decoded, axis=0)
        resized = tf.image.resize_bilinear(as_batch, [48, 48])
        image = tf.squeeze(resized, axis=0)
        image.set_shape([48, 48, 3])
        return image, label

    # Training dataset: shuffle the lines, decode them with read_data,
    # pass over the data 2 times and form batches of 32 examples.
    train_dataset = (
        tf.data.TextLineDataset(os.path.join(DATA_PATH, 'train'))
        .shuffle(30000)
        .map(read_data, num_parallel_calls=2)
        .repeat(2)
        .batch(32)
    )

    # Test dataset: decode every line with read_data, one example per batch.
    test_dataset = (
        tf.data.TextLineDataset(os.path.join(DATA_PATH, 'test'))
        .map(read_data, num_parallel_calls=2)
        .batch(1)
    )

    # Create a generic iterator that is defined only by the element types and shapes
    iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)

    # Create initializer operations for the iterator. Running one of them makes the
    # iterator read from either the training or the test dataset.
    train_init_op = iterator.make_initializer(train_dataset)
    test_init_op = iterator.make_initializer(test_dataset)

## Define the model

First we define a class for one ResNet module:

In [3]:
# Factor passed to tf.keras.regularizers.l2 for the kernels of the convolution layers below
REGULARIZER_WEIGHT = 1e-5

class ResnetModule(tf.keras.Model):
    """Residual block with two 3x3 convolutions and batch normalization.

    The block computes ``conv2(relu(bn2(conv1(relu(bn1(x)))))) + shortcut``.
    The shortcut is the unmodified input if the input already has
    ``num_output_channels`` channels. Otherwise it is a 1x1 convolution of
    ``relu(bn1(x))`` that produces ``num_output_channels`` channels.

    NOTE(review): both batch normalization layers are always called with
    ``training=True``, regardless of how the module is used - confirm that
    this is also intended when the model is evaluated.
    """

    def __init__(self, name, num_output_channels):
        """Store the configuration; the layers are created in ``build``.

        Args:
            name: Name of the module.
            num_output_channels: Number of filters of every convolution in
                the module.
        """
        super().__init__(name=name)
        self.num_output_channels = num_output_channels

    def build(self, input_shapes):
        """Create the layers of the module.

        Args:
            input_shapes: Shape of the input; its last entry is compared
                with ``num_output_channels`` to decide whether the 1x1
                shortcut convolution is needed.
        """
        def make_conv(kernel_size, name, **kwargs):
            # All convolutions share the filter count, the initializer and the L2 regularizer
            return tf.keras.layers.Conv2D(self.num_output_channels,
                kernel_size,
                kernel_initializer=tf.keras.initializers.glorot_normal(),
                kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER_WEIGHT),
                name=name,
                **kwargs)

        self.bn1 = tf.keras.layers.BatchNormalization()
        self.bn2 = tf.keras.layers.BatchNormalization()

        self.conv1 = make_conv((3, 3), 'conv1', padding='same')
        self.conv2 = make_conv((3, 3), 'conv2', padding='same')

        # The shortcut convolution only exists if the channel count has to change
        self.conv3 = None
        if input_shapes[-1] != self.num_output_channels:
            self.conv3 = make_conv((1, 1), 'conv3')
        super().build(input_shapes)

    def call(self, x):
        """Apply the residual block to ``x`` and return the result."""
        residual = x
        x = self.bn1(x, training=True)
        x = tf.keras.activations.relu(x)
        # Explicit None check: conv3 is either a layer or None
        if self.conv3 is not None:
            # Project the activated input so that it can be added to the output
            residual = self.conv3(x)
        x = self.conv1(x)
        x = self.bn2(x, training=True)
        x = tf.keras.activations.relu(x)
        x = self.conv2(x)
        return x + residual

Now we define a simple model that consists of some ResNet modules:

In [4]:
class TrafficSignModel(tf.keras.Model):
    """Traffic sign classifier made of a strided convolution and three ResNet modules.

    The forward pass is: 7x7 convolution with stride 2 and ReLU, batch
    normalization, three (ResnetModule, 2x2 max pooling) stages with 64, 128
    and 256 channels, flatten, and a dense layer with 43 outputs.
    """

    def __init__(self, name):
        """Create all layers of the model.

        Args:
            name: Name of the model.
        """
        super().__init__(name=name)

        # Input stage
        self.first_conv = tf.keras.layers.Conv2D(
            filters=32,
            kernel_size=(7, 7),
            strides=(2, 2),
            activation=tf.keras.activations.relu,
            kernel_initializer=tf.keras.initializers.glorot_normal(),
            kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER_WEIGHT),
            name='first_conv',
        )
        self.bn1 = tf.keras.layers.BatchNormalization()

        # One pooling layer after each ResNet module
        self.maxpool1 = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)
        self.maxpool2 = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)
        self.maxpool3 = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)
        self.flatten = tf.keras.layers.Flatten()

        # ResNet modules with an increasing number of channels
        self.module1 = ResnetModule(name='rm1', num_output_channels=64)
        self.module2 = ResnetModule(name='rm2', num_output_channels=128)
        self.module3 = ResnetModule(name='rm3', num_output_channels=256)

        # Output layer: one value per class (we have 43 classes)
        self.fc = tf.keras.layers.Dense(units=43)

    def call(self, image):
        """Run the network on a batch of images and return one value per class.

        Args:
            image: Batch of images; it is cast to float32 before use.

        Returns:
            The output of the final dense layer (43 values per image).
        """
        # Cast to float and shift/scale by 128 so that a pixel value of 128 maps to 0
        # (8-bit pixel values end up roughly in the range -1 to 1)
        half_range = tf.constant(128.0, tf.float32)
        x = (tf.cast(image, tf.float32) - half_range) / half_range

        # Input stage
        x = self.bn1(self.first_conv(x), training=True)

        # Each stage is a ResNet module followed by max pooling
        stages = (
            (self.module1, self.maxpool1),
            (self.module2, self.maxpool2),
            (self.module3, self.maxpool3),
        )
        for module, pool in stages:
            x = pool(module(x))

        # Classification head
        return self.fc(self.flatten(x))

## Train and evaluate the model

Now that we have defined a data reader and the model, we can train and evaluate it.

In [5]:
# Build the training graph, train until the training dataset is exhausted and then
# report the mean accuracy over all batches of the test dataset.
with graph.as_default():
    session = tf.Session()

    # Instantiate the model we want to train
    net = TrafficSignModel('net')
    images, labels = iterator.get_next()
    logits = net(images)

    # Define the loss: cross entropy summed over the batch ...
    loss_op = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    loss_op += tf.reduce_sum(net.losses) # ... plus the regularizer losses

    # Define an OP to calculate the accuracy (fraction of examples in the batch
    # whose highest logit belongs to the correct class)
    correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), labels)
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Define the learning rate and the optimizer.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(1e-3, global_step, 500, 0.5)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss_op, global_step=global_step)
    train_op = tf.group(train_op, net.updates) # Add batch norm updates

    # Initialize the model variables (randomly).
    session.run(tf.global_variables_initializer())

    print('Train the traffic light classifier. This might take a while...')
    # Initialize the dataset iterator for training.
    session.run(train_init_op)
    i = 0
    while True:
        try:
            _, accuracy, loss = session.run([train_op, accuracy_op, loss_op])
        except tf.errors.OutOfRangeError:
            break # The iterator is exhausted: we finished!
        if i % 20 == 0:
            print('Step %4i - Accuracy: %.4f, loss: %.4f' % (i, accuracy, loss))
        i += 1

    print('Evaluate the classifier. This might take a while...')
    # Initialize the dataset iterator for testing.
    session.run(test_init_op)
    i = 0
    total_accuracy = 0.0
    while True:
        try:
            accuracy = session.run(accuracy_op)
        except tf.errors.OutOfRangeError:
            break # The iterator is exhausted: we finished!
        total_accuracy += accuracy
        i += 1

    # Every batch contributes with the same weight to the mean. Guard against an
    # empty test list, which would otherwise raise a ZeroDivisionError.
    if i > 0:
        print('Mean accuracy on the test dataset: %.4f' % (total_accuracy / i))
    else:
        print('The test dataset is empty, no accuracy could be computed.')

Train the traffic light classifier. This might take a while...
Step    0 - Accuracy: 0.0000, loss: 151.3727
Step   20 - Accuracy: 0.2188, loss: 106.7918
Step   40 - Accuracy: 0.2812, loss: 83.1193
Step   60 - Accuracy: 0.2812, loss: 73.3664
Step   80 - Accuracy: 0.7812, loss: 35.9986
Step  100 - Accuracy: 0.5938, loss: 49.1144
Step  120 - Accuracy: 0.5938, loss: 41.8743
Step  140 - Accuracy: 0.7812, loss: 20.0612
Step  160 - Accuracy: 0.8750, loss: 13.3664
Step  180 - Accuracy: 0.9062, loss: 11.2692
Step  200 - Accuracy: 0.8750, loss: 13.4957
Step  220 - Accuracy: 0.7812, loss: 15.9311
Step  240 - Accuracy: 1.0000, loss: 3.2403
Step  260 - Accuracy: 0.9375, loss: 5.7716
Step  280 - Accuracy: 1.0000, loss: 1.4531
Step  300 - Accuracy: 1.0000, loss: 1.3075
Step  320 - Accuracy: 0.9688, loss: 2.5226
Step  340 - Accuracy: 0.9375, loss: 4.7758
Step  360 - Accuracy: 1.0000, loss: 0.6766
Step  380 - Accuracy: 1.0000, loss: 4.0926
Step  400 - Accuracy: 0.9688, loss: 2.5101
Step  420 - Accuracy