# TLdetect

An implementation of a convolutional neural network that detects the state of traffic lights in images. The possible states are red, green, yellow and no traffic light. The network is built with the **TensorFlow** framework.

### Importing packages

In [None]:
from PIL import Image
import tensorflow as tf
import numpy as np
import pandas as pd
import random
import src.data_processing as data

### Pre-processing

Loading the images and labels and preparing the dataset so that it better fits our model. Note that the images themselves are not loaded yet, only their file names, in order to save memory and avoid bottlenecks and errors.

In [None]:
# Load the labels through the project's data-processing module.
# For now the conv net only decides whether an image has a traffic light or not.
labels = data.load_labels()
label_count = len(labels)
print("Number of labels loaded: %d" % label_count)

In [None]:
# "Loading" the images: only their file names are taken here.
images = labels['file']
X_train, X_val, X_test = data.split_dataset(images)

# Debug: report the total number of file names and the shape of each split
total_images = len(images)
print("Data set of %d images split in 3 sets.\n" % total_images)
for template, subset in (("Train: {}", X_train),
                         ("Validation: {}", X_val),
                         ("Test: {}", X_test)):
    print(template.format(subset.shape))

In [None]:
# Splitting the labels: one label set per image split, extracted in
# train / validation / test order.
y_train, y_val, y_test = (
    data.extract_labels(labels, subset)
    for subset in (X_train, X_val, X_test)
)

### Creating the model

In the following cells we define our model. For now, the model only classifies whether a given image contains a traffic light or not.

#### The following architecture will be used:
1. **Convolutional layer**
2. **Pooling layer**
3. **Convolutional layer**
4. **Pooling layer**
5. **Affine layer**
6. **Classification**

**Defining useful functions**

In [None]:
from tensorflow.contrib.keras import layers, models

# Keras version of the classifier: conv -> pool -> conv -> pool -> affine ->
# 2-way classification (traffic light / no traffic light).
model = models.Sequential()
model.add(layers.Conv2D(filters=5, kernel_size=3, strides=1,
                        padding='same',
                        data_format='channels_last',
                        # channels_last means (rows, cols, channels), i.e.
                        # (height, width, 3). This is the layout produced by
                        # np.array(PIL image) and the one used by the
                        # low-level placeholder X defined further below.
                        # NOTE(review): assumes 1920x1200 (W x H) images.
                        input_shape=(1200, 1920, 3),
                        activation='relu'
                       ))
model.add(layers.MaxPool2D(pool_size=2, strides=2))
model.add(layers.Conv2D(filters=4, kernel_size=3, strides=1,
                        padding='same',
                        activation='relu'
                       ))
model.add(layers.MaxPool2D(pool_size=2, strides=2))
# Collapse the feature maps into one vector per image. Without this, Dense
# would be applied along the channel axis only and the model output would stay
# 4-D instead of being (batch, 2).
model.add(layers.Flatten())
model.add(layers.Dense(1024, activation='relu'))
# categorical_crossentropy expects class probabilities, hence the softmax.
model.add(layers.Dense(2, activation='softmax'))

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Load one training image and turn it into a batch of size 1.
test = X_train[0]
print(test)
# np.array on a PIL image yields (height, width, channels).
img = np.array(Image.open('data/object-dataset/' + test))
# Add the batch axis without touching the pixel layout. Reshaping to
# (1, 1920, 1200, 3) would not transpose the image; it would reinterpret the
# buffer and scramble the pixels.
img = np.expand_dims(img, axis=0)

print(img.shape)
#label_y = ['car']
#model.fit(img, label_y)

In [None]:
# Display the shape of the training labels extracted for X_train.
y_train.shape

In [None]:
def convolution(inputs, W, stride=(1, 1, 1, 1), padding='SAME'):
    """
        Convolves a batch of images with a set of kernels.

        Expected shapes:
            inputs  -> (N, H, W, C)
            weights -> (H, W, C, K)
        with
            N = Number of inputs
            H = Height
            W = Width
            C = Channel
            K = Number of kernels

        Thin wrapper around tf.nn.conv2d: the stride and padding arguments
        are forwarded to it unchanged.
    """
    convolved = tf.nn.conv2d(inputs, W, strides=stride, padding=padding)
    return convolved

def max_pooling(inputs, window_size=(1, 2, 2, 1), stride=(1, 2, 2, 1),
                padding='SAME'):
    """
        Max-pools the given images; with the default window and stride this
        is a 2x2 pooling that reduces the resolution.

        Thin wrapper around tf.nn.max_pool: window_size is passed as ksize,
        stride as strides, and padding is forwarded unchanged.
    """
    pooled = tf.nn.max_pool(inputs, ksize=window_size, strides=stride,
                            padding=padding)
    return pooled

**Defining hyperparameters, weights and biases**

In [None]:
# TODO: Improve the implementation. The model is fitted only for one type. Generalize more.

# Graph inputs: a batch of 1200x1920 RGB images and their 2-way labels
X = tf.placeholder(tf.float32, shape=[None, 1200, 1920, 3])
Y = tf.placeholder(tf.float32, shape=[None, 2])

# Trainable parameters, one dict per layer ('W' = weights, 'b' = biases).
# Weights start from a small-variance normal distribution, biases from zero.
conv_0_params = {
    'W': tf.Variable(tf.random_normal((3, 3, 3, 4), mean=0, stddev=1e-2),
                     dtype=tf.float32),
    'b': tf.Variable(tf.zeros(4)),
}

conv_1_params = {
    'W': tf.Variable(tf.random_normal((3, 3, 4, 4), mean=0, stddev=1e-2),
                     dtype=tf.float32),
    'b': tf.Variable(tf.zeros(4)),
}

# 300*480*4 is the size used to flatten the second pooling output.
fc_0_params = {
    'W': tf.Variable(tf.random_normal((300*480*4, 100), mean=0, stddev=1e-2),
                     dtype=tf.float32),
    'b': tf.Variable(tf.zeros(100)),
}

fc_1_params = {
    'W': tf.Variable(tf.random_normal((100, 2), mean=0, stddev=1e-2),
                     dtype=tf.float32),
    'b': tf.Variable(tf.zeros(2)),
}

**Defining the architecture itself**

In [None]:
# Architecture: two conv + ReLU + max-pool stages, then two affine layers.

# Stage 0
conv_0_linear = convolution(X, conv_0_params['W']) + conv_0_params['b']
conv_0 = tf.nn.relu(conv_0_linear)
pool_0 = max_pooling(conv_0)

# Stage 1
conv_1_linear = convolution(pool_0, conv_1_params['W']) + conv_1_params['b']
conv_1 = tf.nn.relu(conv_1_linear)
pool_1 = max_pooling(conv_1)

# Flatten each pooled feature map into one row before the affine layers
flat_size = 300 * 480 * 4
reshaped_X = tf.reshape(pool_1, [-1, flat_size])

# Hidden affine layer with ReLU, then the output layer producing raw logits
fc_0_linear = tf.matmul(reshaped_X, fc_0_params['W']) + fc_0_params['b']
fc_layer_0 = tf.nn.relu(fc_0_linear)
fc_layer_1 = tf.matmul(fc_layer_0, fc_1_params['W']) + fc_1_params['b']

**Selecting the data to be used for training**

In [None]:
# Load the pixel data of the first two training images into `Images`.
# The opened image used to be discarded, which left `Images` empty.
# Two images are loaded because the training cell feeds the first two labels.
# NOTE(review): stacking assumes both images have the same dimensions and
# match the (1200, 1920, 3) shape of placeholder X - confirm for the dataset.
Images = []
for file_name in X_train[:2]:
    # The context manager closes the file once the pixels have been copied.
    with Image.open('data/object-dataset/' + file_name) as image_file:
        Images.append(np.array(image_file))
Images = np.array(Images)

**Training the model**

In [None]:
# Let TensorFlow allocate GPU memory on demand instead of reserving it all.
# The option is named `allow_growth`; it must be assigned and the config must
# be handed to the session to have any effect.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

# Defining the loss function: mean softmax cross-entropy over the batch,
# computed from the raw logits of the last affine layer.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=fc_layer_1))

# One RMSProp update step (learning rate 1e-4) that minimizes the loss
training_step = tf.train.RMSPropOptimizer(1e-4).minimize(loss)
# Accuracy: fraction of samples whose arg-max logit matches the arg-max label
prediction_test = tf.equal(tf.argmax(fc_layer_1,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(prediction_test, tf.float32))

sess.run(tf.global_variables_initializer())

In [None]:
# TODO: Accuracy not being measured as should be.
#       Beyond that, put everything in half-precision to save memory

# Effectively training: 100 optimizer steps on the same two-sample batch.
# The loss is printed every 10 iterations, the accuracy every 50.
with sess.as_default():
    # The labels live in `y_train` (lower-case); `Y_train` was never defined.
    # NOTE(review): assumes y_train[:2] has shape (2, 2) as placeholder Y
    # expects - confirm against data.extract_labels.
    batch_feed = {Y: y_train[:2], X: Images}
    for iteration in range(100):
        if iteration % 10 == 0:
            loss_value = loss.eval(feed_dict=batch_feed)
            print("Loss: %f" % loss_value)
        if iteration % 50 == 0:
            acc = accuracy.eval(feed_dict=batch_feed)
            print("Iteration: %d, Accuracy: %f" % (iteration, acc))
        training_step.run(feed_dict=batch_feed)

In [None]:
import matplotlib.pyplot as plt

# Visual sanity check: draw the first entry of Images
first_image = Images[0]
plt.imshow(first_image)
plt.show()