# Intro

The German Traffic Sign Benchmark is a multi-class, single-image classification challenge held at the International Joint Conference on Neural Networks (IJCNN) 2011. We cordially invite researchers from relevant fields to participate: The competition is designed to allow for participation without special domain knowledge. Our benchmark has the following properties:

1. Single-image classification problem
2. Multi-class classification problem
3. More than 40 classes
4. More than 50,000 images in total
5. Large, lifelike database

### Labels Overview

In [None]:
import pandas as pd
# Read labels.csv; as the last expression of the cell, the resulting DataFrame is displayed by the notebook.
pd.read_csv('../input/traffic-signs-classification/labels.csv')

#   

# Importing Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.image import imread
import seaborn as sns

# Data Collection

In [None]:
# Root directory of the GTSRB dataset; the Train and Test folders and Test.csv are read from here.
dir_path = '../input/gtsrb-german-traffic-sign'

In [None]:
# Display the entries found in the dataset root directory.
os.listdir(dir_path)

In [None]:
# Folders holding the training and the test images, both located under the dataset root.
train_path = '/'.join((dir_path, 'Train'))
test_path = '/'.join((dir_path, 'Test'))

In [None]:
# Print the entries of the training folder, sorted as strings.
print(sorted(os.listdir(train_path)))

In [None]:
# Display the entries of the test folder, sorted as strings.
sorted(os.listdir(test_path))

##   

# Visualization

#### Visualizing 25 random sample images from test set

In [None]:
import random

# The Test folder can contain entries that are not images (e.g. a CSV file),
# which imread cannot decode, so only image files are eligible for display.
image_extensions = ('.png', '.jpg', '.jpeg', '.ppm')
images_path = [name for name in os.listdir(test_path)
               if name.lower().endswith(image_extensions)]

plt.figure(figsize=(25,25))

# random.sample draws without replacement, so the 5x5 grid never shows the
# same image twice; the sample is capped in case fewer than 25 images exist.
sample_size = min(25, len(images_path))

for i, image_name in enumerate(random.sample(images_path, sample_size), start=1):

    plt.subplot(5,5,i)
    random_img_path = test_path +'/'+ image_name
    rand_img = imread(random_img_path)
    plt.imshow(rand_img)
    plt.xlabel(rand_img.shape[1], fontsize = 20)#width of image
    plt.ylabel(rand_img.shape[0], fontsize = 20)#height of image


The dimensions of the images are not fixed. 

#### Note:
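The convolutional neural network we are going to build needs every input image to have the same dimensions, so it cannot be trained directly on images of varying size.
We will resize the images to a common shape during preprocessing, before building our model.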


But first find the mean of the dimensions of all the images in training set.

In [None]:
# Record the first (dim1) and second (dim2) shape dimension of every training
# image, walking the 43 class folders named '0' to '42'.
dim1, dim2 = [], []

for class_id in range(43):
    class_dir = '/'.join((train_path, str(class_id)))
    for file_name in os.listdir(class_dir):
        rows, cols = imread('/'.join((class_dir, file_name))).shape[:2]
        dim1.append(rows)
        dim2.append(cols)

#### Exploring the dimensions with a jointplot

In [None]:
# Joint distribution of the two image dimensions collected above.
# The data vectors are passed by keyword because current seaborn releases
# accept x and y as keyword-only arguments.
sns.jointplot(x=dim1, y=dim2)
plt.show()

In [None]:
# Mean of the first shape dimension (image height) over the training set.
np.mean(dim1)

In [None]:
# Mean of the second shape dimension (image width) over the training set.
np.mean(dim2)

Since the mean of both dimensions is around 50, we will use (50x50) as the shape of the images.

In [None]:
# Common size that every training image is resized to when it is loaded.
image_shape = (50,50)

#  

## Data Preprocessing

#### Importing the images

In [None]:
from PIL import Image

# Resized pixel arrays of all training images and, in the same order, the
# class id of each image (taken from the name of its folder).
images = []
label_id = []

for i in range(43):
    labels = train_path + '/{0}'.format(i)
    image_path = os.listdir(labels)
    for x in image_path:
        # The context manager closes the file handle as soon as the pixels
        # have been copied out, instead of leaving it to garbage collection.
        with Image.open(labels + '/' + x) as raw_img:
            # Forcing RGB guarantees that every array has the same number of
            # channels, so the list can later be stacked into one array.
            img = np.array(raw_img.convert('RGB').resize(image_shape))
        images.append(img)
        label_id.append(i)

#### Scaling the images so that the values remain between 0 and 1

In [None]:
# Stack the per-image arrays into a single numpy array and normalise it:
# pixel values range from 0 to 255, so dividing by 255 maps them into 0..1.
images = np.asarray(images) / 255

In [None]:
# Convert the list of class ids to a numpy array and display its shape.
label_id = np.array(label_id)
label_id.shape

In [None]:
# Display the shape of the scaled image array.
images.shape

In [None]:
# Bar chart with the number of training images per class id.
plt.figure(figsize=(15,5))
# The labels are passed as x= because current seaborn releases no longer
# accept the data vector as a positional argument.
sns.countplot(x=label_id)
plt.title('Distribution of images among different classes', fontsize = 15)
plt.xlabel('Label_id', fontsize=12)
plt.show()

In [None]:
#Saving the scaled images and labels for future use
#np.save appends the '.npy' extension, so the files are later loaded as 'Training_set.npy' and 'Label_Id.npy'
np.save('Training_set', images)
np.save('Label_Id', label_id)

## Splitting the train data into train and validation data

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Reload the scaled images and their class ids from the .npy files in the working directory.
images = np.load('Training_set.npy')
label_id = np.load('Label_Id.npy')

In [None]:
#Splitting the data: 20% is held out for validation; random_state fixes the shuffle so the split is reproducible
from sklearn.model_selection import  train_test_split
x_train, x_val, y_train, y_val = train_test_split(images, label_id , test_size = 0.2, random_state = 42)

#### Changing target labels to categorical  using one-hot encoding technique

In [None]:
#keras has a built-in function for one-hot encoding.
from tensorflow.keras.utils import to_categorical

# num_classes is fixed at 43: without it, to_categorical infers the width from
# the largest label present, so the two splits could end up with different widths.
# NOTE: the model in this notebook is compiled with sparse_categorical_crossentropy
# and trained on the integer labels, so these one-hot arrays are only needed
# if the loss is switched to categorical_crossentropy.
y_train_cat = to_categorical(y_train, num_classes = 43)

y_val_cat = to_categorical(y_val, num_classes = 43)

#    

## Model Building

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPool2D

In [None]:
# CNN classifier: three Conv2D -> MaxPool2D -> Dropout stages, followed by a
# dense hidden layer and a 43-way softmax output.
model = Sequential([
    # Stage 1: the input shape is taken from the training images
    Conv2D(filters = 64, kernel_size = (3,3), input_shape = x_train.shape[1:], activation = 'relu', padding = 'same'),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.5),

    # Stage 2
    Conv2D(filters = 64, kernel_size = (3,3), activation = 'relu'),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.5),

    # Stage 3
    Conv2D(filters = 64, kernel_size = (3,3), activation = 'relu'),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.5),

    # Flatten the feature maps for the fully connected part
    Flatten(),

    # Hidden dense layer
    Dense(128, activation = 'relu'),
    Dropout(0.5),

    # Output layer: one unit per traffic-sign class
    Dense(43, activation = 'softmax'),
])

In [None]:
# The sparse variant of categorical cross-entropy takes integer class ids as targets, so no one-hot encoding is needed for training.
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Stop training once val_loss has not improved for 2 consecutive epochs.
# restore_best_weights=True makes the callback reinstate the weights of the
# epoch with the lowest val_loss when it stops training; without it the model
# keeps (and later saves) the weights of the last, worse epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    patience = 2,
    restore_best_weights = True
)

# Train on the integer labels (the loss is the sparse cross-entropy) and
# evaluate on the validation split after every epoch.
model.fit(

    x_train, y_train,
    epochs = 25,
    batch_size = 64,
    validation_data = (x_val, y_val),
    callbacks = [early_stopping],
    verbose = 2

)

#### Achieved highest accuracy of 99.50% on validation data

In [None]:
#Saving the model to 'Model.h5' in the working directory; it is loaded from this file again before testing
model.save('Model.h5')

## Model Evaluation


In [None]:
# Per-epoch metrics recorded by the last call to model.fit, one column per metric.
evaluation = pd.DataFrame(model.history.history)

# Training vs. validation curves: one figure for accuracy, one for loss.
accuracy_columns = ['accuracy', 'val_accuracy']
loss_columns = ['loss', 'val_loss']
evaluation[accuracy_columns].plot()
evaluation[loss_columns].plot()

####   

## Testing on test data

In [None]:
from tensorflow.keras.models import load_model
# Reload the trained model from the file it was saved to.
model = load_model('Model.h5')

#### Note: The Test folder in the original dataset contains a blank CSV file, which cannot be opened as an image by the image-loading code below. So I copied that folder, deleted the CSV file, and uploaded the test images again separately. These test images are the same as the test images in the original dataset.

In [None]:
# Folder with the separately uploaded copy of the test images.
test_path = '../input/test-images/Test'
# Sorted so that the file order is deterministic.
# NOTE(review): the predictions are later compared row by row with Test.csv, so this
# presumably relies on the sorted file names matching the row order of Test.csv - verify.
test_img = sorted(os.listdir(test_path))

##### The images in the Kaggle kernel are not listed in the order in which they appear in the Test folder, because `os.listdir` does not guarantee any particular order. Upon inspection, the images in the folder are in sorted order, so the `sorted()` function is used to restore that order.

In [None]:
#defining a function that will scale images
from PIL import Image

def scaling(test_images, test_path, image_shape=(50, 50)):
    """Load, resize and normalise a collection of images.

    Parameters
    ----------
    test_images : iterable of str
        File names of the images, relative to ``test_path``.
    test_path : str
        Directory that contains the image files.
    image_shape : tuple of int, optional
        Target size handed to ``Image.resize``. Defaults to ``(50, 50)``,
        the size that was hard-coded previously.

    Returns
    -------
    numpy.ndarray
        The resized images stacked into one array, with the pixel values
        divided by 255 so that they lie between 0 and 1. The array is empty
        when ``test_images`` is empty.
    """
    images = []

    for x in test_images:
        # The context manager closes the file handle right after the pixels
        # have been copied into the numpy array.
        with Image.open(test_path + '/' + x) as img:
            # Forcing RGB keeps the channel count identical for every image,
            # so the list can be stacked into a single array.
            images.append(np.array(img.convert('RGB').resize(image_shape)))

    #The pixel value of each image ranges between 0 and 255
    #Dividing each image by 255 will scale the values between 0 and 1. This is also known as normalization.
    return np.array(images) / 255

**The above function can be used to scale any new traffic-sign images before they are passed to our model for prediction. It is a general-purpose function written for code reusability.**

In [None]:
# Load, resize and normalise all test images in the sorted file order.
test_images = scaling(test_img,test_path)

#### Test labels

In [None]:
# Test.csv from the original dataset; its 'ClassId' column is used as the ground-truth labels.
test = pd.read_csv('../input/gtsrb-german-traffic-sign/Test.csv')

# Ground-truth class ids as a numpy array, in the row order of Test.csv.
y_test = test['ClassId'].values

y_test

## Testing on test images

In [None]:
# Sequential.predict_classes has been removed from TensorFlow; for a softmax
# output layer the predicted class is the index of the largest probability,
# so taking the argmax over the last axis yields the same class ids.
y_pred = np.argmax(model.predict(test_images), axis = -1)

y_pred

In [None]:
from sklearn.metrics import classification_report

# Print the classification report comparing the true test labels with the predicted ones.
print(classification_report(y_test,y_pred))

#### Our model achieved an overall accuracy of 97% on the test set. This is pretty good, and we can use this model to predict other traffic signs in the future as well.