In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,Dense,Dropout,BatchNormalization,Activation,Flatten,MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from PIL import Image

In [None]:
# Importing data: read Train.csv into a DataFrame and preview the first rows

train_df = pd.read_csv('../input/gtsrb-german-traffic-sign/Train.csv')
train_df.head()

In [None]:
# Count the missing values in every column

train_df.isna().sum()

In [None]:
# Number of distinct output categories (unique ClassId values)

train_df['ClassId'].nunique()

In [None]:
# Summary of the training frame: columns, dtypes and non-null counts
train_df.info()

In [None]:
# Bar chart of how many training rows each ClassId has
plt.figure(figsize=(16, 6))
sns.countplot(data=train_df, x='ClassId')

In [None]:
# Collect every training path that does not point at a .png file

a = list(filter(lambda path: not path.endswith('.png'), train_df['Path']))
a

### --->What is the use of Conv2D?
#### The mandatory Conv2D parameter is the number of filters the convolutional layer will learn. It is an integer and also determines the number of output channels (feature maps) of the convolution. Here the first layer learns 32 filters, and we then use max pooling to reduce the spatial dimensions of the output volume.

### --->How does batch normalization work?
#### Batch normalisation normalises a layer input by subtracting the mini-batch mean and dividing it by the mini-batch standard deviation.

### --->Max pooling?
#### Max pooling is a pooling operation that selects the maximum element from the region of the feature map covered by the filter. Thus, the output after max-pooling layer would be a feature map containing the most prominent features of the previous feature map.

### --->Dropout?
#### Dropout is a technique where randomly selected neurons are ignored during training. They are “dropped-out” randomly. This means that their contribution to the activation of downstream neurons is temporarily removed on the forward pass and any weight updates are not applied to the neuron on the backward pass.

### --->Flatten?
#### Flattening is converting the data into a 1-dimensional array for inputting it to the next layer.

In [None]:
# Display the CNN architecture illustration without axis ticks
im = Image.open('../input/cnn-img/cnn.jpg')
plt.figure(figsize=(22, 12))
plt.axis('off')
plt.imshow(im)

In [None]:
# Creating Model
# Three Conv -> BatchNorm -> MaxPool -> Dropout stages (32, 64, 128 filters),
# followed by a 512-unit dense layer and a 43-way softmax output.

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(120, 120, 3)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(43, activation='softmax'),
])
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model built above
model.summary()

### --->Callback API?
#### A callback is an object that can perform actions at various stages of training (e.g. at the start or end of an epoch, before or after a single batch, etc).

In [None]:
# Creating callbacks

# Stop training once validation accuracy has not improved for 5 epochs.
early_stopping=EarlyStopping(monitor='val_accuracy',patience=5)
# Halve the learning rate (down to min_lr) after 2 epochs without improvement.
# This patience must be smaller than EarlyStopping's: with equal patience both
# callbacks fire on the same epoch, so the reduced rate would never be trained with.
reduce_lr=ReduceLROnPlateau(monitor='val_accuracy',factor=0.5,patience=2,verbose=1,min_lr=0.0001)
callback=[early_stopping,reduce_lr]

In [None]:
# Cast ClassId to string labels (the generators below are used with class_mode='categorical')

train_df = train_df.astype({'ClassId': str})

In [None]:
# Keep only the label and image-path columns
train_df = train_df.loc[:, ['ClassId', 'Path']]
train_df.head()

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable

train_data, validation_data = train_test_split(
    train_df,
    test_size=0.2,
    random_state=10,
)

In [None]:
# (rows, columns) of the training and validation splits
train_data.shape, validation_data.shape

In [None]:
# Resetting the index of both splits so each is numbered from 0 again

train_data, validation_data = (
    part.reset_index(drop=True) for part in (train_data, validation_data)
)

In [None]:
# Preview one training image (the row labelled 24 in train_df)
sample_path = train_df.loc[24, 'Path']
img = Image.open('../input/gtsrb-german-traffic-sign/' + sample_path)
plt.imshow(img)

### --->Data Augmentation
#### Techniques used to increase the amount of data by adding slightly modified copies of already existing data or newly created synthetic data from existing data. It acts as a regularizer and helps reduce overfitting when training a machine learning model.

In [None]:
# Training data augmentation
#
# Small rotations and shifts only. Horizontal flipping is deliberately NOT used:
# several traffic-sign classes are mirror images of one another (e.g. turn / keep
# left vs. right), so a flipped image would keep its label but show a different sign.

train_gen=ImageDataGenerator(rotation_range=15,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             rescale=1./255)

# Stream batches of 30 images, resized to the model's 120x120 input, with one-hot labels.
train_generator=train_gen.flow_from_dataframe(train_data,
                                              '../input/gtsrb-german-traffic-sign',
                                              x_col='Path',
                                              y_col='ClassId',
                                              target_size=(120,120),
                                              batch_size=30,
                                              class_mode='categorical')

In [None]:
# Validation data generator: pixel rescaling only, no augmentation

validation_gen = ImageDataGenerator(rescale=1./255)

validation_generator = validation_gen.flow_from_dataframe(
    dataframe=validation_data,
    directory='../input/gtsrb-german-traffic-sign',
    x_col='Path',
    y_col='ClassId',
    target_size=(120, 120),
    batch_size=30,
    class_mode='categorical',
)

In [None]:
# Executing the model

# Model.fit accepts generators directly; fit_generator is deprecated and is no
# longer available in current Keras releases.
# The step counts are taken from the generators instead of hard-coded sample
# counts, so they stay correct if the split or batch size changes and the final
# partial batch of each epoch is no longer dropped.
history=model.fit(train_generator,
                  steps_per_epoch=len(train_generator),
                  epochs=30,
                  verbose=1,
                  callbacks=callback,
                  validation_data=validation_generator,
                  validation_steps=len(validation_generator))

In [None]:
# Load the test index, keeping only the image path and its true label

test_df = pd.read_csv(
    '../input/gtsrb-german-traffic-sign/Test.csv',
    usecols=['Path', 'ClassId'],
)
test_df.head()

In [None]:
# Count the missing values in every test column

test_df.isna().sum()

In [None]:
# Collect every test path that does not point at a .png file

b = list(filter(lambda path: not path.endswith('.png'), test_df['Path']))
b

In [None]:
# Test data generator: pixel rescaling only, no augmentation and no labels

test_gen=ImageDataGenerator(rescale=1./255)

# shuffle=False is essential here: flow_from_dataframe shuffles by default, which
# would return predictions in a random order that no longer matches test_df's rows.
test_generator=test_gen.flow_from_dataframe(test_df,
                                            '../input/gtsrb-german-traffic-sign',
                                            x_col='Path',
                                            y_col=None,
                                            target_size=(120,120),
                                            batch_size=15,
                                            class_mode=None,
                                            shuffle=False)

In [None]:
# Predicting the Images

# Sequential.predict_classes has been removed from Keras; take the arg-max of the
# softmax probabilities instead.
predicted_index=np.argmax(model.predict(test_generator),axis=-1)

# The network outputs the training generator's class indices, not ClassId values.
# The labels were fed in as strings, so the generator orders them as text
# ('0','1','10','11',...) and index 2 is ClassId 10, not 2. Translate every index
# back to its integer ClassId so it can be compared with test_df['ClassId'].
index_to_class={index:int(label) for label,index in train_generator.class_indices.items()}
predict=np.array([index_to_class[i] for i in predicted_index])

In [None]:
from sklearn.metrics import confusion_matrix,classification_report

In [None]:
# Per-class precision / recall / F1 on the test set.
# Uses the `predict` array directly: the 'Predict' column is only added to test_df
# in a later cell, so reading it here fails on a fresh top-to-bottom run.
print(classification_report(test_df['ClassId'],predict))

In [None]:
# Confusion matrix of true ClassId (rows) against predicted class (columns).
# Uses the `predict` array directly: the 'Predict' column is only added to test_df
# in a later cell, so reading it here fails on a fresh top-to-bottom run.
confusion_matrix(test_df['ClassId'],predict)

In [None]:
# Store the predictions beside the true labels and locate the rows where they agree

test_df['Predict'] = predict
is_correct = test_df['ClassId'] == test_df['Predict']
a = np.nonzero(is_correct.to_numpy())
a

In [None]:
# Select correctly classified rows from the data itself rather than from hard-coded
# row labels, which go stale (or become wrong) whenever the model is retrained.
# Only the first 9 matches are kept so the comparison figures stay readable.
correct_pred=test_df[test_df['ClassId']==test_df['Predict']].head(9)
correct_pred

In [None]:
# Discard the row labels carried over from test_df and renumber the rows from 0
correct_pred=correct_pred.reset_index(drop=True)
correct_pred.head()

In [None]:
# Visualizing only for correct prediction
# One row per sample: reference sign of the true class (left) and of the
# predicted class (right), both taken from the Meta/ folder.

class_id=correct_pred['ClassId']
pred=correct_pred['Predict']
meta_dir='../input/gtsrb-german-traffic-sign/Meta/'
# The grid height follows the data instead of assuming exactly 9 rows.
n_rows=len(correct_pred)

if n_rows:
    # squeeze=False keeps `axes` two-dimensional even when there is a single row.
    fig,axes=plt.subplots(n_rows,2,figsize=(16,16),squeeze=False)
    for row,(true_id,pred_id) in enumerate(zip(class_id,pred)):
        panels=((axes[row,0],true_id,'ClassId'),
                (axes[row,1],pred_id,'Predicted_ClassId'))
        for ax,sign_id,title in panels:
            # Draw inside the with-block so the file is read before it is closed.
            with Image.open(meta_dir+str(sign_id)+'.png') as img:
                ax.imshow(img)
            ax.set_title(title)
            ax.axis('off')
    # tight_layout only has an effect once the subplots exist, so it is called last.
    fig.tight_layout()

In [None]:
# Visualizing only for correct prediction with test data
# One row per sample: the test image itself, the reference sign of its true
# class, and the reference sign of the predicted class.

class_id=correct_pred['ClassId']
pred=correct_pred['Predict']
data_dir='../input/gtsrb-german-traffic-sign/'
# The grid height follows the data instead of assuming exactly 9 rows.
n_rows=len(correct_pred)

if n_rows:
    # squeeze=False keeps `axes` two-dimensional even when there is a single row.
    fig,axes=plt.subplots(n_rows,3,figsize=(16,16),squeeze=False)
    samples=zip(correct_pred['Path'],class_id,pred)
    for row,(path,true_id,pred_id) in enumerate(samples):
        # (image file, panel title); the raw test image is shown without a title.
        panels=((data_dir+path,None),
                (data_dir+'Meta/'+str(true_id)+'.png','ClassId'),
                (data_dir+'Meta/'+str(pred_id)+'.png','Predicted_ClassId'))
        for ax,(image_path,title) in zip(axes[row],panels):
            # Draw inside the with-block so the file is read before it is closed.
            with Image.open(image_path) as img:
                ax.imshow(img)
            if title is not None:
                ax.set_title(title)
            ax.axis('off')