In [None]:
from warnings import filterwarnings
filterwarnings('ignore')

import os
import shutil
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Activation, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.preprocessing import image
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Root directory of the GTSRB dataset. The trailing slash matters: the cells
# below build paths by plain string concatenation (e.g. f'{BASE_PATH}Train').
BASE_PATH = "../input/gtsrb-german-traffic-sign/"

In [None]:
def get_data(base_path=None):
    """Index the training images into a DataFrame.

    Walks ``<base_path>/Train/<class_dir>/`` and records every ``.png`` file
    together with the name of the class directory that contains it.

    Parameters
    ----------
    base_path : str or path-like, optional
        Dataset root that contains the ``Train`` directory. Defaults to the
        notebook-level ``BASE_PATH`` when omitted.

    Returns
    -------
    pandas.DataFrame
        Columns ``filename`` (bare file name, not a full path) and ``target``
        (class directory name, kept as a string). Rows are ordered by class
        directory name and then by file name so the result is reproducible.
    """
    root = BASE_PATH if base_path is None else base_path
    train_dir = os.path.join(root, 'Train')

    files, labels = [], []
    # os.listdir returns entries in arbitrary order; sort for determinism.
    for dirname in sorted(os.listdir(train_dir)):
        class_dir = os.path.join(train_dir, dirname)
        # Ignore stray files next to the class folders instead of crashing
        # when trying to list them as directories.
        if not os.path.isdir(class_dir):
            continue
        for file in sorted(os.listdir(class_dir)):
            if file.endswith('.png'):
                files.append(file)
                labels.append(dirname)

    return pd.DataFrame(data={
        'filename': files,
        'target': labels
    })

In [None]:
# One row per training image: its file name and the class folder it sits in.
data = get_data()

In [None]:
# Number of training images per class, largest class first.
# (A list is used as the aggregation spec; the previous set literal is an
# unordered container and not a documented way to name aggregations.)
data_gb_target = (
    data.groupby(['target'])['filename']
    .agg(['count'])
    .reset_index()
    .sort_values(by=['count'], ascending=False)
)

fig, ax = plt.subplots(figsize=(21, 9))
sns.barplot(x="target", y='count', data=data_gb_target,
            order=data_gb_target['target'], ax=ax)

# Write the count above each bar, centred on the bar's actual width rather
# than on a hard-coded offset.
for i in ax.patches:
    ax.annotate(f'{int(i.get_height())}',
                (i.get_x() + i.get_width() / 2, i.get_height()),
                ha='center', va='bottom',
                color='black')

ax.set_title("Image count in each class")
ax.set_ylabel("Counts")
ax.set_xlabel("Classes")
plt.show()

In [None]:
# File names of the test images, in sorted order.
# The test dataset is built with tf.data.Dataset.list_files(..., shuffle=False),
# which yields files in a deterministic sorted order, while os.listdir returns
# them in arbitrary order. Sorting here keeps row i of test_df aligned with
# prediction i when the labels are attached later.
test_filenames = sorted(
    file for file in os.listdir(f'{BASE_PATH}Test') if file.endswith('.png')
)
test_df = pd.DataFrame({
    'filename': test_filenames
})

In [None]:
# Input geometry: every image is resized to a 30x30 square with 3 channels.
IMG_H = IMG_W = 30
IMG_C = 3

# Training schedule.
BATCH_SIZE, EPOCHS = 32, 30

# Sentinel that lets tf.data choose parallelism / buffer sizes at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [None]:
def decode_img(img):
    """Decode encoded PNG bytes and resize the image to the model input size.

    Parameters
    ----------
    img : scalar string tensor
        Raw file contents as returned by ``tf.io.read_file``.

    Returns
    -------
    tf.Tensor
        Image resized to ``[IMG_W, IMG_H]`` with ``IMG_C`` channels.
    """
    # The dataset files are .png, so use the PNG decoder explicitly instead of
    # relying on decode_jpeg's cross-format fallback.
    img = tf.io.decode_png(img, channels=IMG_C)
    # NOTE(review): tf.image.resize expects [height, width]; the notebook passes
    # (IMG_W, IMG_H) everywhere (here, image_size=, the model input). That is
    # harmless while both are equal, and is kept so this stays consistent with
    # the other cells. Swap all of them together if a non-square size is used.
    return tf.image.resize(img, [IMG_W, IMG_H])

def process_path(file_path):
    """Read the file at ``file_path`` and return it decoded/resized by ``decode_img``."""
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes)

In [None]:
# Train/validation split of the labelled images. Both calls must share the
# same directory, split fraction and seed so the two subsets do not overlap.
split_kwargs = dict(
    validation_split=0.25,
    seed=123,
    image_size=(IMG_W, IMG_H),
    batch_size=BATCH_SIZE,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    f"{BASE_PATH}Train", subset="training", **split_kwargs)

validation_ds = tf.keras.preprocessing.image_dataset_from_directory(
    f"{BASE_PATH}Train", subset="validation", **split_kwargs)

# Unlabelled test images. shuffle=False keeps the file order deterministic so
# predictions can be matched back to file names.
test_files = tf.data.Dataset.list_files(f'{BASE_PATH}Test/*.png', shuffle=False)
# Count files before batching; after .batch() len() would report batches.
print(f"Using {len(test_files)} files for testing")

# Batch the test images like the training data instead of reshaping every
# image into its own batch of one, which makes prediction needlessly slow.
test_ds = (
    test_files
    .map(process_path, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
)

In [None]:
# Class labels as exposed by image_dataset_from_directory; position i in this
# list is the integer label i used by the datasets and the model output.
# NOTE(review): presumably these are the class folder names in alphanumeric
# (string) order; confirm against the TF docs. Read it here, before train_ds is
# re-assigned to a transformed dataset in the next cell.
class_names = train_ds.class_names
print(len(class_names))

In [None]:
# Cache decoded batches after the first pass, reshuffle the (already batched)
# training data with a 1000-element buffer, and prefetch upcoming batches.
train_ds = (
    train_ds
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation data is cached and prefetched but not shuffled again here.
validation_ds = (
    validation_ds
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

In [None]:
# Rescale pixel values by 1/255 so inputs land in [0, 1].
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)

# Labelled datasets: rescale the images, pass the labels through untouched.
normalized_train_ds = train_ds.map(
    lambda images, labels: (normalization_layer(images), labels)
)
normalized_val_ds = validation_ds.map(
    lambda images, labels: (normalization_layer(images), labels)
)
# The test dataset has no labels, so only the images are mapped.
normalized_test_ds = test_ds.map(lambda images: (normalization_layer(images)))

# Sanity check on one training batch: min/max of the first image should now
# fall inside [0, 1].
image_batch, labels_batch = next(iter(normalized_train_ds))
first_image = image_batch[0]
print(np.min(first_image), np.max(first_image))

In [None]:
def _conv_block(x, filters, kernel_size):
    """Conv(same) -> BN -> Conv(valid) -> BN -> 2x2 max-pool -> Dropout(0.25)."""
    x = Conv2D(filters, kernel_size, activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2D(filters, kernel_size, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
    return Dropout(.25)(x)


def create_model(num_classes=None):
    """Build the (uncompiled) CNN classifier and print its summary.

    Architecture: random-rotation augmentation, three convolutional blocks
    (32 filters 5x5, 64 filters 3x3, 128 filters 3x3), then
    Flatten -> Dense(128, relu) -> Dropout(0.3) -> softmax output.

    Parameters
    ----------
    num_classes : int, optional
        Size of the softmax output layer. Defaults to ``len(class_names)``
        from the notebook namespace when omitted.

    Returns
    -------
    tensorflow.keras.Model
        Functional model mapping an image of shape ``(IMG_W, IMG_H, IMG_C)``
        (or channels-first equivalent) to class probabilities.
    """
    # Reset Keras global state once, BEFORE any layer is created. Clearing the
    # session after layers exist (as was done previously, twice) discards the
    # state those live layers were registered in.
    tf.keras.backend.clear_session()

    if num_classes is None:
        num_classes = len(class_names)

    data_augmentation = tf.keras.Sequential([
        tf.keras.layers.experimental.preprocessing.RandomRotation(0.1)
    ])

    if tf.keras.backend.image_data_format() == 'channels_first':
        input_shape = (IMG_C, IMG_W, IMG_H)
    else:
        input_shape = (IMG_W, IMG_H, IMG_C)
    inputs = Input(shape=input_shape)

    x = data_augmentation(inputs)

    x = _conv_block(x, 32, (5, 5))
    x = _conv_block(x, 64, (3, 3))
    x = _conv_block(x, 128, (3, 3))

    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(.3)(x)

    outputs = Dense(num_classes, activation='softmax')(x)

    cmodel = Model(inputs, outputs)
    cmodel.summary()

    return cmodel

In [None]:
# Build the network; create_model() also prints the layer summary.
model = create_model()

In [None]:
# Halve the learning rate when validation accuracy has not improved for 3
# epochs, down to a floor of 1e-5.
# The monitored key must match what Keras logs: the model is compiled with
# metrics=['accuracy'], so the validation metric is 'val_accuracy'. With the
# old 'val_acc' key the metric is never found and the LR is never reduced.
lr_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                 patience=3,
                                 verbose=1,
                                 factor=0.5,
                                 min_lr=0.00001)

# Stop training once validation loss has not improved for 5 epochs.
es = EarlyStopping(monitor='val_loss',
                   min_delta=0,
                   patience=5,
                   verbose=0, mode='auto')

In [None]:
# Labels are integer class indices, hence the sparse categorical loss.
model.compile(optimizer='adamax', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
callbacks_list = [es, lr_reduction]

# No `shuffle=` argument: Keras ignores it when the input is a
# tf.data.Dataset; shuffling is already done in the input pipeline.
history = model.fit(
    normalized_train_ds,
    validation_data=normalized_val_ds,
    epochs=EPOCHS,
    callbacks=callbacks_list,
)

In [None]:
# Final metrics on the training and validation sets, shown as a tuple of the
# two evaluate() results (the model is compiled with loss + accuracy).
model.evaluate(normalized_train_ds), model.evaluate(normalized_val_ds)

In [None]:
# Plot the loss and accuracy curves for training and validation
# (loss in the top panel, accuracy in the bottom panel).
fig, ax = plt.subplots(2,1)
ax[0].plot(history.history['loss'], color='b', label="Training loss")
# The stray `axes=ax[0]` keyword was dropped: the line is already drawn on
# ax[0] by calling ax[0].plot.
ax[0].plot(history.history['val_loss'], color='r', label="validation loss")
ax[0].set_ylabel("Loss")
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(history.history['accuracy'], color='b', label="Training accuracy")
ax[1].plot(history.history['val_accuracy'], color='r',label="Validation accuracy")
ax[1].set_xlabel("Epoch")
ax[1].set_ylabel("Accuracy")
legend = ax[1].legend(loc='best', shadow=True)

In [None]:
# Ground-truth validation labels, flattened across batches in iteration order.
# NOTE(review): these are compared element-wise with model.predict() output
# later, which presumably relies on the validation dataset yielding the same
# order on every pass (it is cached upstream); confirm if the pipeline changes.
y_val_org = np.array([
    label
    for _, batch_labels in normalized_val_ds
    for label in batch_labels.numpy()
])

In [None]:
# Number of validation labels collected above.
len(y_val_org)

In [None]:
# Predicted class index for every validation image.
ypred_val = model.predict(normalized_val_ds)
ypred_val = np.argmax(ypred_val, axis=1)

# Index i corresponds to class_names[i], which is not necessarily class id i.
# Fix the label set explicitly (so the matrix always has one row/column per
# class) and show the real class names on the axes and in the report.
class_indices = list(range(len(class_names)))
cf_matrix = confusion_matrix(y_val_org, ypred_val, labels=class_indices)

plt.figure(figsize=(20,8))
ax = sns.heatmap(cf_matrix, annot=True, fmt='g',
                 xticklabels=class_names, yticklabels=class_names)
ax.set_xlabel("Predicted class")
ax.set_ylabel("True class")
plt.show()

print("\n\n")
print(classification_report(y_val_org, ypred_val,
                            labels=class_indices, target_names=class_names))

In [None]:
# Predict the test set and write the submission file.
results = model.predict(normalized_test_ds)
results = np.argmax(results,axis = 1)

# argmax yields positions in the model's output layer, i.e. indices into
# class_names, not class ids. The class folders are ordered as strings, so
# index 2 is not necessarily class '2'. Map each index back to its class name
# before saving.
test_df['label'] = np.asarray(class_names)[results]
test_df.to_csv("submission.csv",index=False)

In [None]:
# Preview the first rows of the submission table.
test_df.head()