In [1]:
import csv
import os
import random
from time import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import utils
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

%matplotlib inline

In [None]:
# Display library versions so a reader can reproduce the environment
print("Library Versions:")
print(f"OS: {os.name}")
print(f"OpenCV version: {cv2.__version__}")
print(f"NumPy version: {np.__version__}")
print(f"Pandas version: {pd.__version__}")
print(f"TensorFlow version: {tf.__version__}")
print(f"Matplotlib version: {plt.matplotlib.__version__}")

# Check if TensorFlow can see a GPU.
# tf.test.is_gpu_available() is deprecated; listing the physical devices is the
# supported replacement, and querying once lets the same list be printed below.
print("\nTensorFlow Device Status:")
gpu_devices = tf.config.list_physical_devices('GPU')
if gpu_devices:
    print("TensorFlow is using GPU.")
    print(f"Available GPU devices: {gpu_devices}")
else:
    print("TensorFlow is using CPU.")

In [None]:
# Initial variables
# Classes with fewer images than this are topped up by augmentation().
MIN_IMGS_IN_CLASS = 500
# Side length (out_side) handed to the image loaders.
image_size = 50

In [None]:
# Meta images for the 43 separate traffic sign classes we are learning to identify,
# laid out on an 8x8 grid with the class index as the x-label of each tile.
meta_fig = plt.figure(figsize=(18, 18))
for i in range(43):
    meta_ax = meta_fig.add_subplot(8, 8, i + 1)
    # Hide the tick marks; only the picture and its class index matter here.
    meta_ax.set_xticks([])
    meta_ax.set_yticks([])
    path = "data/meta/{0}.png".format(i)
    img = plt.imread(path)
    meta_ax.imshow(img)
    meta_ax.set_xlabel(i)

In [None]:
# Load the training annotations.
train_image_data = pd.read_csv('data/train.csv')

# Summary statistics of the numeric columns. describe must be *called*, and
# display() is used because only a cell's last expression is rendered
# automatically (display is provided as a built-in by the IPython kernel).
display(train_image_data.describe())

# Number of annotated rows per class. Counting on 'ClassId' (as the plotting
# cells do) instead of on whole rows keeps the output to one entry per class.
train_image_data.value_counts('ClassId').to_dict()

In [None]:
# Box plot of the 'Width' and 'Height' columns of the training annotations
train_image_data.boxplot(column=['Width', 'Height'])

In [None]:
# Pie chart of how the annotated rows are split across ClassId values
train_image_data.value_counts('ClassId').plot.pie(
    title='Percentage of Images in Each Category',
    figsize=(10, 10),
    startangle=90,      # first wedge starts at 90 degrees
    autopct='%1.1f%%',  # print each wedge's percentage
)

In [None]:
# Horizontal bar chart of the number of annotated rows per class.
# (matplotlib.pyplot is already imported as plt in the notebook's import cell.)
ax = train_image_data.value_counts('ClassId').plot(
    kind='barh',                     # Horizontal bar chart
    figsize=(20, 10),                # Customize figure size
    color='lightgreen',              # Bar color
    edgecolor='black',               # Outline for bars
    title='Number of Labelled Images in Each Category'  # Title
)

# Annotate the bars with their values. The label is anchored at the vertical
# centre of each bar (y + height / 2) so it lines up with the bar regardless of
# the bar thickness, and 5 data units to the right of the bar's end.
for p in ax.patches:
    ax.annotate(
        str(int(p.get_width())),
        (p.get_width() + 5, p.get_y() + p.get_height() / 2),
        ha='left', va='center', fontsize=10, color='black',
        xytext=(0, 0), textcoords='offset points',
    )

# Add axis labels
ax.set_xlabel('Count of Images')
ax.set_ylabel('Class ID')

plt.show()


In [None]:
#Functions to perform tasks 
#Preprocess function
def preprocess(image, out_side):
    """Scale an image to fit inside an out_side x out_side canvas, centred.

    The image is scaled uniformly so that its longer side becomes ``out_side``
    pixels, then translated so the scaled image sits in the middle of the
    square output. The aspect ratio is preserved; the area not covered by the
    image is filled by cv2.warpAffine's default border handling.

    Args:
        image: Image array whose first two dimensions are (height, width),
            e.g. the result of ``cv2.imread``.
        out_side: Side length in pixels of the square output.

    Returns:
        The warped image with spatial size (out_side, out_side).

    Raises:
        ValueError: If ``image`` is None (``cv2.imread`` returns None for
            files it cannot read) or has no pixels.
    """
    if image is None or image.size == 0:
        raise ValueError(
            "preprocess() received an empty image; was the file read correctly?"
        )

    height, width = image.shape[:2]
    scale = out_side / max(height, width)
    # Offsets that centre the scaled image along each axis of the canvas.
    dx = (out_side - scale * width) / 2
    dy = (out_side - scale * height) / 2
    trans = np.array([[scale, 0, dx], [0, scale, dy]], dtype=np.float32)
    # warpAffine already emits an (out_side, out_side) image, so no further
    # resize is needed afterwards.
    # NOTE(review): INTER_AREA is documented for cv2.resize; confirm that
    # cv2.warpAffine honours it rather than falling back to bilinear.
    return cv2.warpAffine(image, trans, (out_side, out_side), flags=cv2.INTER_AREA)

#mixing images function
def mixing(images, labels):
    """Shuffle images and labels together with a fixed seed.

    Both inputs are converted to NumPy arrays and reordered by the same
    permutation, so each image keeps its label. The global NumPy random
    generator is re-seeded with 43 on every call, which makes the permutation
    identical between calls for inputs of the same length.

    Args:
        images: Sequence of images (anything ``np.array`` accepts).
        labels: Sequence of labels, one per image.

    Returns:
        Tuple ``(images, labels)`` of shuffled NumPy arrays.
    """
    image_arr = np.array(images)
    label_arr = np.array(labels)

    np.random.seed(43)
    order = np.arange(image_arr.shape[0])
    np.random.shuffle(order)

    return image_arr[order], label_arr[order]

#load train images function
def load_train(path, out_side):
    """Load, preprocess and shuffle the training images.

    Expects ``<path>/train/<class_id>/`` directories whose names are integer
    class ids; every readable image inside a class directory becomes one
    sample labelled with that id.

    Args:
        path: Dataset root directory containing the ``train`` folder.
        out_side: Side length passed to ``preprocess`` for every image.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays as returned by ``mixing``.
    """
    train_root = os.path.join(path, 'train')
    images = []
    labels = []

    # os.listdir returns entries in arbitrary order, so sort (numerically) to
    # make the pre-shuffle order, and therefore the seeded shuffle, reproducible.
    # Entries that are not numeric directories (e.g. OS metadata files) are not
    # class folders and are ignored.
    class_folders = sorted(
        (
            name for name in os.listdir(train_root)
            if name.isdecimal() and os.path.isdir(os.path.join(train_root, name))
        ),
        key=int,
    )

    for folder in class_folders:
        cur_path = os.path.join(train_root, folder)
        print(cur_path)
        for file_name in sorted(os.listdir(cur_path)):
            file_path = os.path.join(cur_path, file_name)
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread returns None for files it cannot decode; a stray
                # non-image file should not abort the whole load.
                print(f"Skipping unreadable file: {file_path}")
                continue
            images.append(preprocess(image, out_side))
            labels.append(int(folder))

    return mixing(images, labels)

#load test images function
def load_test(path, out_side):
    """Load, preprocess and shuffle the test images listed in ``test.csv``.

    Each data row of ``<path>/test.csv`` supplies the image path (column 7,
    relative to ``path``) and its integer class label (column 6). Rows whose
    path column holds the literal header text ``Path`` are skipped.

    Args:
        path: Dataset root directory containing ``test.csv``.
        out_side: Side length passed to ``preprocess`` for every image.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays as returned by ``mixing``.

    Raises:
        ValueError: If an image listed in the CSV cannot be read. Unlike stray
            files in a directory, a listed-but-unreadable test image would
            silently change the evaluation set, so it is reported.
    """
    path_col = 7
    label_col = 6
    images = []
    labels = []

    # newline='' is what the csv module asks for when opening files for reading.
    with open(os.path.join(path, 'test.csv'), 'r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline).
                continue
            rel_path = row[path_col]
            if rel_path == 'Path':
                # Header row.
                continue
            image_path = os.path.join(path, rel_path)
            image = cv2.imread(image_path)
            if image is None:
                raise ValueError(f"Could not read test image: {image_path}")
            images.append(preprocess(image, out_side))
            labels.append(int(row[label_col]))

    return mixing(images, labels)


In [None]:
# Read, preprocess and shuffle every training image under data/train
train_images, train_labels = load_train(path="data/", out_side=image_size)

In [None]:
# Shapes of the loaded image and label arrays, one per line
print(train_images.shape, train_labels.shape, sep="\n")

In [None]:
#Once we have the training data loaded, we can preview them
def preview(images, labels, bgr=True):
    """Show one randomly chosen image for every class present in ``labels``.

    Args:
        images: Indexable collection of images, aligned with ``labels``.
        labels: Class label of each image.
        bgr: When True (default), 3-channel images are assumed to be in
            OpenCV's BGR channel order (as produced by ``cv2.imread``) and are
            flipped to RGB before display, since matplotlib expects RGB.
            Pass False for images that are already RGB.
    """
    labels = np.asarray(labels)
    # Iterate over the labels that actually occur, so missing or
    # non-contiguous class ids do not lead to sampling from an empty set.
    class_ids = np.unique(labels)

    n_cols = 8
    # Keep the 8x8 grid for up to 64 classes; add rows only if more are needed.
    n_rows = max(8, -(-len(class_ids) // n_cols))

    plt.figure(figsize=(16, 16))
    for pos, c in enumerate(class_ids):
        i = random.choice(np.where(labels == c)[0])
        img = np.asarray(images[i])
        if bgr and img.ndim == 3 and img.shape[2] == 3:
            img = img[..., ::-1]  # BGR -> RGB
        plt.subplot(n_rows, n_cols, pos + 1)
        plt.axis('off')
        plt.title('class: {}'.format(c))
        plt.imshow(img)

In [None]:
# Data augmentation
def augment_imgs(imgs, p):
    """
    Apply a random set of augmentations to the images with probability p.

    The augmentation is an imgaug ``SomeOf((2, 4), ...)`` over the operations
    listed below, wrapped in ``Sometimes(p, ...)``.

    Args:
        imgs: Images to augment, passed straight to ``augment_images``.
        p: Probability handed to ``iaa.Sometimes``.

    Returns:
        The result of ``augment_images`` on ``imgs``.
    """
    # Imported lazily so imgaug is only required when augmentation is used.
    from imgaug import augmenters as iaa

    candidate_ops = [
        iaa.Crop(px=(0, 4)),  # crop each side by 0 to 4px (randomly chosen)
        iaa.Affine(scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}),
        iaa.Affine(translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}),
        iaa.Affine(rotate=(-45, 45)),  # rotate by -45 to +45 degrees
        iaa.Affine(shear=(-10, 10)),  # shear by -10 to +10 degrees
    ]
    random_subset = iaa.SomeOf((2, 4), candidate_ops)
    pipeline = iaa.Sequential([iaa.Sometimes(p, random_subset)])
    return pipeline.augment_images(imgs)

def count_images_in_classes(lbls):
    """Count how many times each label occurs.

    Args:
        lbls: Iterable of hashable labels.

    Returns:
        Dict mapping each label to its number of occurrences, with keys in
        order of first appearance.
    """
    totals = {}
    for label in lbls:
        totals[label] = totals.get(label, 0) + 1
    return totals

In [None]:
def augmentation(imgs, lbls):
    """Top up under-represented classes with augmented copies.

    For every class present in ``lbls`` that has fewer than
    ``MIN_IMGS_IN_CLASS`` samples, images of that class are drawn at random
    (with replacement), passed through ``augment_imgs`` with probability 1,
    and appended until the class reaches ``MIN_IMGS_IN_CLASS`` samples.

    Class sizes are measured on ``lbls`` itself, so the function works on any
    subset of the data and does not depend on the annotation CSV.

    Args:
        imgs: Array of images, aligned with ``lbls``.
        lbls: Array of class labels.

    Returns:
        Tuple ``(imgs, lbls)``: the inputs followed by the augmented samples,
        grouped by class in ascending class order.
    """
    imgs = np.asarray(imgs)
    lbls = np.asarray(lbls)
    class_ids, class_counts = np.unique(lbls, return_counts=True)

    extra_imgs = []
    extra_lbls = []
    for class_id, count in zip(class_ids, class_counts):
        # Number of samples to be added
        add_num = MIN_IMGS_IN_CLASS - count
        if add_num <= 0:
            continue
        # Look the class members up once rather than once per drawn sample.
        candidates = np.where(lbls == class_id)[0]
        picked = [random.choice(candidates) for _ in range(add_num)]
        augmented_class = augment_imgs([imgs[k] for k in picked], 1)
        extra_imgs.append(np.array(augmented_class))
        extra_lbls.append(lbls[picked])

    if not extra_imgs:
        return (imgs, lbls)

    # Concatenate once at the end instead of copying the full arrays per class.
    return (
        np.concatenate([imgs] + extra_imgs, axis=0),
        np.concatenate([lbls] + extra_lbls, axis=0),
    )