In [1]:
import os
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import seaborn as sns
from PIL import Image
import shutil
import numpy as np

In [2]:
# PART ONE: DATA PREPROCESSING & PREPARATION

In [3]:
# Load the annotation table of each split and turn the bare image file names
# into paths relative to the notebook's working directory.

trainDataset = (
    pd.read_csv('./roadsigns/train/_annotations.csv')
    .assign(filename=lambda frame: './roadsigns/train/' + frame['filename'])
)

validationDataset = (
    pd.read_csv('./roadsigns/valid/_annotations.csv')
    .assign(filename=lambda frame: './roadsigns/valid/' + frame['filename'])
)

testDataset = (
    pd.read_csv('./roadsigns/test/_annotations.csv')
    .assign(filename=lambda frame: './roadsigns/test/' + frame['filename'])
)

In [4]:
# Report, per split, how many null entries each column contains.
# Each split is printed as a heading followed by the per-column counts;
# a blank line separates consecutive splits.

numTrainEntriesNull = trainDataset.isnull().sum()
print("Train:", numTrainEntriesNull, "", sep="\n")

numValidationEntriesNull = validationDataset.isnull().sum()
print("Validation:", numValidationEntriesNull, "", sep="\n")

numTestEntriesNull = testDataset.isnull().sum()
print("Test:", numTestEntriesNull, sep="\n")

Train:
filename    0
width       0
height      0
class       0
xmin        0
ymin        0
xmax        0
ymax        0
dtype: int64

Validation:
filename    0
width       0
height      0
class       0
xmin        0
ymin        0
xmax        0
ymax        0
dtype: int64

Test:
filename    0
width       0
height      0
class       0
xmin        0
ymin        0
xmax        0
ymax        0
dtype: int64


In [5]:
# Resizing and normalizing pixel values for training/validating/testing the CNN

def resize(imagePath, newImageSize=(256, 256)):
    """Load one image as a normalized grayscale array.

    The image is converted to 8-bit grayscale (mode 'L'), resized to
    ``newImageSize`` and scaled by 1/255 so that pixel values lie in
    [0.0, 1.0].

    Parameters
    ----------
    imagePath : str
        Path of the image file to read.
    newImageSize : tuple of int, optional
        Target size handed to ``Image.resize``. Defaults to (256, 256).

    Returns
    -------
    numpy.ndarray or None
        2-D float32 array of normalized pixel values, or ``None`` when the
        file could not be opened or processed (the error is printed).
    """
    try:
        with Image.open(imagePath) as img:
            imgResized = img.convert('L').resize(newImageSize)
            imgArray = np.array(imgResized, dtype=np.float32)
    except Exception as e:
        # Best effort: report the problem and let the caller decide what to
        # do with the row instead of aborting the whole preprocessing run.
        print(f"Error processing {imagePath}: {e}")
        return None
    imgArray /= 255.0
    return imgArray

datasets = [trainDataset, validationDataset, testDataset]
for dataset in datasets:
    image = []       # arrays of the rows that loaded successfully, in row order
    failedRows = []  # index labels of rows whose image could not be loaded
    for index, imagePath in dataset['filename'].items():
        if not os.path.exists(imagePath):
            print(f"File not found: {imagePath}")
            failedRows.append(index)
            continue
        imgArray = resize(imagePath)
        if imgArray is None:
            failedRows.append(index)
        else:
            image.append(imgArray)

    if failedRows:
        # Without this, `image` would be shorter than the frame and the column
        # assignment below would raise a length-mismatch ValueError.
        # The drop is done in place because trainDataset/validationDataset/
        # testDataset must keep referring to the same (now filtered) frames.
        # Assumes the index labels are unique (the default for read_csv).
        print(f"Dropping {len(failedRows)} row(s) whose image could not be loaded")
        dataset.drop(index=failedRows, inplace=True)
    dataset['Image'] = image

In [6]:
# Encoding the traffic sign classes pertaining to each image

label_encoder = LabelEncoder()

# Fit the encoder once, on the training labels, and reuse that mapping for the
# other splits. Calling fit_transform on every split would build a separate
# class -> integer mapping per split, so the same sign could receive different
# ids in train, validation and test whenever a split is missing a class.
# transform() raises ValueError if a split contains a class that never occurs
# in the training data, which surfaces that data problem explicitly.
trainDataset['class'] = label_encoder.fit_transform(trainDataset['class'])
validationDataset['class'] = label_encoder.transform(validationDataset['class'])
testDataset['class'] = label_encoder.transform(testDataset['class'])

In [7]:
# Separating model inputs (images) from targets (one-hot encoded classes)
# for each of the three already-split datasets.
# NOTE(review): X_* remain pandas Series whose elements are 2-D arrays; they
# will presumably need stacking into a single array before being fed to a
# Keras model - confirm when Part Two is written.

X_train, X_validation, X_test = (
    split['Image']
    for split in (trainDataset, validationDataset, testDataset)
)

# One-hot targets with a fixed width of 29 classes.
y_train, y_validation, y_test = (
    to_categorical(split['class'], num_classes=29)
    for split in (trainDataset, validationDataset, testDataset)
)

In [8]:
# PART TWO: BUILDING & TRAINING THE MODEL

In [9]:
# PART THREE: TESTING THE MODEL & EVALUATING PERFORMANCE

In [10]:
# (optional) PART FOUR: UI TO INTERACT WITH THE MODEL