
# IT1244 Course Project

---

## Traffic Signs Recognition Dataset

You can view and download the dataset here: https://drive.google.com/drive/folders/1EoRj4S3m1-A4_FEqn8MMRBcd_fqJ7h9F


# Downloading the Dataset and Importing Modules

In [None]:
import os
import shutil
import zipfile

import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing import image
from PIL import Image
from sklearn.model_selection import KFold, train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

Unzipping the file and organising the images according to their class labels

In [None]:
# Specify the path where zip file was uploaded
zip_path = "/content/traffic_sign_images.zip"

# Unzip the file into /content (the images are then read from /content/images below)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
  zip_ref.extractall("/content")

# Define the paths for the dataset
dataset_path = "/content/images"
organized_path = "/content/organized_traffic_sign_images"

# Create one directory per class label from 0 to 22
# (os.makedirs also creates organized_path itself when it is missing)
for label in range(23):
  class_dir = os.path.join(organized_path, str(label))
  os.makedirs(class_dir, exist_ok = True)

# Loop through all files in the dataset
for filename in os.listdir(dataset_path):
  if not filename.endswith(".png"):
    continue

  # The text before the first underscore of the file name is used as the class label
  class_label = filename.split('_')[0]
  class_dir = os.path.join(organized_path, class_label)

  # A label without a class folder would make shutil.move fail half-way through
  # the dataset; report the file and carry on instead.
  if not os.path.isdir(class_dir):
    print(f"Skipping {filename}: no class folder for label '{class_label}'")
    continue

  # Move the image to the appropriate class directory
  src = os.path.join(dataset_path, filename)
  dst = os.path.join(class_dir, filename)
  shutil.move(src, dst)

# Remove the original dataset directory, but only when it really is empty:
# removing a non-empty directory raises OSError.
if os.path.isdir(dataset_path):
  if os.listdir(dataset_path):
    print(f"Keeping {dataset_path}: it still contains files that were not moved")
  else:
    os.rmdir(dataset_path)

In [None]:
# Sanity check: list the class folders, ordered by their numeric value rather than as strings
sorted_folders = os.listdir(organized_path)
sorted_folders.sort(key=int)
print(sorted_folders)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22']


Creating 5-fold bins

In [None]:
# Define the paths
train_dir = "/content/traffic_sign_images/train"
test_dir = "/content/traffic_sign_images/test"

# Create the directories if they don't exist
# (exist_ok already covers the "already there" case, no separate check needed)
os.makedirs(train_dir, exist_ok = True)
os.makedirs(test_dir, exist_ok = True)

# Image resizing to fit models
TARGET_SIZE = (94, 94)  # Width, Height

# One splitter is enough: it is stateless between calls to split(), and the
# fixed random_state gives every class the same fold assignment pattern.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Iterate through each class folder
for class_name in sorted(os.listdir(organized_path)):
  class_path = os.path.join(organized_path, class_name)

  # os.listdir returns entries in arbitrary order; sort first so that the
  # seeded shuffle below produces the same order on every machine.
  image_files = sorted(os.listdir(class_path))

  # Shuffle the image files with a fixed seed for reproducibility
  np.random.seed(42)
  np.random.shuffle(image_files)

  # Each fold's test indices become the contents of one bin, so every image
  # of the class lands in exactly one of the five bins.
  for fold, (_, test_index) in enumerate(kf.split(image_files), start=1):
    # Bins are created relative to the current working directory
    bin_folder = f'bin_{fold}'
    os.makedirs(bin_folder, exist_ok=True)

    # Save resized copies of the images belonging to this fold/bin
    for idx in test_index:
      image_file = image_files[idx]
      # Read from the class folder being processed; the context manager
      # closes the source file once the resized copy has been written.
      with Image.open(os.path.join(class_path, image_file)) as img:
        resized_img = img.resize(TARGET_SIZE, Image.LANCZOS)
        resized_img.save(os.path.join(bin_folder, image_file))

    print(f'Saved bin {fold} to folder: {bin_folder}')

# Remove the organized_data directory after splitting
if os.path.exists(organized_path):
  shutil.rmtree(organized_path)

Folder preparation for K-FOLD

In [None]:
# Archives uploaded for the K-fold run: one bin acts as the test set,
# the other archive holds the training images.
test_path = "/content/bin_4.zip"
train_path = "/content/train_dataset.zip"

# Make sure both extraction targets exist before unzipping
for target_folder in ("/content/train_dataset", "/content/test_dataset"):
    os.makedirs(target_folder, exist_ok=True)


# Helper function to unzip and flatten folder structure
def unzip_and_flatten(zip_path, extract_to):
    """Extract a zip archive and place all of its files directly in ``extract_to``.

    The archive is first unpacked into a sibling folder named
    ``extract_to + "_temp"``; every file found there (at any depth) whose name
    does not start with a dot is then moved into ``extract_to``, so the nested
    folder structure of the archive is discarded. Files that share a base name
    overwrite one another, and files already present in ``extract_to`` under
    the same name are replaced, which makes the call safe to repeat.

    Args:
        zip_path: Path of the zip archive to read.
        extract_to: Folder that receives the flattened files; created if missing.

    Raises:
        FileNotFoundError / zipfile.BadZipFile: if the archive cannot be opened.
    """
    temp_extract_folder = extract_to + "_temp"

    # Without an existing target folder, shutil.move would rename the first
    # file to `extract_to` instead of moving it *into* that folder.
    os.makedirs(extract_to, exist_ok=True)
    os.makedirs(temp_extract_folder, exist_ok=True)

    try:
        # Extract all contents into the temporary folder
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_extract_folder)

        # Move files from temp folder to target folder, avoiding nested folders
        for root, _dirs, files in os.walk(temp_extract_folder):
            for file in files:
                # Skip dot-files (for example the "._*" entries some archives carry)
                if file.startswith('.'):
                    continue
                # Give the full destination path: moving onto a directory raises
                # when a file of that name is already there (e.g. on a re-run).
                shutil.move(os.path.join(root, file), os.path.join(extract_to, file))
    finally:
        # Clean up the temporary folder even when extraction or moving failed
        shutil.rmtree(temp_extract_folder, ignore_errors=True)

# Unpack the test archive first, then the training archive, each into its own flat folder
for archive_path, destination in (
    (test_path, "/content/test_dataset"),
    (train_path, "/content/train_dataset"),
):
    unzip_and_flatten(archive_path, destination)

Splitting the dataset into training and testing sets (not needed when using the K-fold bins)



In [None]:
# Target folders for the two subsets
train_dir = "/content/traffic_sign_images/train"
test_dir = "/content/traffic_sign_images/test"

# Make sure both target folders exist
for split_dir in (train_dir, test_dir):
  if not os.path.exists(split_dir):
    os.makedirs(split_dir, exist_ok = True)

# Handle the class folders one at a time so that every class is split 80/20 on its own
for class_name in sorted(os.listdir(organized_path)):
  class_path = os.path.join(organized_path, class_name)
  image_files = os.listdir(class_path)

  # Fixed random_state: the same listing always yields the same split
  train_files, test_files = train_test_split(image_files,
                                            test_size = 0.2,
                                            random_state = 42)

  # Copy (not move) each subset into its flat target folder; the class label
  # stays recoverable from the file name prefix.
  for destination, file_names in ((train_dir, train_files), (test_dir, test_files)):
    for file_name in file_names:
      shutil.copy(os.path.join(class_path, file_name),
                  os.path.join(destination, file_name))

# The per-class folders are no longer needed once the copies exist
if os.path.exists(organized_path):
  shutil.rmtree(organized_path)

Loading training and validation data (For CNN)

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

# Define the dataset path
dataset_path = "/content/traffic_sign_images"
train_dataset_path = "/content/traffic_sign_images/train"
test_dataset_path = "/content/traffic_sign_images/test"

# Function to load images and labels
def load_images_and_labels(directory):
    """Load every file in ``directory`` as a normalised image array plus its label.

    Each regular file is read with ``load_img`` at a target size of 94x94,
    converted with ``img_to_array`` and divided by 255. The label is the
    integer before the first underscore of the file name.

    Args:
        directory: Folder containing the image files (sub-folders are ignored).

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per file.

    Raises:
        ValueError: if a file name does not start with an integer prefix.
    """
    images = []
    labels = []

    for filename in os.listdir(directory):
        img_path = os.path.join(directory, filename)
        if os.path.isfile(img_path):
            # Load and preprocess image
            img = load_img(img_path, target_size=(94, 94))
            img_array = img_to_array(img) / 255.0  # Normalize the image
            images.append(img_array)

            label = int(filename.split('_')[0])  # Adjust as per your naming convention
            labels.append(label)

    return np.array(images), np.array(labels)

# Load training and validation data
X_train, y_train = load_images_and_labels(train_dataset_path)
X_val, y_val = load_images_and_labels(test_dataset_path)

# Print the shape of the loaded arrays (validation shapes get their own captions)
print('Shape of training images:', X_train.shape)
print('Shape of training labels:', y_train.shape)
print('Shape of validation images:', X_val.shape)
print('Shape of validation labels:', y_val.shape)

# Create data generator (no augmentation options are set, so it only batches the arrays)
datagen = ImageDataGenerator()

# Create data generators
train_generator = datagen.flow(
    X_train,
    y_train,
    batch_size=32
)

validation_generator = datagen.flow(
    X_val,
    y_val,
    batch_size=32
)

CNN Model

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
# Classifier architecture, declared as one ordered list of layers:
# two convolution + max-pooling stages, then a dense head with light dropout.
model = Sequential([
    # First convolution stage; also fixes the input to 94x94 images with 3 channels
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (94, 94, 3)),
    MaxPooling2D(pool_size = (2, 2)),
    # Second convolution stage
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    # Flatten the feature maps for the fully connected layers
    Flatten(),
    # Dense head: 800 -> 400 -> 200 units, dropout after the first two
    Dense(800, activation = 'relu'),
    Dropout(0.1),
    Dense(400, activation = 'relu'),
    Dropout(0.1),
    Dense(200, activation = 'relu'),
    # One softmax output per class (23 classes)
    Dense(23, activation = 'softmax'),
])

# Print the model summary
model.summary()

Compile and train the CNN Model

In [None]:
from keras.callbacks import EarlyStopping

# Stop training once the validation loss has not improved for 3 epochs in a row
# and roll the model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor = 'val_loss',
    patience = 3,
    restore_best_weights = True
    )

# Compiling the model with a loss function, optimizer and metrics.
# The labels are plain integers, hence the sparse variant of the cross-entropy loss.
learning_rate = 0.0001
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer = optimizer,
              loss = 'sparse_categorical_crossentropy',
              metrics = ['accuracy'])

# Training the model.
# batch_size is intentionally not passed here: train_generator already yields
# batches of 32 (set in datagen.flow), and Keras documents that batch_size must
# not be specified when the input is a generator / dataset object.
history = model.fit(train_generator,
                    epochs = 10,
                    validation_data = validation_generator,
                    callbacks = [early_stopping])

Evaluate the CNN Model

In [None]:
# Score the trained model on the validation arrays; the result is unpacked as (loss, accuracy)
test_loss, test_accuracy = model.evaluate(X_val, y_val)
for caption, value in (('Test accuracy', test_accuracy), ('Test Loss', test_loss)):
    print(f'{caption}: {value:.2f}')

Analyse the CNN model results by visualisation

In [None]:
# 'history' is the object returned by model.fit(); its .history dict holds one
# list per tracked quantity with one value per epoch.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: training vs. validation accuracy
accuracy_ax.plot(history.history['accuracy'], label='Train Accuracy')
accuracy_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
accuracy_ax.set_title('Model Accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend(loc='lower right')

# Right panel: training vs. validation loss
loss_ax.plot(history.history['loss'], label='Train Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Model Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='upper right')

# Display the plots
plt.show()

Checking for misclassified images - Credits to ChatGPT

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import math

# Step 1: Make predictions (the class with the highest score wins)
predictions = model.predict(X_val)
predicted_classes = np.argmax(predictions, axis=1)

# Step 2: Identify misclassified images
misclassified_indices = np.where(predicted_classes != y_val)[0]
num_misclassified = len(misclassified_indices)
print(f'Number of misclassified images: {num_misclassified}')

# Step 3: Visualize misclassified images.
# Only the first MAX_DISPLAYED are drawn and the figure height grows with the
# number of rows: squeezing every error into one fixed-size figure makes the
# thumbnails unreadable as soon as there are more than a few dozen.
MAX_DISPLAYED = 30
num_cols = 3  # You can adjust this for more or fewer columns
displayed_indices = misclassified_indices[:MAX_DISPLAYED]

if len(displayed_indices) == 0:
    print('Nothing to display: every image was classified correctly.')
else:
    num_rows = math.ceil(len(displayed_indices) / num_cols)  # Calculate required rows
    plt.figure(figsize=(15, 5 * num_rows))

    for i, index in enumerate(displayed_indices):
        plt.subplot(num_rows, num_cols, i + 1)
        plt.imshow(X_val[index])  # Assuming X_val is in a format suitable for display
        plt.title(f'True: {y_val[index]}, Pred: {predicted_classes[index]}')
        plt.axis('off')

    plt.tight_layout()
    plt.show()

Prediction of images using trained model

In [None]:
import numpy as np
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt
from PIL import Image

def predict_image_class(model, image_path, class_labels, img_size=(94, 94)):
    """Predict the class of a single image file and display it with the result.

    Args:
        model: Trained classifier exposing ``predict``; the CNN in this notebook
            expects input of shape (94, 94, 3).
        image_path: Path of the image file to classify.
        class_labels: Sequence mapping a predicted class index to its label.
        img_size: ``(width, height)`` the image is resized to before prediction.

    Returns:
        The entry of ``class_labels`` for the highest-scoring class.
    """
    # Load and preprocess the image.
    # PNG files may be RGBA, palette or greyscale; force 3-channel RGB so the
    # array always has the channel count the model was built for.
    with Image.open(image_path) as source_img:
        img = source_img.convert('RGB').resize(img_size)
    img_array = np.array(img) / 255.0  # Normalize the image to [0, 1]
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

    # Make a prediction
    predictions = model.predict(img_array)
    predicted_class_index = np.argmax(predictions, axis=1)[0]
    predicted_class = class_labels[predicted_class_index]

    # Display the image with the predicted class
    plt.imshow(img)
    plt.title(f'Predicted Class: {predicted_class}')
    plt.axis('off')
    plt.show()

    return predicted_class

# Example usage
image_path = '/content/test_dataset/10_2309.png' #Change path for K-FOLD prediction if you want
class_labels = [str(i) for i in range(23)]  # Assuming labels are 0 to 22
predicted_class = predict_image_class(model, image_path, class_labels)
print("Predicted Class:", predicted_class)

Saving the model

In [None]:
# Persist the trained model; the file name is kept in a variable so it is easy to change
model_filename = "traffic_sign_classifier_67.keras"
model.save(model_filename)

Feature Map visualisation for Conv2D - Courtesy of ChatGPT

In [None]:
import matplotlib.pyplot as plt

# Assume 'model' is your trained CNN and 'input_image' is a sample image
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import layers, Input

model=load_model('traffic_sign_classifier_4.keras') #Change to name of model file

model.summary()

# Load the sample image in the layout the CNN defined in this notebook expects:
# 3-channel RGB at 94x94. Without this, an RGBA/greyscale or differently sized
# PNG produces an array the model cannot accept.
with Image.open('/content/traffic_sign_images/test/5_4261.png') as raw_image: #Change path accordingly
    input_image = raw_image.convert('RGB').resize((94, 94))

#input_image = np.random.rand(94, 94, 3)  # Replace with actual image of correct size

# Normalize the input (depends on how the model was trained)
input_image = np.array(input_image) / 255.0

# Reshape the input image for batch size (1, 94, 94, 3)
input_image = np.expand_dims(input_image, axis=0)

# The prediction itself is discarded.
# NOTE(review): presumably this call is here so the loaded model has been run
# once before its layer outputs are accessed - confirm it is still required.
model.predict(input_image)

# Collect the output tensors of every Conv2D layer, in model order
layer_outputs = [layer.output for layer in model.layers if isinstance(layer, layers.Conv2D)]

print("Shapes of Conv2D layer outputs:")
for i, output in enumerate(layer_outputs):
    print(f"Layer {i}: {output.shape}")

# Create a model that outputs the activations from the Conv2D layers
activation_model = Model(inputs=model.inputs, outputs=layer_outputs)


# Get the feature maps from the activation model (one array per Conv2D layer)
feature_maps = activation_model.predict(input_image)

source: https://machinelearningmastery.com/how-to-visualize-filters-and-feature-maps-in-convolutional-neural-networks/

https://www.youtube.com/watch?v=RPoAJ_J2Wno

In [None]:
# Show the first 32 feature maps of the first Conv2D layer on an 8x4 grid
fig, axes = plt.subplots(8, 4, figsize=(12, 12))
for channel, ax in enumerate(axes.flat):
  # feature_maps[0] holds the first layer's activations; index 0 selects the single image in the batch
  ax.imshow(feature_maps[0][0, :, :, channel], cmap='viridis')
  ax.axis('off')
fig.tight_layout()
plt.show()

In [None]:
# Show the first 32 feature maps of the second Conv2D layer on an 8x4 grid
fig, axes = plt.subplots(8, 4, figsize=(12, 12))
for channel, ax in enumerate(axes.flat):
  # feature_maps[1] holds the second layer's activations; index 0 selects the single image in the batch
  ax.imshow(feature_maps[1][0, :, :, channel], cmap='inferno')
  ax.axis('off')
fig.tight_layout()
plt.show()

Loading training and validation data (for SVM and RF)

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import cv2
from sklearn.preprocessing import LabelEncoder

# Paths to your train and test directories
train_dir = '/content/train_dataset'
test_dir = '/content/test_dataset'

# Function to load images and labels
def load_images_and_labels(directory):
    """Load every image in ``directory`` as a flattened, normalised vector plus its label.

    Each regular file is read with OpenCV in colour mode, resized to 94x94,
    divided by 255 and flattened to one dimension, which is the layout the
    SVM and random-forest models below are fitted on. The label is the integer
    before the first underscore of the file name. Files OpenCV cannot decode
    are reported and skipped.

    Args:
        directory: Folder containing the image files (sub-folders are ignored).

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per loaded file.

    Raises:
        ValueError: if a file name does not start with an integer prefix.
    """
    images = []
    labels = []

    for filename in os.listdir(directory):
        img_path = os.path.join(directory, filename)
        if os.path.isfile(img_path):
            # Load image and resize to 94x94
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img is None:
                # cv2.imread signals an unreadable file by returning None
                # rather than raising; cv2.resize would then fail.
                print(f'Skipping unreadable image: {img_path}')
                continue
            img_resized = cv2.resize(img, (94, 94))
            img_normalized = img_resized / 255.0 # Normalize the image

            images.append(img_normalized.flatten())  # Flatten the image to 1D

            # Extract the class label from the filename
            class_label = int(filename.split('_')[0])  # Adjust as per your naming convention
            labels.append(class_label)

    return np.array(images), np.array(labels)

# Load the training and validation data
X_train, y_train = load_images_and_labels(train_dir)
X_val, y_val = load_images_and_labels(test_dir)

# Encode the labels as integers (the encoder is fitted on the training labels only)
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

# Print the shape of the loaded arrays (validation shapes get their own captions)
print('Shape of training images:', X_train.shape)
print('Shape of training labels:', y_train_encoded.shape)
print('Shape of validation images:', X_val.shape)
print('Shape of validation labels:', y_val_encoded.shape)

SVM model

In [None]:
import os
import cv2
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, accuracy_score

# Fit an RBF-kernel SVM on the flattened images (fit returns the estimator itself)
svm_model = svm.SVC(kernel='rbf', C=1.0).fit(X_train, y_train_encoded)  # You can adjust kernel and C as necessary

# Per-class precision / recall / F1 on the validation set
svm_predictions = svm_model.predict(X_val)
svm_report = classification_report(y_val_encoded, svm_predictions)
print(svm_report)

# Overall accuracy on the validation set
accuracy = accuracy_score(y_val_encoded, svm_predictions)
print(f'SVM Accuracy: {accuracy * 100:.2f}%')

In [None]:
import matplotlib.pyplot as plt

# Overlay true and predicted labels per validation sample; a cross that does
# not sit on its dot marks a misclassification.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(range(len(y_val_encoded)), y_val_encoded, label='Actual')
ax.scatter(range(len(svm_predictions)), svm_predictions, label='Predicted', marker='x')
ax.legend()
ax.set_title('Actual vs Predicted Labels')
plt.show()

In [None]:
from sklearn.inspection import permutation_importance

# Assuming `svm_model` is your trained SVM and `rf_model` is your Random Forest model
# Permutation importance shuffles one feature (here: one flattened pixel value)
# at a time and records how much the model's score drops.
# The SVM was fitted on y_train_encoded, so it must be scored against the
# encoded validation labels as well.
svm_permutation_importance = permutation_importance(svm_model, X_val, y_val_encoded, n_repeats=10, random_state=42)

# Similarly, for Random Forest:
#rf_permutation_importance = permutation_importance(rf_model, X_test, y_test, n_repeats=10, random_state=42)

# Display feature importances
print("SVM Feature Importances:", svm_permutation_importance.importances_mean)
#print("Random Forest Feature Importances:", rf_permutation_importance.importances_mean)

In [None]:
# `coefficients` was never defined, and an SVC with an RBF kernel exposes no
# per-feature coefficients (coef_ is only available for a linear kernel).
# Plot the mean permutation importances computed for svm_model instead.
coefficients = svm_permutation_importance.importances_mean
plt.bar(range(len(coefficients)), coefficients)
plt.title("SVM Permutation Feature Importances")
plt.show()

In [None]:
Random Forest Classification (Decision Tree)

In [None]:
# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Random forest of 100 trees, each limited to depth 8.
# random_state is fixed so that repeated runs build the same forest and report
# the same scores (42 is the seed used throughout this notebook).
rfc_model = RandomForestClassifier(max_depth = 8,
                                   n_estimators=100,
                                   random_state=42)
rfc_model.fit(X_train, y_train_encoded)

# Predict on the validation set
rfc_predictions = rfc_model.predict(X_val)
print(classification_report(y_val_encoded, rfc_predictions))

# Calculate and print accuracy score
accuracy = accuracy_score(y_val_encoded, rfc_predictions)
print(f"Random Forest Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Fit a separate forest with default hyper-parameters purely to read its
# feature importances. It gets its own name so that the CNN stored in `model`
# is not overwritten, a fixed seed so the importances are reproducible, and the
# same encoded labels the other classical models are fitted on.
rf_importance_model = RandomForestClassifier(random_state=42)
rf_importance_model.fit(X_train, y_train_encoded)

# Get feature importances (one value per flattened pixel feature)
importances = rf_importance_model.feature_importances_

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Every flattened pixel value is a feature (94 * 94 * 3 of them), so drawing
# one bar and one tick label per feature is slow and unreadable.
# Show only the TOP_N most important features instead.
TOP_N = 30

# Feature indices ordered from most to least important
indices = np.argsort(importances)[::-1]
top_indices = indices[:TOP_N]

plt.figure(figsize=(12, 5))
plt.title("Feature Importances")
plt.bar(range(len(top_indices)), importances[top_indices], align="center")
# Tick labels are the positions of the features in the flattened image vector
plt.xticks(range(len(top_indices)), top_indices, rotation=90)
plt.xlim([-1, len(top_indices)])
plt.show()

Resizing the images (not needed when using the K-fold bins)

In [None]:
# Folders whose images are resized in place
train_dataset_path = "/content/traffic_sign_images/train"
test_dataset_path = "/content/traffic_sign_images/test"

# Size every image is brought to
TARGET_SIZE = (94, 94)  # Width, Height

def resize_images_in_directory(directory):
  """Resize every file in ``directory`` to TARGET_SIZE, overwriting the original file."""
  image_paths = [os.path.join(directory, file_name) for file_name in os.listdir(directory)]

  for image_path in image_paths:
    with Image.open(image_path) as img:
      # LANCZOS resampling, written back to the path the image was read from
      img.resize(TARGET_SIZE, Image.LANCZOS).save(image_path)


# Training folder first, then the testing folder
for dataset_dir in (train_dataset_path, test_dataset_path):
  resize_images_in_directory(dataset_dir)

Viewing image

In [None]:
# Image to inspect (from the train or test folder); the class label is the
# part of the file name before the first underscore.
image_path = "/content/traffic_sign_images/test/10_600.png"
class_name, _, _ = os.path.basename(image_path).partition("_")

# Show the image with its class as the title
image = Image.open(image_path)
fig, ax = plt.subplots()
ax.imshow(image)
ax.set_title(f'Class: {class_name}')
plt.show()

Loading training and validation data + data augmentation (for cnn)

In [None]:
import cv2  # OpenCV for image processing

# Locations of the dataset and of its two subsets
dataset_path = "/content/traffic_sign_images"
train_dataset_path = "/content/traffic_sign_images/train"
test_dataset_path = "/content/traffic_sign_images/test"

def load_images_and_labels(directory):
    """Return ``(images, labels)`` arrays for all regular files in ``directory``.

    Images are loaded at 94x94 and scaled by 1/255; the label is the integer
    before the first underscore of the file name.
    """
    images, labels = [], []

    for filename in os.listdir(directory):
        img_path = os.path.join(directory, filename)
        # Anything that is not a regular file (e.g. a sub-folder) is ignored
        if not os.path.isfile(img_path):
            continue

        pixels = img_to_array(load_img(img_path, target_size=(94, 94)))
        images.append(pixels / 255.0)  # Normalize the image

        labels.append(int(filename.split('_')[0]))  # Adjust as per your naming convention

    return np.array(images), np.array(labels)

# Custom function to add noise and blur
def augment_images(img, noise_std=3 / 255.0, blur_kernel=(9, 9)):
    """Return a noisy, blurred copy of an image whose pixel values lie in [0, 1].

    Args:
        img: Image array already scaled to [0, 1] (the loaders in this notebook
            divide by 255).
        noise_std: Standard deviation of the zero-mean Gaussian noise, on the
            same [0, 1] scale as ``img``. The default equals 3 intensity levels
            on the 0-255 scale.
        blur_kernel: Kernel size ``(width, height)`` passed to ``cv2.GaussianBlur``.

    Returns:
        Array of the same shape as ``img`` with values in [0, 1].
    """
    # Add Gaussian noise. The std has to be on the [0, 1] scale of the image:
    # a std of 3 there pushes most pixels past the clip limits and leaves
    # little more than saturated noise.
    noise = np.random.normal(0, noise_std, img.shape)
    noisy_img = np.clip(img + noise, 0, 1)  # Ensure pixel values are between 0 and 1

    # Add blur (sigma 0 lets OpenCV derive it from the kernel size)
    blurred_img = cv2.GaussianBlur(noisy_img, blur_kernel, 0)

    return blurred_img

# Load training and validation data
X_train, y_train = load_images_and_labels(train_dataset_path)
X_val, y_val = load_images_and_labels(test_dataset_path)

# Print the shape of the loaded arrays (validation shapes get their own captions)
print('Shape of training images:', X_train.shape)
print('Shape of training labels:', y_train.shape)
print('Shape of validation images:', X_val.shape)
print('Shape of validation labels:', y_val.shape)

# The augmentation draws random noise; seed NumPy so that re-running the cell
# produces the same augmented images (42 is the seed used throughout this notebook).
np.random.seed(42)

# Augment the entire training set
X_train_augmented = np.array([augment_images(img) for img in X_train])

# Combine original and augmented data (optional)
X_train_combined = np.vstack((X_train, X_train_augmented))
y_train_combined = np.hstack((y_train, y_train))  # Double the labels as well

# Create data generator (no augmentation options are set, so it only batches the arrays)
datagen = ImageDataGenerator()

# Print the shape of the combined arrays
print('Shape of combined training images:', X_train_combined.shape)
print('Shape of combined training labels:', y_train_combined.shape)

# Create data generators; only the training data contains augmented images
train_generator = datagen.flow(
    X_train_combined,
    y_train_combined,
    batch_size=32
)

validation_generator = datagen.flow(
    X_val,
    y_val,
    batch_size=32
)

Shape of training images: (3910, 94, 94, 3)
Shape of training labels: (3910,)
Shape of training images: (986, 94, 94, 3)
Shape of training labels: (986,)
Shape of combined training images: (7820, 94, 94, 3)
Shape of combined training labels: (7820,)
