# **CLASSFICATION BY SHAPE**

This model is built to classify the shape of Road Traffic sign images from given dataset following the process learnt in class and lab sessions, which consists of these main sub-secitons:


1. Importing necessary libraries and functions
2. Declaring datapath: this project involves two students, so we declared two different directories for each.
3. Reading data
4. Checking for data's size, number of images in each categories, duplicates, etc
5. Process data
6. Divide the data to independent train, validation and test set.
7. Building the baseline model
8. Enhance the performance of the model





**DECLARING DIRECTORIES AND READING DATA**

In [1]:
!pip install tabulate

#import basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PIL
import os, sys
import glob
import pathlib
import tempfile
import PIL
from PIL import Image
from tabulate import tabulate
from google.colab.patches import cv2_imshow
from tensorflow.keras.preprocessing import image
from tensorflow.keras.utils import image_dataset_from_directory
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers import Flatten, Dense, BatchNormalization, Dropout, Rescaling, Conv2D, MaxPooling2D, Activation
from tensorflow.keras.optimizers import Adam, SGD

# Mount Google Drive (if not mounted, in which btw, definitely was not)
from google.colab import files
from google.colab import drive
drive.mount('/content/drive',force_remount = True)

os.environ['env_for_s3977994'] = '/content/drive/MyDrive/MachineLearning_ASM2/pyTools/'
os.environ['env_for_s3979391'] = '/content/drive/My Drive/models/'
env = os.getenv('env_for_s3977994')
sys.path.append(env)

# Custom functions
from utils import *

# Print out versions for important tools
print("Pandas version:", pd.__version__)
print("TensorFlow version:", tf.__version__)
print("Pillow version:", PIL.__version__)

Mounted at /content/drive
Pandas version: 2.0.3
TensorFlow version: 2.15.0
Pillow version: 9.4.0


In [2]:
gpu_conf = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpu_conf:
    tf.config.experimental.set_memory_growth(gpu, True)

**ERROR HANDLING FOR READING DATA DIRECTORIES**

In [3]:
# Define the path to your Google Drive folder containing the images
google_drive_folder = "../content/drive/MyDrive/MachineLearning_ASM2/trafficsigns_dataset"

# Create a pathlib.Path object pointing to your Google Drive folder
data_dir = pathlib.Path(google_drive_folder)

# Ensure the directory exists
if not data_dir.exists():
    raise ValueError(f"Directory {data_dir} does not exist. Please check the path.")

**TOTAL NUMBER OF IMAGES**

In [4]:
image_count = len(list(data_dir.glob('*/**/*.png')))
print(image_count)

3699


**DIRECTORY TREE**

In [5]:
print_directory_tree(google_drive_folder)

├───trafficsigns_dataset/
    ├───square/
        ├───continue/
        ├───parking/
        ├───crossing/
        ├───laneend/
    ├───hex/
        ├───stop/
    ├───triangle/
        ├───giveway/
    ├───round/
        ├───speed/
        ├───limitedtraffic/
        ├───noparking/
        ├───bicycle/
        ├───roundabout/
        ├───noentry/
        ├───trafficdirective/
        ├───traveldirection/
    ├───diamond/
        ├───rightofway/


**PRINT NUMBER OF IMAGES FOR EACH CATEGORY**

In [6]:
print_summary(data_dir)

+------------------+------------------+
|    Directory     | Number of Images |
+------------------+------------------+
|     continue     |       199        |
|     parking      |       276        |
|     crossing     |        95        |
|     laneend      |       118        |
|       stop       |        43        |
|     giveway      |       231        |
|      speed       |       316        |
|  limitedtraffic  |       125        |
|    noparking     |       242        |
|     bicycle      |       285        |
|    roundabout    |        98        |
|     noentry      |       375        |
| trafficdirective |       195        |
| traveldirection  |       124        |
|    rightofway    |       282        |
+------------------+------------------+


**DUPLICATE**

In [7]:
print_duplicates(data_dir)

KeyboardInterrupt: 

**LABEL DATA**


In [8]:
# Use the functions
data = labeled_by_shape(data_dir)
print("Classes:", data.class_names)
image_count = len(list(data_dir.glob('square/**/*.png')))
print("Total number of images in square class: ", image_count)
image_count = len(list(data_dir.glob('hex/**/*.png')))
print("Total number of images in hex class: ", image_count)
image_count = len(list(data_dir.glob('triangle/**/*.png')))
print("Total number of images in triangle class: ", image_count)
image_count = len(list(data_dir.glob('round/**/*.png')))
print("Total number of images in round class: ", image_count)
image_count = len(list(data_dir.glob('diamond/**/*.png')))
print("Total number of images in diamond class: ", image_count)

Found 3699 files belonging to 5 classes.
Classes: ['diamond', 'hex', 'round', 'square', 'triangle']
Total number of images in square class:  688
Total number of images in hex class:  43
Total number of images in triangle class:  926
Total number of images in round class:  1760
Total number of images in diamond class:  282


**CALCULATE CLASS WEIGHT**

In [9]:
class_counts = {
    'square': len(list(data_dir.glob('square/**/*.png'))),
    'hex': len(list(data_dir.glob('hex/**/*.png'))),
    'triangle': len(list(data_dir.glob('triangle/**/*.png'))),
    'round': len(list(data_dir.glob('round/**/*.png'))),
    'diamond': len(list(data_dir.glob('diamond/**/*.png')))
}

# Get the list of class names in the same order as the indices
class_names = list(class_counts.keys())

# Create a label encoder to convert class names to indices
label_encoder = LabelEncoder()
label_encoder.fit(class_names)

# Convert class names to class indices
class_indices = {class_name: label_encoder.transform([class_name])[0] for class_name in class_names}

total_samples = sum(class_counts.values())
class_weights = {cls: total_samples / (len(class_counts) * count) for cls, count in class_counts.items()}
print(class_weights)

# Assign unique integer labels to each class
label_encoder = {class_name: i for i, class_name in enumerate(class_weights.keys())}

# Encode class weights with integer labels
encoded_class_weights = {label_encoder[class_name]: weight for class_name, weight in class_weights.items()}
print(encoded_class_weights)

{'square': 1.0752906976744185, 'hex': 17.204651162790697, 'triangle': 0.7989200863930885, 'round': 0.4203409090909091, 'diamond': 2.623404255319149}
{0: 1.0752906976744185, 1: 17.204651162790697, 2: 0.7989200863930885, 3: 0.4203409090909091, 4: 2.623404255319149}


In [10]:
# # Normalize class weights
# max_weight = max(class_weights.values())
# for class_name in class_weights:
#     class_weights[class_name] /= max_weight

# # Print the normalized class weights
# print(class_weights)

**BASELINE MODEL**

In [11]:
train_size = int(len(data)*.7)
val_size = int(len(data)*.2)
test_size = int(len(data)*.1)

train = data.take(train_size)
val = data.skip(train_size).take(val_size)
test = data.skip(train_size+val_size).take(test_size)

Starting with a multilayer perceptron model (MLP)

In [12]:
baseline_model = Sequential([
    Rescaling(1./255, input_shape=(256, 256, 1)),
    Flatten(),
    BatchNormalization(),  # Batch normalization
    Dense(256, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),  # Batch normalization
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax')
])

In [None]:
baseline_model.compile (
    optimizer='SGD',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['sparse_categorical_accuracy']
)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='logs')

baseline_model.fit(
    train,
    epochs=20,
    batch_size = 5,
    validation_data=val,
    callbacks=[tensorboard_callback],  # Any additional callbacks you might need
    class_weight=encoded_class_weights
)

Epoch 1/20


  output, from_logits = _get_logits(




In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation

model2 = Sequential()

model2.add(Conv2D(filters = 16, kernel_size=(10,10), padding = 'Same', activation = 'relu', input_shape = (256, 256, 1)))
model2.add(MaxPooling2D(pool_size=(2,2)))

model2.add(Conv2D(filters = 32, kernel_size=(10,10), padding = 'Same', activation = 'relu'))
model2.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

model2.add(Conv2D(filters = 32, kernel_size =(10,10), padding = 'Same', activation = 'relu'))
model2.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

# model2.add(Conv2D(filters = 32, kernel_size =(3,3), padding = 'Same', activation = 'relu'))
# model2.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

model2.add(Flatten())
model2.add(Dense(32))
model2.add(Activation('relu'))
model2.add(Dense(5, activation = 'softmax'))

In [None]:
model2.compile (
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

model2.fit(
    train,
    epochs=20,
    batch_size = 32,
    validation_data=val,
    # class_weight=class_weights
)

# print(data)
# print(data.class_names)
# print(train)
# print(train.class_names)
# print(test)
# print(test.class_names)
# print(val)
# print(val.class_names)

