## EfficientNetB0

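- Stratified 5-fold cross-validation applied
- EfficientNetB0(weights='imagenet') - initialized with ImageNet weights and fine-tuned end to end
- IMG_SIZE = 32
- BATCH_SIZE = 32
- EPOCHS = 10 (maximum per fold; early stopping on validation loss with patience 3)
- Learning rate = 0.001 (Adam)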

## Findings:
Test accuracy: 98.14% (predictions averaged over the five fold models); mean cross-validation accuracy: 99.26%.

#Mount Google Drive and import libraries

In [None]:
# Mount Google Drive at /content/drive; the dataset paths and the model save
# directory used later in this notebook all live under that mount point.
# `google.colab` is the Colab runtime package, so this cell is Colab-specific.
from google.colab import drive
drive.mount('/content/drive')

# Import necessary libraries
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from keras.callbacks import EarlyStopping
from tensorflow.keras import layers, models
from tensorflow import keras
from tensorflow.keras.applications import EfficientNetB0

Mounted at /content/drive


#Dataset:

##Load training data

In [None]:
# Define paths to dataset directories (both inside the mounted Google Drive).
# train_dir is read by load_data(), which expects one sub-directory per class;
# test_dir is read by load_test_data(), which expects the images directly inside.
train_dir = '/content/drive/My Drive/Data/GTSRB/Train/GTSRB/Final_Training/Images/'
test_dir = '/content/drive/My Drive/Data/GTSRB/Test/GTSRB/Final_Test/Images/'

# Define image size: every image is resized to IMG_SIZE x IMG_SIZE pixels
IMG_SIZE = 32  # Resize images
BATCH_SIZE = 32  # batch size passed to datagen.flow() and model.fit()
num_classes = 43  # number of classes: one-hot label width and prediction array width

# Helper function to load images and labels
def load_data(data_dir):
    """Load a labelled image dataset stored as one sub-directory per class.

    Each sub-directory of ``data_dir`` whose name parses as an integer is
    treated as a class, and that integer is the label of every image in it.
    Images are read with OpenCV and resized to ``IMG_SIZE x IMG_SIZE``.

    Args:
        data_dir: Directory containing one numerically named folder per class.

    Returns:
        A tuple ``(images, labels)``:
            images: float32 NumPy array of the resized images, scaled to [0, 1].
            labels: one-hot encoded labels with ``num_classes`` columns.

    Notes:
        * Directory listings are sorted so the sample order (and therefore any
          seeded split performed on the result) is the same on every run;
          ``os.listdir`` alone returns entries in arbitrary order.
        * Folders with non-numeric names, files without an image extension and
          images OpenCV cannot decode are skipped with a printed message
          instead of aborting the whole load.
        * Relies on the notebook-level ``IMG_SIZE`` and ``num_classes``.
        * NOTE(review): ``cv2.imread`` yields channels in BGR order and this
          function keeps them that way (as does ``load_test_data``) - confirm
          that is intended for the ImageNet-pretrained backbone.
    """
    # Valid image file extensions (compared against the lower-cased suffix)
    valid_extensions = ('.jpg', '.jpeg', '.png', '.ppm')

    images = []
    labels = []

    # Loop through each subdirectory (which corresponds to a class)
    for label_dir in sorted(os.listdir(data_dir)):
        label_path = os.path.join(data_dir, label_dir)
        if not os.path.isdir(label_path):
            continue

        # Subdirectory name is the class label; ignore folders that are not
        # class folders (e.g. hidden checkpoint directories)
        try:
            label = int(label_dir)
        except ValueError:
            print(f"Skipping non-class directory: {label_dir}")
            continue

        for img_file in sorted(os.listdir(label_path)):
            # Check if the file has a valid image extension
            _, file_extension = os.path.splitext(img_file)
            if file_extension.lower() not in valid_extensions:
                print(f"Skipping non-image file: {img_file}")
                continue  # Skip non-image files (e.g., CSV)

            img_path = os.path.join(label_path, img_file)

            # cv2.imread signals failure by returning None rather than raising
            image = cv2.imread(img_path)
            if image is None:
                print(f"Skipping unreadable image: {img_path}")
                continue

            images.append(cv2.resize(image, (IMG_SIZE, IMG_SIZE)))
            labels.append(label)

    # Convert to NumPy arrays and normalize
    images = np.array(images, dtype='float32') / 255.0  # Normalize images
    labels = to_categorical(labels, num_classes)  # One-hot encode labels

    return images, labels

# Load training data: `images` holds the resized, [0, 1]-scaled pictures and
# `labels` the matching one-hot class vectors (see load_data above)
images, labels = load_data(train_dir)

Skipping non-image file: GT-00000.csv
Skipping non-image file: GT-00001.csv
Skipping non-image file: GT-00002.csv
Skipping non-image file: GT-00003.csv
Skipping non-image file: GT-00004.csv
Skipping non-image file: GT-00005.csv
Skipping non-image file: GT-00006.csv
Skipping non-image file: GT-00007.csv
Skipping non-image file: GT-00008.csv
Skipping non-image file: GT-00009.csv
Skipping non-image file: GT-00010.csv
Skipping non-image file: GT-00011.csv
Skipping non-image file: GT-00012.csv
Skipping non-image file: GT-00013.csv
Skipping non-image file: GT-00014.csv
Skipping non-image file: GT-00015.csv
Skipping non-image file: GT-00016.csv
Skipping non-image file: GT-00017.csv
Skipping non-image file: GT-00018.csv
Skipping non-image file: GT-00019.csv
Skipping non-image file: GT-00020.csv
Skipping non-image file: GT-00021.csv
Skipping non-image file: GT-00022.csv
Skipping non-image file: GT-00023.csv
Skipping non-image file: GT-00024.csv
Skipping non-image file: GT-00025.csv
Skipping non

##Data Transformation

In [None]:
### Image Augmentation ###
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create an ImageDataGenerator that applies random geometric transforms on the fly
datagen = ImageDataGenerator(
    rotation_range=15,        # Rotate images by up to 15 degrees
    zoom_range=0.2,           # Random zoom in/out by up to 20%
    width_shift_range=0.1,    # Shift the image horizontally by 10%
    height_shift_range=0.1,   # Shift the image vertically by 10%
)

# datagen.fit(images) is intentionally not called: fit() only computes the
# dataset statistics needed by featurewise_center, featurewise_std_normalization
# and zca_whitening. None of those options is enabled above, so fitting would be
# a full pass over the data with no effect on the generator.

# Create the augmented data generator for training
# NOTE(review): this generator draws from *all* images (including whatever later
# becomes a validation fold), and the cross-validation cell in this notebook
# trains on the raw X_train arrays - so train_generator is not consumed by any
# visible cell. Confirm whether augmentation was meant to be used per fold.
train_generator = datagen.flow(images, labels, batch_size=BATCH_SIZE, shuffle=True)

##Apply k-fold cross validation

In [None]:
import numpy as np
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Define the number of folds
K = 5

# Convert categorical labels back to integers for StratifiedKFold:
# argmax over the one-hot axis recovers the class index of each sample
y_labels = np.argmax(labels, axis=1)

# Define Stratified K-Fold (shuffled, with a fixed seed so the shuffle itself
# is repeatable for a given sample order)
skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

# Function to create the EfficientNetB0 model
def create_model(input_shape, num_classes):
    """Build and compile an EfficientNetB0-based image classifier.

    The network is an ImageNet-initialised EfficientNetB0 backbone (without its
    classification top) followed by BatchNormalization, Flatten, a 512-unit
    sigmoid Dense layer and a softmax output layer.

    Args:
        input_shape: Shape of one input image, e.g. ``(IMG_SIZE, IMG_SIZE, 3)``.
        num_classes: Number of target classes; sets the width of the softmax
            output layer.

    Returns:
        A compiled ``tf.keras.Sequential`` model using Adam (learning rate
        0.001), categorical cross-entropy loss and the accuracy metric.

    Notes:
        NOTE(review): the Keras EfficientNet applications embed their own input
        rescaling and document inputs as pixel values in the [0, 255] range,
        whereas the loaders in this notebook divide by 255 first - confirm the
        double scaling is intended.
    """
    base_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=input_shape)

    # Fine-tune the whole backbone instead of freezing the pretrained weights
    base_model.trainable = True

    # Customize model by adding some additional layers
    model = tf.keras.Sequential([
        base_model,
        keras.layers.BatchNormalization(),
        keras.layers.Flatten(),
        keras.layers.Dense(512, activation='sigmoid'),
        # Output width follows the argument rather than a hard-coded class count
        keras.layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
import os

# Directory to save models
model_save_dir = '/content/drive/My Drive/UH/Data Science Project/GTSRB/Models/'

# Ensure the directory exists
os.makedirs(model_save_dir, exist_ok=True)

# Perform K-Fold Cross-Validation.
# For every fold a fresh model is trained on the training part, scored on the
# held-out part and saved to Drive; `accuracies` collects the per-fold scores and
# `model_paths` the saved files (used later to ensemble the test predictions).
# NOTE(review): the held-out fold both drives early stopping and provides the
# reported accuracy, so the cross-validation figures are likely optimistic.
fold_no = 1
accuracies = []
model_paths = []

for train_idx, val_idx in skf.split(images, y_labels):
    print(f"\nTraining fold {fold_no}...")

    # Reset Keras' global state before building the next model; without this,
    # state from the previous folds' models keeps accumulating in memory.
    tf.keras.backend.clear_session()

    # Split the data into training and validation sets
    X_train, X_val = images[train_idx], images[val_idx]
    y_train, y_val = labels[train_idx], labels[val_idx]

    # Create a new model instance
    model = create_model(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=num_classes)

    # Define early stopping: stop after 3 epochs without a better val_loss and
    # roll back to the best weights seen
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # Train the model (at most 10 epochs per fold)
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=BATCH_SIZE,
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluate the model on the validation set
    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
    print(f"Fold {fold_no} - Validation Accuracy: {val_accuracy:.4f}")
    accuracies.append(val_accuracy)

    # Save the model
    model_path = os.path.join(model_save_dir, f'model_fold_{fold_no}.h5')
    model.save(model_path)
    model_paths.append(model_path)

    fold_no += 1


Training fold 1...
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 64ms/step - accuracy: 0.6204 - loss: 1.3212 - val_accuracy: 0.9459 - val_loss: 0.1708
Epoch 2/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 21ms/step - accuracy: 0.9379 - loss: 0.2019 - val_accuracy: 0.9538 - val_loss: 0.1658
Epoch 3/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - accuracy: 0.9588 - loss: 0.1420 - val_accuracy: 0.9833 - val_loss: 0.0595
Epoch 4/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - accuracy: 0.9684 - loss: 0.1131 - val_accuracy: 0.9591 - val_loss: 0.1129
Epoch 5/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - accuracy: 0.9791 - loss: 0.0709 - val_accuracy: 0.99



Fold 1 - Validation Accuracy: 0.9945

Training fold 2...
Epoch 1/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 53ms/step - accuracy: 0.6126 - loss: 1.3757 - val_accuracy: 0.9042 - val_loss: 0.2766
Epoch 2/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 22ms/step - accuracy: 0.9406 - loss: 0.2081 - val_accuracy: 0.9462 - val_loss: 0.1763
Epoch 3/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - accuracy: 0.9627 - loss: 0.1319 - val_accuracy: 0.9397 - val_loss: 0.2380
Epoch 4/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 22ms/step - accuracy: 0.9745 - loss: 0.0956 - val_accuracy: 0.9777 - val_loss: 0.1001
Epoch 5/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 20ms/step - accuracy: 0.9725 - loss: 0.0966 - val_accuracy: 0.9865 - val_loss: 0.0530
Epoch 6/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - accuracy: 0.9812 - loss: 0.0665 



Fold 2 - Validation Accuracy: 0.9865

Training fold 3...
Epoch 1/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 57ms/step - accuracy: 0.6175 - loss: 1.3517 - val_accuracy: 0.9231 - val_loss: 0.2532
Epoch 2/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 22ms/step - accuracy: 0.9421 - loss: 0.1993 - val_accuracy: 0.9621 - val_loss: 0.1219
Epoch 3/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 20ms/step - accuracy: 0.9610 - loss: 0.1336 - val_accuracy: 0.9587 - val_loss: 0.1995
Epoch 4/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 22ms/step - accuracy: 0.9713 - loss: 0.1026 - val_accuracy: 0.9772 - val_loss: 0.0782
Epoch 5/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 23ms/step - accuracy: 0.9782 - loss: 0.0776 - val_accuracy: 0.9667 - val_loss: 0.1413
Epoch 6/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 23ms/step - accuracy: 0.9785 - loss: 0.0762 



Fold 3 - Validation Accuracy: 0.9932

Training fold 4...
Epoch 1/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 57ms/step - accuracy: 0.6268 - loss: 1.3174 - val_accuracy: 0.9327 - val_loss: 0.2294
Epoch 2/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 21ms/step - accuracy: 0.9351 - loss: 0.2182 - val_accuracy: 0.9684 - val_loss: 0.1020
Epoch 3/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 23ms/step - accuracy: 0.9655 - loss: 0.1197 - val_accuracy: 0.9695 - val_loss: 0.1086
Epoch 4/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 23ms/step - accuracy: 0.9705 - loss: 0.1057 - val_accuracy: 0.9334 - val_loss: 0.5058
Epoch 5/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 21ms/step - accuracy: 0.9781 - loss: 0.0772 - val_accuracy: 0.9848 - val_loss: 0.0542
Epoch 6/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 22ms/step - accuracy: 0.9765 - loss: 0.0902 



Fold 4 - Validation Accuracy: 0.9954

Training fold 5...
Epoch 1/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 67ms/step - accuracy: 0.6181 - loss: 1.3519 - val_accuracy: 0.9402 - val_loss: 0.2005
Epoch 2/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 22ms/step - accuracy: 0.9403 - loss: 0.2015 - val_accuracy: 0.9250 - val_loss: 0.2620
Epoch 3/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 24ms/step - accuracy: 0.9550 - loss: 0.1506 - val_accuracy: 0.9398 - val_loss: 0.2571
Epoch 4/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 23ms/step - accuracy: 0.9666 - loss: 0.1174 - val_accuracy: 0.9855 - val_loss: 0.0548
Epoch 5/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 24ms/step - accuracy: 0.9784 - loss: 0.0757 - val_accuracy: 0.9579 - val_loss: 0.1491
Epoch 6/10
[1m981/981[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 24ms/step - accuracy: 0.9769 - loss: 0.0815 



Fold 5 - Validation Accuracy: 0.9932


In [None]:
# Summary of results: mean and spread of the per-fold validation accuracies
# collected in `accuracies` (np.std defaults to the population standard
# deviation, i.e. ddof=0)
print("\nCross-Validation Results:")
print(f"Average Validation Accuracy: {np.mean(accuracies):.4f}")
print(f"Standard Deviation: {np.std(accuracies):.4f}")


Cross-Validation Results:
Average Validation Accuracy: 0.9926
Standard Deviation: 0.0032


#Evaluate the model

#Evaluate on test data (unseen data)

##Load test data

In [None]:
# Load the test dataset
def load_test_data(test_dir):
    """Load every image stored directly inside ``test_dir`` for inference.

    Images are read with OpenCV, resized to ``IMG_SIZE x IMG_SIZE`` and scaled
    to [0, 1], mirroring the preprocessing done by ``load_data``.

    Args:
        test_dir: Directory that contains the test images (no class folders).

    Returns:
        A tuple ``(images, img_names)``:
            images: float32 NumPy array of the resized, [0, 1]-scaled images.
            img_names: file names, in the same order as ``images``.

    Notes:
        * The directory listing is sorted so the output order is deterministic.
        * Files without an image extension and images OpenCV cannot decode are
          skipped with a printed message; a skipped file gets no entry in
          either return value, so no prediction will be produced for it.
        * Relies on the notebook-level ``IMG_SIZE``.
    """
    # Valid image file extensions (compared against the lower-cased suffix)
    valid_extensions = ('.jpg', '.jpeg', '.png', '.ppm')

    images = []
    img_names = []

    for img_file in sorted(os.listdir(test_dir)):
        # Check if the file is an image
        _, file_extension = os.path.splitext(img_file)
        if file_extension.lower() not in valid_extensions:
            print(f"Skipping non-image file: {img_file}")
            continue  # Skip non-image files

        img_path = os.path.join(test_dir, img_file)

        # cv2.imread signals failure by returning None rather than raising
        image = cv2.imread(img_path)
        if image is None:
            print(f"Skipping unreadable image: {img_path}")
            continue

        # Keep image and file name in lockstep so index i refers to the same file
        images.append(cv2.resize(image, (IMG_SIZE, IMG_SIZE)))
        img_names.append(img_file)

    # Convert images to a NumPy array
    images = np.array(images, dtype='float32') / 255.0  # Normalize images

    return images, img_names

# Load test images; test_img_names[i] is the file name of X_test[i]
X_test, test_img_names = load_test_data(test_dir)

Skipping non-image file: GT-final_test.test.csv


In [None]:
# Path to the semicolon-separated CSV file that sits next to the test images
test_csv_path = '/content/drive/My Drive/Data/GTSRB/Test/GTSRB/Final_Test/Images/GT-final_test.test.csv'
# Load the CSV into a DataFrame.
# NOTE(review): `test_data` is not referenced by any later cell visible here;
# the accuracy cell reads its ground truth from a different file - confirm
# whether this load is still needed.
test_data = pd.read_csv(test_csv_path, sep=";")

##Make predictions

In [None]:
# Make predictions using the average of saved models:
# start from an all-zero (n_test_images, num_classes) accumulator and add each
# fold model's softmax output to it.
predictions = np.zeros((X_test.shape[0], num_classes))

for model_path in model_paths:
    print(f"Loading model from {model_path}")
    # Reload each fold's model from disk (model_paths was filled by the
    # cross-validation cell, so that cell must have run in this session)
    model = keras.models.load_model(model_path)
    predictions += model.predict(X_test)

# Average predictions (sum of per-model probabilities / number of models)
predictions /= len(model_paths)

# Get the class index with the highest probability
predicted_classes = np.argmax(predictions, axis=1)

Loading model from /content/drive/My Drive/UH/Data Science Project/GTSRB/Models/model_fold_1.h5




[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 22ms/step
Loading model from /content/drive/My Drive/UH/Data Science Project/GTSRB/Models/model_fold_2.h5




[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step
Loading model from /content/drive/My Drive/UH/Data Science Project/GTSRB/Models/model_fold_3.h5




[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step
Loading model from /content/drive/My Drive/UH/Data Science Project/GTSRB/Models/model_fold_4.h5




[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 16ms/step
Loading model from /content/drive/My Drive/UH/Data Science Project/GTSRB/Models/model_fold_5.h5




[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 19ms/step


##Save test data

In [None]:
# Create a DataFrame to save the image file names and predicted classes
# (row i pairs test_img_names[i] with the ensemble's prediction for X_test[i])
result_df = pd.DataFrame({
    'Filename': test_img_names,
    'Predicted Class': predicted_classes
})

# Ensure the filenames are sorted in ascending order in the result file
result_df = result_df.sort_values(by='Filename')

# Save the results to a CSV file in the current working directory
# (index=False keeps the DataFrame index out of the file)
result_df.to_csv('gtsrb_test_predictions.csv', index=False)
print("Predictions saved to 'gtsrb_test_predictions.csv'")

Predictions saved to 'gtsrb_test_predictions.csv'


##Print test accuracy

In [None]:
import pandas as pd

# Load your predictions (written by the "Save test data" cell)
predictions_df = pd.read_csv('gtsrb_test_predictions.csv')

# Load the ground truth file (downloaded separately)
ground_truth_df = pd.read_csv('/content/drive/My Drive/Data/GTSRB/GT-final_test.csv', sep=';')

# Merge both dataframes on the 'Filename' column so each prediction sits next
# to its true ClassId.
# NOTE(review): pd.merge defaults to an inner join, so any file name present in
# only one of the two frames is dropped without warning and excluded from the
# accuracy - consider checking len(merged_df) against len(predictions_df).
merged_df = pd.merge(predictions_df, ground_truth_df[['Filename', 'ClassId']], on='Filename')

# Calculate accuracy by comparing predicted classes with actual classes
# (mean of the boolean match column = fraction of correct predictions)
accuracy = (merged_df['Predicted Class'] == merged_df['ClassId']).mean()

print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 98.14%


##Classification report

In [None]:
# Classification Report: per-class precision, recall, F1 and support.
# classification_report expects (y_true, y_pred) in that order; the ground-truth
# ClassId therefore goes first so that precision/recall are not swapped and the
# support column counts the true samples of each class.
from sklearn.metrics import classification_report
print(classification_report(merged_df['ClassId'], merged_df['Predicted Class']))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        60
           1       1.00      0.99      0.99       724
           2       1.00      0.98      0.99       762
           3       0.98      0.99      0.98       447
           4       0.98      0.99      0.99       652
           5       0.98      0.97      0.97       636
           6       0.91      1.00      0.95       136
           7       1.00      0.98      0.99       459
           8       0.98      0.99      0.98       448
           9       1.00      1.00      1.00       482
          10       0.99      1.00      0.99       653
          11       0.98      0.92      0.95       449
          12       1.00      1.00      1.00       691
          13       1.00      1.00      1.00       719
          14       1.00      1.00      1.00       270
          15       1.00      1.00      1.00       211
          16       0.99      1.00      1.00       149
          17       1.00    