In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import os

# Load data

In [2]:
from google.colab import files
files.upload()


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"mohammedayoub968","key":"98ef293618133f9c4a9581633c61a6e6"}'}

In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [4]:
!kaggle datasets download -d meowmeowmeowmeowmeow/gtsrb-german-traffic-sign -p /content


Dataset URL: https://www.kaggle.com/datasets/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign
License(s): CC0-1.0
Downloading gtsrb-german-traffic-sign.zip to /content
 99% 607M/612M [00:01<00:00, 282MB/s]
100% 612M/612M [00:01<00:00, 381MB/s]


In [5]:
!unzip /content/gtsrb-german-traffic-sign.zip -d /content/gtsrb


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/gtsrb/train/5/00005_00053_00010.png  
  inflating: /content/gtsrb/train/5/00005_00053_00011.png  
  inflating: /content/gtsrb/train/5/00005_00053_00012.png  
  inflating: /content/gtsrb/train/5/00005_00053_00013.png  
  inflating: /content/gtsrb/train/5/00005_00053_00014.png  
  inflating: /content/gtsrb/train/5/00005_00053_00015.png  
  inflating: /content/gtsrb/train/5/00005_00053_00016.png  
  inflating: /content/gtsrb/train/5/00005_00053_00017.png  
  inflating: /content/gtsrb/train/5/00005_00053_00018.png  
  inflating: /content/gtsrb/train/5/00005_00053_00019.png  
  inflating: /content/gtsrb/train/5/00005_00053_00020.png  
  inflating: /content/gtsrb/train/5/00005_00053_00021.png  
  inflating: /content/gtsrb/train/5/00005_00053_00022.png  
  inflating: /content/gtsrb/train/5/00005_00053_00023.png  
  inflating: /content/gtsrb/train/5/00005_00053_00024.png  
  inflating: /content/gtsrb/train/5

In [6]:
base_dir = "/content/gtsrb"
print(os.listdir(base_dir))


['meta', 'Meta.csv', 'Train.csv', 'test', 'Meta', 'Test.csv', 'train', 'Train', 'Test']


read files :

In [7]:
train_df = pd.read_csv(os.path.join(base_dir, "Train.csv"))
test_df  = pd.read_csv(os.path.join(base_dir, "Test.csv"))
meta_df  = pd.read_csv(os.path.join(base_dir, "Meta.csv"))

print("Train shape:", train_df.shape)
print("Test shape:", test_df.shape)
print("Meta shape:", meta_df.shape)


Train shape: (39209, 8)
Test shape: (12630, 8)
Meta shape: (43, 5)


In [8]:
train_df["full_path"] = train_df["Path"].apply(lambda x: os.path.join(base_dir, x))

In [9]:
train_df.head()

Unnamed: 0,Width,Height,Roi.X1,Roi.Y1,Roi.X2,Roi.Y2,ClassId,Path,full_path
0,27,26,5,5,22,20,20,Train/20/00020_00000_00000.png,/content/gtsrb/Train/20/00020_00000_00000.png
1,28,27,5,6,23,22,20,Train/20/00020_00000_00001.png,/content/gtsrb/Train/20/00020_00000_00001.png
2,29,26,6,5,24,21,20,Train/20/00020_00000_00002.png,/content/gtsrb/Train/20/00020_00000_00002.png
3,28,27,5,6,23,22,20,Train/20/00020_00000_00003.png,/content/gtsrb/Train/20/00020_00000_00003.png
4,28,26,5,5,23,21,20,Train/20/00020_00000_00004.png,/content/gtsrb/Train/20/00020_00000_00004.png


In [10]:
train_df = train_df.drop(columns=["Path"])

In [11]:
train_df.columns

Index(['Width', 'Height', 'Roi.X1', 'Roi.Y1', 'Roi.X2', 'Roi.Y2', 'ClassId',
       'full_path'],
      dtype='object')

Image verification and ROI check:

In [12]:
from PIL import Image

missing_files = []
corrupted_files = []
roi_errors = []

for idx, row in train_df.iterrows():
    img_path = row["full_path"]
    if not os.path.exists(img_path):
        missing_files.append(img_path)
        continue

    try:
        img = Image.open(img_path)
        w, h = img.size
        img.verify()
    except:
        corrupted_files.append(img_path)
        continue

    if not (0 <= row["Roi.X1"] < row["Roi.X2"] <= w and 0 <= row["Roi.Y1"] < row["Roi.Y2"] <= h):
        roi_errors.append(img_path)

print("Total samples:", len(train_df))
print("Missing files:", len(missing_files))
print("Corrupted files:", len(corrupted_files))
print("ROI errors:", len(roi_errors))


Total samples: 39209
Missing files: 0
Corrupted files: 0
ROI errors: 0


In [13]:
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm
import cv2


Crop images by ROI and convert them to 64x64:

In [14]:
def load_data_with_roi(df, target_size=(64,64), num_classes=None):
    X = []
    y = []

    for i, row in tqdm(df.iterrows(), total=len(df)):
        try:
            img = cv2.imread(row['full_path'])

            if img is None:
                continue

            x1, y1, x2, y2 = int(row['Roi.X1']), int(row['Roi.Y1']), int(row['Roi.X2']), int(row['Roi.Y2'])
            roi = img[y1:y2, x1:x2]

            roi = cv2.resize(roi, target_size)

            roi = roi.astype("float32") / 255.0

            X.append(roi)
            y.append(row['ClassId'])

        except Exception as e:
            print(f"wrong in an image {row['full_path']}: {e}")
            continue

    X = np.array(X)
    y = np.array(y)

    if num_classes is None:
        num_classes = len(np.unique(y))
    y = to_categorical(y, num_classes=num_classes)

    return X, y

In [15]:
X, y = load_data_with_roi(train_df, target_size=(64,64))
print(X.shape, y.shape)



100%|██████████| 39209/39209 [00:13<00:00, 2989.89it/s]


(39209, 64, 64, 3) (39209, 43)


In [16]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [17]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt
from sklearn.model_selection import KFold

Build model for tuner :

In [18]:
def build_model(hp, num_classes):
    model = keras.Sequential()

    # Convolutional Blocks
    for i in range(hp.Int('conv_blocks', 1, 3, default=2)):
        regularizer_choice = hp.Choice('conv_regularizer', ['l1', 'l2', 'l1_l2', 'None'])
        if regularizer_choice == 'l1':
            conv_regularizer = regularizers.l1(hp.Float('conv_l1', 1e-5, 1e-2, sampling='log'))
        elif regularizer_choice == 'l2':
            conv_regularizer = regularizers.l2(hp.Float('conv_l2', 1e-5, 1e-2, sampling='log'))
        elif regularizer_choice == 'l1_l2':
            conv_regularizer = regularizers.L1L2(
                l1=hp.Float('conv_l1_l2_l1', 1e-5, 1e-2, sampling='log'),
                l2=hp.Float('conv_l1_l2_l2', 1e-5, 1e-2, sampling='log')
            )
        else:
            conv_regularizer = None


        model.add(layers.Conv2D(
            filters=hp.Choice(f'filters_{i}', values=[32, 64, 128]),
            kernel_size=hp.Choice(f'kernel_{i}', values=[3, 5]),
            activation=hp.Choice('conv_activation', ['relu', 'tanh', 'sigmoid']),
            padding='same',
            kernel_regularizer=conv_regularizer
        ))
        model.add(layers.MaxPooling2D(pool_size=2))

    # Dense Layers
    regularizer_choice = hp.Choice('dense_regularizer', ['l1', 'l2', 'l1_l2', 'None'])
    if regularizer_choice == 'l1':
        dense_regularizer = regularizers.l1(hp.Float('dense_l1', 1e-5, 1e-2, sampling='log'))
    elif regularizer_choice == 'l2':
        dense_regularizer = regularizers.l2(hp.Float('dense_l2', 1e-5, 1e-2, sampling='log'))
    elif regularizer_choice == 'l1_l2':
        dense_regularizer = regularizers.L1L2(
            l1=hp.Float('dense_l1_l2_l1', 1e-5, 1e-2, sampling='log'),
            l2=hp.Float('dense_l1_l2_l2', 1e-5, 1e-2, sampling='log')
        )
    else:
        dense_regularizer = None

    model.add(layers.Flatten())
    model.add(layers.Dense(
        units=hp.Int('dense_units', 64, 512, step=64),
        activation=hp.Choice('dense_activation', ['relu', 'tanh', 'sigmoid']),
        kernel_regularizer=dense_regularizer
    ))
    model.add(layers.Dropout(hp.Float('dropout', 0.2, 0.5, step=0.1)))

    # Output Layer → pass num_classes as global
    model.add(layers.Dense(num_classes, activation='softmax'))

    # Optimizer
    optimizer_choice = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd'])
    if optimizer_choice == 'adam':
        optimizer = keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
        )
    elif optimizer_choice == 'rmsprop':
        optimizer = keras.optimizers.RMSprop(
            learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
        )
    else:
        optimizer = keras.optimizers.SGD(
            learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4]),
            momentum=hp.Float('momentum', 0.0, 0.9, step=0.1)
        )

    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

EarlyStopping:

In [19]:
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)


KFold :

In [20]:
kf = KFold(n_splits=3, shuffle=True, random_state=42)
fold_no = 1
num_classes = train_df['ClassId'].nunique()

for train_idx, val_idx in kf.split(X):
    print(f"\n🔹 Fold {fold_no}")
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    # Use tuner inside fold
    tuner = kt.RandomSearch(
        lambda hp: build_model(hp, num_classes),
        objective='val_accuracy',
        max_trials=5,
        executions_per_trial=1,
        overwrite=True,
        directory='tuner_dir',
        project_name=f'fold_{fold_no}'
    )

    tuner.search(
        X_train, y_train,
        epochs=15,
        validation_data=(X_val, y_val),
        callbacks=[early_stop],
        verbose=1
    )

    fold_no += 1

Trial 5 Complete [00h 01m 35s]
val_accuracy: 0.05807636305689812

Best val_accuracy So Far: 0.9885990023612976
Total elapsed time: 00h 16m 08s


In [21]:
best_model = tuner.get_best_models(num_models=1)[0]


In [22]:
history = best_model.fit(
    X_train, y_train,
    epochs=15,
    batch_size=64,
    validation_split=0.1,
    callbacks=[early_stop],
    verbose=1
)


Epoch 1/15
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 49ms/step - accuracy: 0.9808 - loss: 0.2315 - val_accuracy: 0.9709 - val_loss: 0.1943
Epoch 2/15
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - accuracy: 0.9827 - loss: 0.2104 - val_accuracy: 0.9629 - val_loss: 0.2225
Epoch 3/15
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - accuracy: 0.9849 - loss: 0.2041 - val_accuracy: 0.9258 - val_loss: 0.3408
Epoch 4/15
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - accuracy: 0.9853 - loss: 0.2014 - val_accuracy: 0.8458 - val_loss: 0.5826
