In [None]:
# Mount Google Drive inside the Colab filesystem so its files are reachable under /content/drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Extract the dataset archive stored on Drive into /content/gtsrb (-d sets the extraction directory)
!unzip /content/drive/MyDrive/project/archive.zip -d /content/gtsrb

In [None]:
import pandas as pd
import os
from PIL import Image

# Root directory the archive was extracted into; the following cells build their paths from it
base_path = '/content/gtsrb'

In [None]:

# Collect the name of every directory that sits directly under base_path
with os.scandir(base_path) as entries:
    class_dirs = [entry.name for entry in entries if entry.is_dir()]

# Make sure each of those directories contains a 'labels' subdirectory
for subdir in class_dirs:
    labels_path = os.path.join(base_path, subdir, 'labels')
    os.makedirs(labels_path, exist_ok=True)
    print(f"Created or verified labels folder: {labels_path}")


In [None]:
import numpy as np
import shutil

def split_data_and_csv(source, train_dest, valid_dest, csv_path, split_ratio=0.2, seed=None):
    """Split a class-per-folder image dataset, and its annotation CSV, into train/validation.

    Every immediate subdirectory of ``source`` is treated as one class. The files of each
    class are shuffled; the last ``split_ratio`` fraction is moved to
    ``valid_dest/<class>`` and the remainder to ``train_dest/<class>``. The rows of the
    annotation CSV are divided the same way and written to
    ``train_dest/train_labels.csv`` and ``valid_dest/valid_labels.csv``.

    A CSV row belongs to a file when the file name of its ``Path`` value equals the file's
    name and, if ``Path`` contains a directory component, its innermost directory equals
    the class directory. ``Path`` may therefore be either a bare file name or a relative
    path such as ``<dir>/<class>/<file>``.

    Args:
        source: Directory containing one subdirectory per class.
        train_dest: Directory receiving the training files and ``train_labels.csv``.
        valid_dest: Directory receiving the validation files and ``valid_labels.csv``.
        csv_path: Annotation CSV; must contain a ``Path`` column.
        split_ratio: Fraction of each class assigned to validation, between 0 and 1.
        seed: Optional seed for a reproducible shuffle. ``None`` (default) keeps using
            NumPy's global random state.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``.

    Notes:
        Files are *moved*, so ``source`` is emptied. Rows are written unchanged and in
        their original CSV order: their ``Path`` values still name the pre-move location.
    """
    if not 0 <= split_ratio <= 1:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    df = pd.read_csv(csv_path)

    # Pre-compute, once, the file name and innermost directory of every annotated path.
    paths = df['Path'].astype(str)
    row_names = paths.map(os.path.basename)
    row_parents = paths.map(lambda p: os.path.basename(os.path.dirname(p)))

    # Boolean row selectors, filled class by class instead of growing frames row by row.
    train_mask = np.zeros(len(df), dtype=bool)
    valid_mask = np.zeros(len(df), dtype=bool)

    # Both the numpy.random module and a RandomState instance expose ``shuffle``.
    rng = np.random if seed is None else np.random.RandomState(seed)

    for class_dir in sorted(os.listdir(source)):
        class_path = os.path.join(source, class_dir)
        if not os.path.isdir(class_path):
            continue  # stray files next to the class folders are not classes

        # Sorted so that a given seed always produces the same split.
        images = sorted(
            name for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )
        rng.shuffle(images)

        split_point = int(len(images) * (1 - split_ratio))
        train_images = images[:split_point]
        valid_images = images[split_point:]

        train_class_dir = os.path.join(train_dest, class_dir)
        valid_class_dir = os.path.join(valid_dest, class_dir)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(valid_class_dir, exist_ok=True)

        # Moving into the directory (not onto a file path) makes shutil raise instead of
        # silently overwriting when a file of that name is already there.
        for image in train_images:
            shutil.move(os.path.join(class_path, image), train_class_dir)
        for image in valid_images:
            shutil.move(os.path.join(class_path, image), valid_class_dir)

        # Rows of this class: same innermost directory, or no directory in Path at all.
        in_class = ((row_parents == class_dir) | (row_parents == '')).to_numpy()
        train_mask |= in_class & row_names.isin(train_images).to_numpy()
        valid_mask |= in_class & row_names.isin(valid_images).to_numpy()

    # The destinations may not exist yet when ``source`` holds no class directory.
    os.makedirs(train_dest, exist_ok=True)
    os.makedirs(valid_dest, exist_ok=True)
    df[train_mask].to_csv(os.path.join(train_dest, 'train_labels.csv'), index=False)
    df[valid_mask].to_csv(os.path.join(valid_dest, 'valid_labels.csv'), index=False)

# Locations involved in the train/validation split
base_path = '/content/gtsrb'
source_path = os.path.join(base_path, 'train')  # unsplit class folders; adjust if different
train_path = os.path.join(base_path, 'train_new')
valid_path = os.path.join(base_path, 'valid_new')
csv_path = os.path.join(base_path, 'Train.csv')  # annotation CSV covering the full set

# Run the split with the function's default validation ratio
split_data_and_csv(
    source=source_path,
    train_dest=train_path,
    valid_dest=valid_path,
    csv_path=csv_path,
)


In [None]:
# Convert the per-split annotation CSVs (image path, ROI corners, class id) into
# YOLO-format label files: one .txt per image, one "class x_center y_center width height"
# line per annotation, all box values normalised by the image size.
annotations_csv_train = pd.read_csv('/content/gtsrb/train_new/train_labels.csv')
annotations_csv_valid = pd.read_csv('/content/gtsrb/valid_new/valid_labels.csv')


def write_yolo_labels(annotations, split_name):
    """Write YOLO label files for one split into ``<base_path>/<split_name>/labels``.

    Args:
        annotations: DataFrame with the columns ``Path``, ``Roi.X1``, ``Roi.Y1``,
            ``Roi.X2``, ``Roi.Y2`` and ``ClassId``.
        split_name: Directory of the split below ``base_path`` (e.g. ``'train_new'``).

    Each image is looked up first at ``<split>/<innermost dir of Path>/<file name>``,
    which is where the split step moves it, and only then at ``<base_path>/<Path>``.
    Label files are rewritten rather than appended to, so running the cell again does
    not duplicate annotation lines.
    """
    split_dir = os.path.join(base_path, split_name)
    labels_dir = os.path.join(split_dir, 'labels')
    os.makedirs(labels_dir, exist_ok=True)

    lines_by_file = {}
    for _, row in annotations.iterrows():
        rel_path = row['Path']
        file_name = os.path.basename(rel_path)

        # Locate the image: prefer its post-split location, fall back to the CSV path.
        class_dir = os.path.basename(os.path.dirname(rel_path))
        moved_path = os.path.join(split_dir, class_dir, file_name)
        img_path = moved_path if os.path.exists(moved_path) else os.path.join(base_path, rel_path)

        # Only the size is needed; the context manager releases the file handle.
        with Image.open(img_path) as image:
            iw, ih = image.size

        # Normalize the bounding box dimensions
        box_w = row['Roi.X2'] - row['Roi.X1']
        box_h = row['Roi.Y2'] - row['Roi.Y1']
        x_center = (row['Roi.X1'] + box_w / 2) / iw
        y_center = (row['Roi.Y1'] + box_h / 2) / ih
        width = box_w / iw
        height = box_h / ih

        # Format for YOLO
        yolo_data = f"{row['ClassId']} {x_center} {y_center} {width} {height}\n"

        # Group lines per label file so each file is written exactly once
        txt_path = os.path.join(labels_dir, os.path.splitext(file_name)[0] + '.txt')
        lines_by_file.setdefault(txt_path, []).append(yolo_data)

    for txt_path, lines in lines_by_file.items():
        with open(txt_path, 'w') as file:
            file.writelines(lines)


write_yolo_labels(annotations_csv_train, 'train_new')
write_yolo_labels(annotations_csv_valid, 'valid_new')
