In [1]:
import tensorflow as tf
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
from tensorflow import keras
from tensorflow.keras.utils import image_dataset_from_directory  # For creating datasets from directories
from tabulate import tabulate


In [2]:
gpu_conf = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpu_conf:
    tf.config.experimental.set_memory_growth(gpu, True)

In [3]:
data_dir = 'trafficsigns_dataset/trafficsigns_dataset'

In [4]:
def printSummary(path):
    data = []
    dir_name = []
    # Iterate over each subdirectory
    for dirpath, dirnames, filenames in os.walk(path):
        # Count the number of image files in the current subdirectory
        num_images = 0
        for filename in filenames:
            if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                num_images += 1

        # If any images are found, print a relative path from the base directory
        if num_images > 0:
            dir_name.append(dirnames)
            # Get the relative path from the base directory
            relative_path = os.path.basename(dirpath)
            data.append([relative_path,num_images])
    headers = ['Directory', 'Number of Images']
    print(tabulate(data, headers=headers, tablefmt='pretty'))
printSummary(data_dir)

+------------------+------------------+
|    Directory     | Number of Images |
+------------------+------------------+
|    rightofway    |       282        |
|       stop       |        43        |
|     bicycle      |       285        |
|  limitedtraffic  |       125        |
|     noentry      |       375        |
|    noparking     |       242        |
|    roundabout    |        98        |
|      speed       |       316        |
| trafficdirective |       195        |
| traveldirection  |       124        |
|     continue     |       199        |
|     crossing     |        95        |
|     laneend      |       118        |
|     parking      |       276        |
|     giveway      |       231        |
+------------------+------------------+


In [5]:
# Image dimensions and batch size
img_height = 224
img_width = 224
batch_size = 32

# Function to get data labeled by main folders
def get_data_labeled_by_main_folders(data_dir, img_height, img_width, batch_size):
    return image_dataset_from_directory(
        data_dir,
        shuffle=True,
        batch_size=batch_size,
        image_size=(img_height, img_width),
        label_mode='categorical'  # Options: 'int', 'categorical', None
    )

In [6]:
# Use the functions
dataset_main = get_data_labeled_by_main_folders(data_dir, img_height, img_width, batch_size)

print("Classes (main):", dataset_main.class_names)

Found 3699 files belonging to 5 classes.
Classes (main): ['diamond', 'hex', 'round', 'square', 'triangle']


In [7]:
import os
import shutil
import tempfile
import tensorflow as tf

# Function to get only leaf directories
def get_leaf_directories(data_dir):
    leaf_directories = []
    for dirpath, dirnames, filenames in os.walk(data_dir):
        if not dirnames:
            leaf_directories.append(dirpath)
    return leaf_directories

# Function to concatenate files from leaf directories into a temporary dataset
def create_concatenated_dataset(data_dir):
    # Create a temporary directory
    temp_dir = tempfile.mkdtemp()
    
    try:
        # Get the leaf directories and copy their contents to the temp directory
        leaf_directories = get_leaf_directories(data_dir)

        for leaf_dir in leaf_directories:
            class_name = os.path.basename(leaf_dir)
            dest_path = os.path.join(temp_dir, class_name)
            os.makedirs(dest_path, exist_ok=True)

            for filename in os.listdir(leaf_dir):
                file_path = os.path.join(leaf_dir, filename)
                if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                    shutil.copy(file_path, dest_path)

        # Create a TensorFlow dataset from the concatenated files
        concatenated_dataset = tf.keras.utils.image_dataset_from_directory(
            temp_dir,
            shuffle=True,
            batch_size=32,
            image_size=(224,224),
            label_mode='categorical'
        )

        # Return the dataset and the class names
        return concatenated_dataset
    
    finally:
        # Clean up the temporary directory after processing
        shutil.rmtree(temp_dir)

# Path to the original data directory
data_dir = "trafficsigns_dataset/trafficsigns_dataset"

# Create the dataset and get the class names
dataset = create_concatenated_dataset(data_dir)

# Verify the dataset is correctly loaded
print("Classes:", dataset.class_names)


Found 3699 files belonging to 16 classes.
