In [17]:
import os

def read_image_folders(base_path, splits=('train', 'test', 'val')):
    """Index the files stored under ``base_path/<split>/<category>/``.

    Args:
        base_path: Directory that contains one sub-directory per split.
        splits: Names of the split sub-directories to scan. Defaults to the
            three splits the original implementation hard-coded.

    Returns:
        A nested dict ``{split: {category: [file_path, ...]}}``. Every
        requested split is present as a key; a split whose directory is
        missing maps to an empty dict. Categories and file paths are listed
        in sorted order so repeated runs give the same result.
    """
    dataset = {}
    for split in splits:
        split_path = os.path.join(base_path, split)
        dataset[split] = {}

        # isdir rather than exists: a stray regular file named like a split
        # would pass an existence check and then make os.listdir raise.
        if not os.path.isdir(split_path):
            print(f"Directory {split_path} does not exist.")
            continue

        # os.listdir returns entries in arbitrary order; sort for determinism.
        for category in sorted(os.listdir(split_path)):
            category_path = os.path.join(split_path, category)
            if not os.path.isdir(category_path):
                continue
            candidates = (
                os.path.join(category_path, name)
                for name in sorted(os.listdir(category_path))
            )
            # Keep regular files only; nested directories are ignored.
            dataset[split][category] = [
                path for path in candidates if os.path.isfile(path)
            ]

    return dataset

def print_dataset_structure(dataset):
    """Print, for each split, how many entries every category holds.

    Args:
        dataset: Nested mapping ``{split: {category: sequence}}``.

    Output goes to stdout: a blank line and the upper-cased split name,
    followed by one indented ``category: N images`` line per category.
    """
    for split in dataset:
        print(f"\n{split.upper()}:")
        categories = dataset[split]
        for category in categories:
            images = categories[category]
            print(f"  {category}: {len(images)} images")

# Example usage:
# The dataset location defaults to the path this notebook was written against.
# Set the GTFINE_DIR environment variable to point at another copy of the
# dataset without editing the notebook.
base_path = os.environ.get(
    'GTFINE_DIR',
    'C:/Users/Dell/OneDrive/Documents/ISB/gtFine_trainvaltest/gtFine',
)
dataset = read_image_folders(base_path)
print_dataset_structure(dataset)



TRAIN:
  zurich: 488 images
  strasbourg: 1460 images
  weimar: 568 images
  aachen: 696 images
  tubingen: 576 images
  jena: 470 images
  bochum: 384 images
  darmstadt: 340 images
  dusseldorf: 884 images
  hamburg: 992 images
  cologne: 616 images
  monchengladbach: 376 images
  krefeld: 396 images
  ulm: 380 images
  hanover: 784 images
  stuttgart: 784 images
  erfurt: 436 images
  bremen: 1264 images

TEST:
  bielefeld: 724 images
  leverkusen: 232 images
  bonn: 184 images
  berlin: 2176 images
  mainz: 1192 images
  munich: 1592 images

VAL:
  munster: 696 images
  lindau: 236 images
  frankfurt: 1068 images


In [20]:
import os
from PIL import Image, UnidentifiedImageError
import numpy as np

def read_and_preprocess_image(image_path, target_size=(224, 224)):
    """Load an image, resize it and scale its values by 1/255.

    Args:
        image_path: Path of the image file to open with Pillow.
        target_size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        A NumPy array holding the resized image's values divided by 255.0,
        or ``None`` when Pillow cannot identify the file as an image (a
        message is printed in that case).

    NOTE(review): dividing by 255 presumes 8-bit channel data, and the
    resize uses Pillow's default resampling filter. Confirm both are
    appropriate for the files being loaded (e.g. label/ID maps).
    """
    try:
        # Image.open is lazy and keeps the file handle open; the context
        # manager guarantees it is released even if resize() raises.
        with Image.open(image_path) as image:
            resized = image.resize(target_size)
    except UnidentifiedImageError:
        print(f"Cannot identify image file {image_path}. Skipping...")
        return None
    # `resized` is an independent, fully loaded image, so it can be
    # converted after the source file has been closed.
    return np.array(resized) / 255.0

def preprocess_dataset(base_path, target_size=(224, 224), valid_extensions=('.png',),
                       splits=('train', 'test', 'val')):
    """Load and preprocess every matching image under ``base_path``.

    The directory layout walked is ``base_path/<split>/<category>/<file>``.

    Args:
        base_path: Directory that contains one sub-directory per split.
        target_size: Size forwarded to ``read_and_preprocess_image``.
        valid_extensions: File-name suffix, or iterable of suffixes, that a
            file must end with (case-insensitively) to be loaded.
        splits: Names of the split sub-directories to scan.

    Returns:
        A nested dict ``{split: {category: [preprocessed_image, ...]}}``.
        Every requested split is present as a key; a split whose directory
        is missing maps to an empty dict. Files for which
        ``read_and_preprocess_image`` returns ``None`` are left out.

    Note:
        Every preprocessed image is kept in memory in the returned lists.
    """
    # A bare string stays supported (the previous default was the string
    # '.png'); any other iterable becomes the tuple str.endswith requires.
    # Suffixes are lower-cased because file names are lower-cased before
    # the comparison below.
    if isinstance(valid_extensions, str):
        valid_extensions = (valid_extensions,)
    valid_extensions = tuple(ext.lower() for ext in valid_extensions)

    dataset = {}
    for split in splits:
        split_path = os.path.join(base_path, split)
        dataset[split] = {}

        # isdir rather than exists: a stray regular file named like a split
        # would pass an existence check and then make os.listdir raise.
        if not os.path.isdir(split_path):
            print(f"Directory {split_path} does not exist.")
            continue

        # os.listdir returns entries in arbitrary order; sort for determinism.
        for category in sorted(os.listdir(split_path)):
            category_path = os.path.join(split_path, category)
            if not os.path.isdir(category_path):
                continue

            loaded = []
            for image_name in sorted(os.listdir(category_path)):
                if not image_name.lower().endswith(valid_extensions):
                    continue
                image_path = os.path.join(category_path, image_name)
                if not os.path.isfile(image_path):
                    continue
                preprocessed_image = read_and_preprocess_image(image_path, target_size)
                if preprocessed_image is not None:
                    loaded.append(preprocessed_image)
            dataset[split][category] = loaded

    return dataset

def print_dataset_info(dataset):
    """Write a per-split, per-category item count summary to stdout.

    Args:
        dataset: Nested mapping ``{split: {category: sequence}}``.

    Each split is introduced by a blank line and its upper-cased name; each
    category is then reported on its own indented line with its item count.
    """
    for split in dataset:
        # Split header, preceded by an empty line.
        print(f"\n{split.upper()}:")
        for category in dataset[split]:
            images = dataset[split][category]
            print(f"  {category}: {len(images)} images")

# The dataset location defaults to the path this notebook was written against.
# Set the GTFINE_DIR environment variable to point at another copy of the
# dataset without editing the notebook.
base_path = os.environ.get(
    'GTFINE_DIR',
    'C:/Users/Dell/OneDrive/Documents/ISB/gtFine_trainvaltest/gtFine',
)
# Loads and preprocesses every matching image; results are held in memory.
dataset = preprocess_dataset(base_path)
print_dataset_info(dataset)



TRAIN:
  zurich: 366 images
  strasbourg: 1095 images
  weimar: 426 images
  aachen: 522 images
  tubingen: 432 images
  jena: 354 images
  bochum: 288 images
  darmstadt: 255 images
  dusseldorf: 663 images
  hamburg: 744 images
  cologne: 462 images
  monchengladbach: 282 images
  krefeld: 297 images
  ulm: 285 images
  hanover: 588 images
  stuttgart: 588 images
  erfurt: 327 images
  bremen: 948 images

TEST:
  bielefeld: 543 images
  leverkusen: 174 images
  bonn: 138 images
  berlin: 1632 images
  mainz: 894 images
  munich: 1194 images

VAL:
  munster: 522 images
  lindau: 177 images
  frankfurt: 801 images
