# **CityPerson Dataset EDA (Exploratory Data Analysis)**
A complete EDA (Exploratory Data Analysis) of the CityPerson dataset.

## Datasets Used:


*   gtFine_trainvaltest
*   gtFinePanopticParts_trainval

## Dataset Structure:

1.   gtFine_trainvaltest
  *   Contains PNG and JSON files.
  *   Organized by:
       * Train, validation, and test folders.
  *   File types:
      * *_color.png: Color-coded images for segmentation.
      * *_instanceIds.png: Encoded image masks where each pedestrian is represented with a unique ID.
      * *_labelIds.png: Encoded image masks for class labels.
      * *_polygons.json: Contains polygonal annotations for semantic and instance segmentation.
2. gtFinePanopticParts_trainval:
    * Contains TIF files.
    * Organized similarly to gtFine_trainvaltest, but with train and validation folders only (no test folder).
    * File type:
      * *_gtFinePanopticParts.tif: Panoptic segmentation with part-level annotations (e.g., parts of a pedestrian like arms or legs).
val





In [6]:
# Import required libraries
import os
import zipfile

In [7]:
# Root folders of the two annotation packages. These are already-extracted
# directories under the Kaggle input mount, so they can be listed directly.
gtFine = '/kaggle/input/cityperson-dataset/gtFine_trainvaltest'
gtFinepanopticParts = '/kaggle/input/cityperson-dataset/gtFinePanopticParts_trainval'

# Top-level entries of each package, in the order os.listdir returns them
gtFine_Files, gtFinepano_Files = [
    os.listdir(package_root) for package_root in (gtFine, gtFinepanopticParts)
]

# Last expression of the cell: displayed as the cell output
gtFine_Files, gtFinepano_Files

(['README', 'license.txt', 'gtFine'],
 ['gtFinePanopticParts', 'README_panopticParts.md', 'license.txt'])

In [8]:
# Each package keeps its data one level down, in a folder named after the package
gtFine_CorePath = os.path.join(gtFine, 'gtFine')
gtFinePano_CorePath = os.path.join(gtFinepanopticParts, 'gtFinePanopticParts')

# List the directories inside each core folder.
# A core folder that does not exist is reported as an empty list.
gtFine_Dirs = []
if os.path.exists(gtFine_CorePath):
    gtFine_Dirs = os.listdir(gtFine_CorePath)

gtFinePano_Dirs = []
if os.path.exists(gtFinePano_CorePath):
    gtFinePano_Dirs = os.listdir(gtFinePano_CorePath)

# Last expression of the cell: displayed as the cell output
gtFine_Dirs, gtFinePano_Dirs

(['val', 'test', 'train'], ['val', 'train'])

In [9]:
# Collect the folders found under every split of both annotation sets.
# (A simpler variant would list only the 'train' split of each set; here all
# splits are handled the same way.)

# Splits to inspect, taken from the gtFine core folder listing
# (e.g. "train", "val", "test")
subdirs = gtFine_Dirs

# Map split name -> names of the entries found inside that split
gtFine_CityFolders = {}
gtFinePano_CityFolders = {}

for subdir in subdirs:
    # gtFine side: list the split folder when it is present
    if subdir in gtFine_Dirs:
        gtFine_CityFolders[subdir] = os.listdir(os.path.join(gtFine_CorePath, subdir))
    else:
        gtFine_CityFolders[subdir] = []

    # Panoptic-parts side: a split missing from this set maps to an empty list
    if subdir in gtFinePano_Dirs:
        gtFinePano_CityFolders[subdir] = os.listdir(os.path.join(gtFinePano_CorePath, subdir))
    else:
        gtFinePano_CityFolders[subdir] = []

# Preview dictionaries hold a copy of each complete listing (nothing is truncated)
gtFine_CityFolders_Preview = {
    split: list(entries) for split, entries in gtFine_CityFolders.items()
}
gtFinePano_CityFolders_Preview = {
    split: list(entries) for split, entries in gtFinePano_CityFolders.items()
}

# Last expression of the cell: displayed as the cell output
gtFine_CityFolders_Preview, gtFinePano_CityFolders_Preview

({'val': ['munster', 'lindau', 'frankfurt'],
  'test': ['mainz', 'bielefeld', 'leverkusen', 'berlin', 'munich', 'bonn'],
  'train': ['dusseldorf',
   'darmstadt',
   'tubingen',
   'erfurt',
   'bochum',
   'krefeld',
   'weimar',
   'bremen',
   'aachen',
   'hamburg',
   'monchengladbach',
   'hanover',
   'zurich',
   'jena',
   'stuttgart',
   'strasbourg',
   'cologne',
   'ulm']},
 {'val': ['munster', 'lindau', 'frankfurt'],
  'test': [],
  'train': ['dusseldorf',
   'darmstadt',
   'tubingen',
   'erfurt',
   'bochum',
   'krefeld',
   'weimar',
   'bremen',
   'aachen',
   'hamburg',
   'monchengladbach',
   'hanover',
   'zurich',
   'jena',
   'stuttgart',
   'strasbourg',
   'cologne',
   'ulm']})