# Creating the subset of the Cityscapes dataset

This notebook builds small "train" and "valid" datasets from the unzipped Cityscapes folders.

In [None]:
import csv
import os
import shutil

## Step 1. Download the needed `*.zip` files from the Cityscapes website

Download the dataset here: 

**Cityscapes: Semantic Understanding of Urban Street Scenes.** https://www.cityscapes-dataset.com/downloads
(Note: You need to register to download the files.)

Because these files are several gigabytes in size, it is important to download only the data needed for the specific task of interest. The focus of this study is to test the performance of different models for semantic segmentation, so the following archives (file size in parentheses) were downloaded: **gtFine_trainvaltest.zip** (241MB) and **leftImg8bit_trainvaltest.zip** (11GB).

Unzip both files in the SemanticSegmentationCityscapes/ directory.

Your directory structure should now look like:

```
SemanticSegmentationCityscapes/

   leftImg8bit_trainvaltest/
   gtFine_trainvaltest/
   
   gtFine_trainvaltest.zip
   leftImg8bit_trainvaltest.zip
   
   seg_code/
      __init__.py
      models.py
      train.py
      utils.py
      special_transforms.py
   ```


## Step 2. Create the small_dataset directory

In [3]:
# Destination folder for the reduced dataset, relative to the notebook's working directory.
create_data_directory = './small_dataset/'

# exist_ok=True makes this cell safe to re-run: an existing folder is left untouched,
# and there is no gap between "check" and "create" in which the folder could appear.
os.makedirs(create_data_directory, exist_ok=True)

Your directory structure should now look like:

```
SemanticSegmentationCityscapes/

   leftImg8bit_trainvaltest/
   gtFine_trainvaltest/
   
   gtFine_trainvaltest.zip
   leftImg8bit_trainvaltest.zip
   
   small_dataset/
   
   seg_code/
      __init__.py
      models.py
      train.py
      utils.py
      special_transforms.py
   ```


In [4]:
# Folder that contains the unzipped Cityscapes archives; '.' means the current working directory.
SOURCE_ROOT = '.'


In [5]:
# Test directory
gtFineDIR_test = os.path.join(SOURCE_ROOT, 'gtFine_trainvaltest/gtFine/test')
ImgDIR_test = os.path.join(SOURCE_ROOT, 'leftImg8bit_trainvaltest/leftImg8bit/test')

DEST_ROOT_Test = os.path.join(create_data_directory, 'test')

# Train directory
gtFineDIR_train = os.path.join(SOURCE_ROOT, 'gtFine_trainvaltest/gtFine/train')
ImgDIR_train = os.path.join(SOURCE_ROOT, 'leftImg8bit_trainvaltest/leftImg8bit/train')

DEST_ROOT_Train = os.path.join(create_data_directory,'train')

# Validation directory
gtFineDIR_valid = os.path.join(SOURCE_ROOT, 'gtFine_trainvaltest/gtFine/val')
ImgDIR_valid = os.path.join(SOURCE_ROOT, 'leftImg8bit_trainvaltest/leftImg8bit/val')

DEST_ROOT_Valid = os.path.join(create_data_directory,'valid')

In [6]:
# Display the joined path to check how os.path.join combined the pieces on this platform.
ImgDIR_test

'.\\leftImg8bit_trainvaltest/leftImg8bit/test'

In [7]:
def create_directories(sourcedir, targetdir, cities_list='train', extensions=['labelIds', 'leftImg8bit']):
    """Copy selected files from per-city sub-folders into one flat target folder.

    Parameters
    ----------
    sourcedir : str
        Folder whose sub-folders are named after cities.
    targetdir : str
        Destination folder. It is created when missing; files are copied
        directly into it (the per-city sub-folder is not reproduced).
    cities_list : str
        Which cities to copy:
        ``"train"`` -> hamburg, ``"valid"`` -> frankfurt, ``"test"`` -> berlin,
        ``"full"`` -> every sub-folder found in ``sourcedir``.
    extensions : iterable of str
        Substrings to look for; a file is copied when its name contains at
        least one of them. The default list is only read, never modified.

    Raises
    ------
    ValueError
        If ``cities_list`` is not one of the four supported values.
    """
    if cities_list == "full":
        # Keep only real directories so stray files next to the city folders
        # (e.g. a README or .DS_Store) do not break the listing below.
        cities = sorted(
            entry for entry in os.listdir(sourcedir)
            if os.path.isdir(os.path.join(sourcedir, entry))
        )
    elif cities_list == "test":
        cities = ['berlin']
    elif cities_list == "valid":
        cities = ['frankfurt']
    elif cities_list == "train":
        cities = ['hamburg']
    else:
        raise ValueError(
            "cities_list must be one of 'full', 'test', 'valid' or 'train', got {!r}".format(cities_list)
        )

    # exist_ok avoids the separate existence check and makes re-runs harmless.
    os.makedirs(targetdir, exist_ok=True)

    for city in cities:
        directory_city = os.path.join(sourcedir, city)
        # Sorted so the printed log is in the same order on every platform.
        for single_image in sorted(os.listdir(directory_city)):
            if any(word in single_image for word in extensions):
                fullpath_image = os.path.join(directory_city, single_image)
                print(fullpath_image)
                try:
                    shutil.copy(fullpath_image, targetdir)
                except OSError as e:
                    # OSError exists on every platform (WindowsError is only defined
                    # on Windows, where it is an alias of OSError). A failed copy is
                    # reported and the remaining files are still processed.
                    print(e)

## Generate "train" directory from Hamburg data

In [8]:
# Annotations first, then camera images; both land in the same flat train folder.
for train_source in (gtFineDIR_train, ImgDIR_train):
    create_directories(train_source, DEST_ROOT_Train, "train")

.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_000042_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_000629_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_001106_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_001613_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_002095_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_002338_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_003488_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_003904_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_004985_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_005639_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_006192_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_006322_

.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_088939_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_088983_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_089491_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_089696_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_090398_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_090742_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_091038_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_091155_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_091900_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_092476_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_092850_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/train\hamburg\hamburg_000000_093325_

.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_032906_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_033506_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_034049_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_035568_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_036003_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_036427_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_036527_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_037036_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_037161_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_037279_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_037741_leftImg8bit.png

.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_085413_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_085645_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_085982_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_086499_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_086636_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_087216_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_087822_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_088054_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_088197_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_088627_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/train\hamburg\hamburg_000000_088783_leftImg8bit.png

## Generate "valid" directory from Frankfurt data

In [9]:
# Annotations first, then camera images; both land in the same flat valid folder.
for valid_source in (gtFineDIR_valid, ImgDIR_valid):
    create_directories(valid_source, DEST_ROOT_Valid, "valid")

.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_000294_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_000576_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_001016_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_001236_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_001751_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_002196_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_002963_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_003025_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_003357_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_003920_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000000_004617_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\

.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_044413_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_044525_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_044658_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_044787_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_046126_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_046272_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_046504_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_046779_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_047178_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_047552_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\frankfurt_000001_048196_gtFine_labelIds.png
.\gtFine_trainvaltest/gtFine/val\frankfurt\

.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_011007_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_011074_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_011461_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_011810_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_012009_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_012121_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_012868_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_013067_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_013240_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_013382_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000000_

.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_044658_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_044787_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_046126_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_046272_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_046504_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_046779_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_047178_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_047552_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_048196_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_048355_leftImg8bit.png
.\leftImg8bit_trainvaltest/leftImg8bit/val\frankfurt\frankfurt_000001_

## Directory structure
The final directory structure should look like this:

```
SemanticSegmentationCityscapes/

   small_dataset/
      train/
         (files from the train/hamburg folder with the following suffixes:)
         *gtFine_labelIds.png
         *leftImg8bit.png
      valid/
         (files from the val/frankfurt folder with the following suffixes:)
         *gtFine_labelIds.png
         *leftImg8bit.png
   seg_code/
      __init__.py
      models.py
      train.py
      utils.py
      special_transforms.py
   ```

## Check what hardware you have

In [None]:
import torch

# Report whether PyTorch can see a CUDA-capable GPU.
cuda_available = torch.cuda.is_available()
print(cuda_available)

# Only query the device name when CUDA is usable: get_device_name raises on
# CPU-only machines, which would stop a "Run All" at this cell.
device_name = torch.cuda.get_device_name(0) if cuda_available else 'No CUDA device available (running on CPU)'
device_name

## Glance at the data dimensions

In [41]:
import cv2

# Three annotation files of the same Berlin frame, to compare their array shapes.
set_images = ['berlin_000000_000019_gtFine_color.png', 'berlin_000000_000019_gtFine_instanceIds.png', 
              'berlin_000000_000019_gtFine_labelIds.png',]

berlin_path = os.path.join(SOURCE_ROOT, 'gtFine_trainvaltest/gtFine/test/berlin/')

for im in set_images:
    print("############################################")
    print(im)
    image13_path = os.path.join(berlin_path, im)

    # IMREAD_UNCHANGED loads the file as stored, so the shape reflects the
    # real channel count instead of a forced 3-channel conversion.
    img = cv2.imread(image13_path, cv2.IMREAD_UNCHANGED)

    # cv2.imread does not raise for a missing or unreadable file; it returns None.
    if img is None:
        print("Could not read image: " + image13_path)
        continue

    # shape is (height, width) for single-channel images and
    # (height, width, channels) otherwise.
    dimensions = img.shape
    print(dimensions)
        

############################################
berlin_000000_000019_gtFine_color.png
(1024, 2048, 4)
############################################
berlin_000000_000019_gtFine_instanceIds.png
(1024, 2048)
############################################
berlin_000000_000019_gtFine_labelIds.png
(1024, 2048)
