In [6]:
import os
from pathlib import Path
import sys
import shutil
import zipfile

from sklearn.model_selection import train_test_split


In [20]:
# NOTE(review): running `zip` without arguments only prints the CLI usage text
# shown in this cell's output; the extraction cells in this notebook use
# Python's zipfile module instead — presumably a leftover availability check.
!zip

Copyright (c) 1990-2008 Info-ZIP - Type 'zip "-L"' for software license.
Zip 3.0 (July 5th 2008). Usage:
zip [-options] [-b path] [-t mmddyyyy] [-n suffixes] [zipfile list] [-xi list]
  The default action is to add or replace zipfile entries from list, which
  can include the special name - to compress standard input.
  If zipfile and list are omitted, zip compresses stdin to stdout.
  -f   freshen: only changed files  -u   update: only changed or new files
  -d   delete entries in zipfile    -m   move into zipfile (delete OS files)
  -r   recurse into directories     -j   junk (don't record) directory names
  -0   store only                   -l   convert LF to CR LF (-ll CR LF to LF)
  -1   compress faster              -9   compress better
  -q   quiet operation              -v   verbose operation/print version info
  -c   add one-line comments        -z   add zipfile comment
  -@   read names from stdin        -o   make zipfile as old as latest entry
  -x   exclude the following nam

In [2]:
# Root folder of the project. The path is relative, so it resolves against the
# kernel's current working directory (presumably a mounted Google Drive — confirm).
BASE_PATH = Path("./drive/MyDrive/MAGISTERKA")
# Shown as the cell result: a quick check that the folder is actually reachable.
BASE_PATH.exists()

True

In [7]:
def extract_dataset(archive_name: str):
  """Unpack a zip archive stored in ``BASE_PATH / 'datasets'``.

  The archive content is extracted into the same ``datasets`` folder that
  holds the archive, keeping the directory layout recorded inside the zip.

  Args:
    archive_name: File name of the archive inside the ``datasets`` folder.
  """
  datasets_dir = BASE_PATH / 'datasets'
  archive_path = datasets_dir / archive_name
  with zipfile.ZipFile(archive_path, 'r') as archive:
    archive.extractall(datasets_dir)

# Unpack the first archive; its content lands in the datasets folder next to the zip.
extract_dataset('fog-detection-dataset.zip')

In [8]:
# Unpack the two remaining archives into the same datasets folder.
extract_dataset('fog-or-smog-detection-dataset.zip')
extract_dataset('foggy-cityscapes-image-dataset.zip')

In [4]:
def process_class(
    class_name,
    source_dir,
    target_dir,
    seed=42,
    holdout_size=0.3,
    test_share=0.66,
):
    """Split the files of one class into train/val/test folders.

    Every regular file found directly in ``source_dir`` is copied (the source
    is left untouched) to ``target_dir/<split>/<class_name>/<file name>``,
    where ``<split>`` is ``train``, ``val`` or ``test``.

    The split is done in two steps: ``holdout_size`` of the files is held out
    from training, and ``test_share`` of that hold-out becomes the test set
    while the rest becomes the validation set. With the defaults this gives
    roughly 70% train, 10% val and 20% test.

    Files keep their original names, so calling this function several times
    with the same ``class_name`` and ``target_dir`` overwrites files that
    share a name.

    Args:
        class_name: Name of the class sub-folder created inside each split.
        source_dir: Folder holding the files of this class (str or path-like).
        target_dir: Root folder of the prepared dataset (str or path-like).
        seed: Random state passed to both ``train_test_split`` calls.
        holdout_size: Fraction of all files kept out of the training set.
        test_share: Fraction of the hold-out files assigned to the test set.

    Raises:
        ValueError: Propagated from ``train_test_split`` when there are too
            few files to build the requested splits.
    """
    # os.listdir returns entries in arbitrary order, so sort them: otherwise
    # the same seed could produce different splits on different machines.
    # Sub-directories are skipped because shutil.copyfile cannot copy them.
    files = sorted(
        name
        for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )

    train_files, holdout_files = train_test_split(
        files, test_size=holdout_size, random_state=seed
    )
    val_files, test_files = train_test_split(
        holdout_files, test_size=test_share, random_state=seed
    )

    splits = {"train": train_files, "val": val_files, "test": test_files}
    for split_name, split_files in splits.items():
        split_dir = os.path.join(target_dir, split_name, class_name)
        # Also creates target_dir itself when it does not exist yet.
        os.makedirs(split_dir, exist_ok=True)
        # Copy (not move) so the extracted source dataset stays intact.
        for file_name in split_files:
            shutil.copyfile(
                os.path.join(source_dir, file_name),
                os.path.join(split_dir, file_name),
            )

In [10]:
# Build the train/val/test layout for the fog-detection dataset:
# 'fog images' become class 'fog' and 'non-fog images' become class 'clear',
# both written under the same prepared folder.
process_class(
    class_name='fog',
    source_dir=BASE_PATH / 'datasets' / 'fog-detection-dataset' / 'fog images',
    target_dir=BASE_PATH / 'datasets' / 'fog-detection-dataset-prepared',
)

process_class(
    class_name='clear',
    source_dir=BASE_PATH / 'datasets' / 'fog-detection-dataset' / 'non-fog images',
    target_dir=BASE_PATH / 'datasets' / 'fog-detection-dataset-prepared',
)

In [11]:
# Build the train/val/test layout for the fog-or-smog dataset:
# 'foggy' becomes class 'fog' and 'Clear' becomes class 'clear',
# both written under the same prepared folder.
process_class(
    class_name='fog',
    source_dir=BASE_PATH / 'datasets' / 'fog-or-smog-detection-dataset' / 'foggy',
    target_dir=BASE_PATH / 'datasets' / 'fog-or-smog-detection-dataset-prepared',
)

process_class(
    class_name='clear',
    source_dir=BASE_PATH / 'datasets' / 'fog-or-smog-detection-dataset' / 'Clear',
    target_dir=BASE_PATH / 'datasets' / 'fog-or-smog-detection-dataset-prepared',
)

In [13]:
# Build the train/val/test layout for the foggy-cityscapes dataset:
# 'Dense_Fog' and 'Medium_Fog' are merged into class 'fog', 'No_Fog' becomes 'clear'.
# NOTE(review): both fog folders are copied into the same <split>/fog directory
# under their original file names. If the two folders contain files with the
# same name, the second call overwrites files from the first, and the same
# scene could end up in different splits — verify the file names.
process_class(
    class_name='fog',
    source_dir=BASE_PATH / 'datasets' / 'foggy-cityscapes-image-dataset' / 'Dense_Fog',
    target_dir=BASE_PATH / 'datasets' / 'foggy-cityscapes-image-dataset-prepared',
)

process_class(
    class_name='fog',
    source_dir=BASE_PATH / 'datasets' / 'foggy-cityscapes-image-dataset' / 'Medium_Fog',
    target_dir=BASE_PATH / 'datasets' / 'foggy-cityscapes-image-dataset-prepared',
)


process_class(
    class_name='clear',
    source_dir=BASE_PATH / 'datasets' / 'foggy-cityscapes-image-dataset' / 'No_Fog',
    target_dir=BASE_PATH / 'datasets' / 'foggy-cityscapes-image-dataset-prepared',
)

In [16]:
# Prepared (already split) datasets that get merged into one combined dataset.
DATASETS_PATHS = [
    BASE_PATH / 'datasets/fog-detection-dataset-prepared',
    BASE_PATH / 'datasets/fog-or-smog-detection-dataset-prepared',
    BASE_PATH / 'datasets/foggy-cityscapes-image-dataset-prepared'
]
# File-name prefixes, paired with DATASETS_PATHS by position; keep both lists
# the same length and in the same order.
DATASETS_PREFIXES = [
    'fg_', 'fos_', 'fc_'
]

# Root folder of the combined dataset.
OUT_PATH = BASE_PATH / 'datasets/fog-combined'

def copy_with_prefix(src, dst, prefix=""):
    """Copy ``src`` into the folder of ``dst`` under a prefixed file name.

    The final component of ``dst`` is replaced by ``prefix`` followed by that
    component; the parent folder of ``dst`` is kept. The copy is done with
    ``shutil.copy2``.

    Args:
        src: File to copy (str or path-like).
        dst: Intended destination path (str or path-like) before prefixing.
        prefix: Text prepended to the destination file name.
    """
    source = Path(src)
    destination = Path(dst)
    prefixed_name = "{}{}".format(prefix, destination.name)
    shutil.copy2(source, destination.with_name(prefixed_name))

In [19]:

# Merge every prepared dataset into OUT_PATH/<split>/<class>/. Each file name
# gets the prefix of its dataset so that files coming from different datasets
# cannot overwrite each other.
for dataset_path, prefix in zip(DATASETS_PATHS, DATASETS_PREFIXES):
    # Sorted only to make the processing order deterministic between runs.
    for split in sorted(os.listdir(dataset_path)):
        split_path = dataset_path / split
        # Anything that is not a directory is not a split; copytree would
        # raise NotADirectoryError on it.
        if not split_path.is_dir():
            continue

        for class_name in sorted(os.listdir(split_path)):
            class_path = split_path / class_name
            if not class_path.is_dir():
                continue

            shutil.copytree(
                src=class_path,
                dst=OUT_PATH / split / class_name,
                # Several datasets write into the same split/class folder.
                dirs_exist_ok=True,
                # Bind the current prefix as a default argument so the
                # callback does not rely on late binding of the loop variable.
                copy_function=lambda s, d, p=prefix: copy_with_prefix(s, d, p),
            )