In [2]:
import os
from typing import List

def create_class_directories(parent_dir_path: str, class_names: List[str]) -> None:
    """Create one subdirectory per class name under a parent directory.

    The parent directory (including any missing intermediate directories) is
    created first. For every class name a line is printed stating whether its
    subdirectory was created by this call or was already present.

    Parameters:
    parent_dir_path (str): Directory that will contain the class subdirectories.
    class_names (List[str]): Names of the subdirectories to create.

    Raises:
    FileExistsError: If the parent path or a class path exists but is not a
        directory (for example a regular file with the same name).
    """
    # exist_ok=True replaces the exists()-then-makedirs() pair, which could fail
    # if another process created the directory between the two calls.
    os.makedirs(parent_dir_path, exist_ok=True)

    for class_name in class_names:
        subdirectory_path = os.path.join(parent_dir_path, class_name)

        # Remember the prior state only to choose the message; isdir (not exists)
        # so that a plain file in the way is not reported as an existing directory.
        already_present = os.path.isdir(subdirectory_path)
        os.makedirs(subdirectory_path, exist_ok=True)

        if already_present:
            print(f"Subdirectory already exists: {subdirectory_path}")
        else:
            print(f"Created subdirectory: {subdirectory_path}")

In [13]:
# Create the class subdirectories under the reduced_class_train_images directory.
class_names = ['MISSING', 'RAIN_ONLY', 'SNOW_ONLY']
parent_dir_path = '/space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_train_images'
create_class_directories(parent_dir_path=parent_dir_path, class_names=class_names)

Created subdirectory: /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_train_images/MISSING
Created subdirectory: /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_train_images/RAIN_ONLY
Created subdirectory: /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_train_images/SNOW_ONLY


In [4]:
# Create the class subdirectories under the reduced_class_test_images directory.
class_names = ['MISSING', 'RAIN_ONLY', 'SNOW_ONLY']
parent_dir_path = '/space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_test_images'
create_class_directories(parent_dir_path=parent_dir_path, class_names=class_names)

Created subdirectory: /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_test_images/MISSING
Created subdirectory: /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_test_images/RAIN_ONLY
Created subdirectory: /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_test_images/SNOW_ONLY


In [5]:
# Create the class subdirectories under the reduced_class_val_images directory.
class_names = ['MISSING', 'RAIN_ONLY', 'SNOW_ONLY']
parent_dir_path = '/space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_val_images'
create_class_directories(parent_dir_path=parent_dir_path, class_names=class_names)

Created subdirectory: /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_val_images/MISSING
Created subdirectory: /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_val_images/RAIN_ONLY
Created subdirectory: /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_val_images/SNOW_ONLY


In [6]:
import os
import shutil
import random

def move_random_files(src_dir_path, tgt_dir_path, num_files, seed=None):
    """
    Moves a specified number of randomly chosen files from the source directory
    to the target directory.

    Only regular files directly inside the source directory are candidates;
    subdirectories are ignored. The target directory is created if missing.

    Parameters:
    src_dir_path (str): Path to the source directory.
    tgt_dir_path (str): Path to the target directory.
    num_files (int): Number of files to move.
    seed (int, optional): When given, the selection is drawn from a private
        random.Random(seed), so the same seed and the same directory contents
        always pick the same files. When None (default), the module-level
        `random` generator is used, as before.

    Raises:
    ValueError: If num_files is negative or exceeds the number of files
        available in the source directory.
    """
    if num_files < 0:
        raise ValueError(f"num_files must be non-negative, got {num_files}.")

    # os.listdir returns entries in arbitrary order; sorting makes the candidate
    # list (and therefore a seeded sample) independent of the filesystem.
    files = sorted(
        name
        for name in os.listdir(src_dir_path)
        if os.path.isfile(os.path.join(src_dir_path, name))
    )

    # Check if there are enough files to move
    if num_files > len(files):
        raise ValueError(f"Requested {num_files} files, but only {len(files)} available.")

    # Randomly select files to move
    rng = random if seed is None else random.Random(seed)
    selected_files = rng.sample(files, num_files)

    # Make sure the destination exists before the first move is attempted.
    os.makedirs(tgt_dir_path, exist_ok=True)

    # Move the selected files
    for file in selected_files:
        src_file_path = os.path.join(src_dir_path, file)
        tgt_file_path = os.path.join(tgt_dir_path, file)
        shutil.move(src_file_path, tgt_file_path)

In [7]:
from tqdm import tqdm

# Root of the dataset; all directories used in this cell live under it.
dataset_root = "/space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3"

class_names = ['MISSING', 'RAIN_ONLY', 'SNOW_ONLY']

# Number of files moved into EACH of the test and validation directories, per class.
# Defined once here instead of being rebuilt on every loop iteration.
category_to_num_files = {
    "MISSING": 1000,
    "RAIN_ONLY": 1000,
    "SNOW_ONLY": 1000,
}

# Seed the module-level generator that move_random_files samples from, so the
# split can be reproduced (as long as the directory listings are unchanged).
random.seed(0)

# NOTE: this cell is destructive. Files are moved out of reduced_class_images,
# so running it a second time moves a further batch into test/val.
for category in tqdm(class_names):
    orig_dir_path = f"{dataset_root}/reduced_class_images/{category}"
    test_dir_path = f"{dataset_root}/reduced_class_test_images/{category}"
    val_dir_path = f"{dataset_root}/reduced_class_val_images/{category}"

    num_files = category_to_num_files[category]

    # Test files are removed from the source first, so the validation draw
    # can never pick a file that already went to the test directory.
    move_random_files(orig_dir_path, test_dir_path, num_files)
    move_random_files(orig_dir_path, val_dir_path, num_files)

100%|██████████| 3/3 [00:03<00:00,  1.17s/it]


In [14]:
import os
import shutil
from math import ceil

def distribute_files(src_dir_path, tgt_dir_path, limit):
    """
    Copies files from the source directory so that `limit` files are written
    to the target directory.

    Only regular files directly inside the source directory are considered,
    processed in sorted name order. The target directory is created if missing.

    - If the source holds at least `limit` files, the first `limit` of them are
      copied under their original names.
    - Otherwise every file is copied several times under the name
      "{i}_{j}_{file}" (i = index of the file, j = copy number). Each file gets
      limit // num_files copies and the first limit % num_files files get one
      extra, so the total is exactly `limit`.

    Parameters:
    src_dir_path (str): Path to the source directory.
    tgt_dir_path (str): Path to the target directory.
    limit (int): Total number of files to write to the target directory.

    Raises:
    ValueError: If limit is negative, or if limit is positive but the source
        directory contains no files to copy.
    """
    if limit < 0:
        # A negative limit would otherwise slice from the end (files[:limit]).
        raise ValueError(f"limit must be non-negative, got {limit}.")

    # Sorted so the chosen subset and the "{i}_..." indices do not depend on the
    # arbitrary order returned by os.listdir.
    files = sorted(
        name
        for name in os.listdir(src_dir_path)
        if os.path.isfile(os.path.join(src_dir_path, name))
    )
    num_files = len(files)

    if num_files == 0 and limit > 0:
        # Previously this case reached `limit // num_files` and raised ZeroDivisionError.
        raise ValueError(f"Requested {limit} files, but the source directory contains no files.")

    os.makedirs(tgt_dir_path, exist_ok=True)

    if num_files >= limit:
        # Enough originals: copy a subset, keeping the original names.
        for file in files[:limit]:
            src_file_path = os.path.join(src_dir_path, file)
            tgt_file_path = os.path.join(tgt_dir_path, file)
            shutil.copy(src_file_path, tgt_file_path)
    else:
        # Too few originals: duplicate them until the total reaches `limit`.
        copies_per_file, additional_copies = divmod(limit, num_files)

        for i, file in enumerate(files):
            src_file_path = os.path.join(src_dir_path, file)
            copies = copies_per_file + (1 if i < additional_copies else 0)
            for j in range(copies):
                tgt_file_path = os.path.join(tgt_dir_path, f"{i}_{j}_{file}")
                shutil.copy(src_file_path, tgt_file_path)

In [16]:
# Count the regular files directly inside the MISSING class directory (-maxdepth 1: no recursion into subdirectories).
!find /space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_images/MISSING -maxdepth 1 -type f | wc -l

94411


In [17]:
from tqdm import tqdm

# Number of files written to each class's training directory. 94411 is the file
# count that the `find ... | wc -l` cell reported for the MISSING directory.
TARGET_FILES_PER_CLASS = 94411

class_names = ['MISSING', 'RAIN_ONLY', 'SNOW_ONLY']

# Fill every class's training directory from what is left in reduced_class_images.
for category in tqdm(class_names):
    tgt_dir_path = f"/space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_train_images/{category}"
    src_dir_path = f"/space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/reduced_class_images/{category}"
    distribute_files(
        src_dir_path=src_dir_path,
        tgt_dir_path=tgt_dir_path,
        limit=TARGET_FILES_PER_CLASS,
    )

100%|██████████| 3/3 [04:46<00:00, 95.66s/it] 


In [18]:
import shutil

def zip_directory(source_dir, output_filename):
    """
    Packs the contents of a directory into a zip archive.

    Parameters:
    source_dir (str): Directory whose contents become the root of the archive.
    output_filename (str): Archive path WITHOUT the extension;
        shutil.make_archive appends ".zip" itself.
    """
    shutil.make_archive(
        base_name=output_filename,
        format='zip',
        root_dir=source_dir,
    )

In [19]:
from tqdm import tqdm

# Zip each split directory. Only the bare directory name is passed as the
# archive base, so the archives are written relative to the current working directory.
dir_names = ['reduced_class_train_images', 'reduced_class_test_images', 'reduced_class_val_images']

for dir_name in tqdm(dir_names):
    source_dir = f"/space/hall5/sitestore/eccc/mrd/rpnarmp/hol002/NavCan_WxCams_Sample_v3/{dir_name}"
    zip_directory(source_dir=source_dir, output_filename=dir_name)

100%|██████████| 3/3 [11:01<00:00, 220.57s/it]
