In [8]:
# Goal of this script:
#   Creates a new dataset (destination) from a source dataset by:
#       - creating the destination dataset structure to match the source dataset
#       - randomly selecting a subset of files from each class subdir of the source dataset
#         (the number of files per subdir is set by `num_dataset_files_to_copy` in the code)
#       - copying the selected files to the destination dataset



# Import required modules
import os
from pathlib import Path
import time
import random
import shutil

# Import custom functions
from randomly_selected_dataset_utils import*

In [10]:
# FUNCTIONS


In [None]:
# Specify the source dataset directory which either
#   - contains the subdirs of the data (color, grayscale, and segmented)
#   - or is a subdir itself (color, grayscale, or segmented)

# Resolve the dataset locations relative to the notebook's working directory.
current_dir = Path.cwd()
print(f'Current Working Directory: "{current_dir}"')

# The destination is the dataset root; the source is an image-type subdir beneath it.
dest_dir = current_dir.parent / 'PlantVillage-Dataset'
src_dir = dest_dir / 'Original' / 'segmented'

# Fail fast if either location is missing: source is checked first, then destination.
for dir_role, dir_path in (('Source', src_dir), ('Destination', dest_dir)):
    if os.path.exists(dir_path):
        print(f'{dir_role} dataset dir path set: "{dir_path}"')
    else:
        raise FileNotFoundError(f'FAILED:  {dir_role} dataset dir path does not exist: "{dir_path}"')



In [None]:
# If the source dataset directory path ends with either 'color', 'grayscale', or 'segmented', then
#   - run the function to create the parent directory in the destination directory
#   - run the function to create the detected subdir in the parent directory
#   - run the function to create a list of randomly selected files from each directory in the subdir
#   - copy the selected files to the destination directory

# Number of files to randomly select from EACH class subdir of the source dataset.
num_dataset_files_to_copy = 1

# This cell only handles the case where src_dir is itself one image-type directory.
if src_dir.name in ['color', 'grayscale', 'segmented']:
    detected_subdir_name = src_dir.name
    print(f'Source subdir name: "{detected_subdir_name}"')

    # Create the parent dir for the random dataset in the destination dataset dir.
    # create_dir comes from randomly_selected_dataset_utils; the name it returns is used for
    # all later paths (presumably it can differ from the requested name when
    # force_create_new_dir=True -- TODO confirm against the helper).
    created_parent_dir_name = create_dir(dest_dir, f'Random Dataset_{num_dataset_files_to_copy}', force_create_new_dir=True)
    print(f'CREATED:  Destination parent dir: "{created_parent_dir_name}"')

    # Create the directory for the detected image type inside the parent directory.
    created_imp_type_name = create_dir(os.path.join(dest_dir, created_parent_dir_name), detected_subdir_name, force_create_new_dir=False)
    print(f'CREATED:  Image type dir: "{created_imp_type_name}\\"')
    print()

    # Built once and reused for every class subdir and for the upload commands.
    dest_img_type_dir = os.path.join(dest_dir, created_parent_dir_name, created_imp_type_name)

    # os.listdir returns entries in arbitrary order; sort so the run log and the
    # generated commands file are stable between runs.
    subdirs = sorted(subdir for subdir in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, subdir)))
    for subdir in subdirs:
        src_subdir_path = os.path.join(src_dir, subdir)
        dest_subdir_path = os.path.join(dest_img_type_dir, subdir)

        # Create the class subdir in the image type directory.
        created_subdir_name = create_dir(dest_img_type_dir, subdir, force_create_new_dir=False)
        print(f'CREATED:  Subdir: "{created_subdir_name}"')

        # List the regular files in the source subdir (sorted for a stable sampling population).
        src_subdir_files = sorted(file for file in os.listdir(src_subdir_path) if os.path.isfile(os.path.join(src_subdir_path, file)))
        print(f'Number of files in {subdir} subdir: {len(src_subdir_files)}')

        # random.sample raises ValueError when asked for more items than the population holds,
        # which would abort the whole run on a small class; cap the request instead.
        num_files_to_sample = min(num_dataset_files_to_copy, len(src_subdir_files))
        if num_files_to_sample < num_dataset_files_to_copy:
            print(f'WARNING:  Only {len(src_subdir_files)} file(s) available in {subdir} subdir; copying all of them')
        random_files_names = random.sample(src_subdir_files, num_files_to_sample)
        print(f'Number of files to copy: {len(random_files_names)}')

        # Copy the selected files to the destination directory.
        print(f'Copying files from    "{src_subdir_path}"    to    "{dest_subdir_path}":')
        for file in random_files_names:
            copy_file(file, src_subdir_path, dest_subdir_path)

        print()

    # Write a .txt file in the parent directory listing the data labels and one
    # edge-impulse-uploader command per class subdir.
    print('Creating terminal commands file for uploading the dataset to Edge Impulse...')
    terminal_commands_file_path = os.path.join(dest_dir, created_parent_dir_name, 'Edge Impulse Upload Commands.txt')
    with open(terminal_commands_file_path, 'w', encoding='utf-8') as f:
        f.write('data labels:\n')
        for subdir in subdirs:
            f.write(f'    {subdir}\n')
        f.write('\n')

        for subdir in subdirs:
            # Build the glob with os.path.join so the separator matches the platform;
            # a hard-coded backslash yields a pattern that matches nothing on POSIX shells.
            upload_glob = os.path.join(dest_img_type_dir, subdir, '*.jpg')
            f.write(f'edge-impulse-uploader --label "{subdir}" --category split "{upload_glob}" --project-id PROJECT_ID\n')

    print(f'CREATED:  Terminal commands file: "{terminal_commands_file_path}"')
    print()

In [13]:
# Else
#    - verify that the source dataset directory contains the subdirs 'color', 'grayscale', and 'segmented'
#    - run the function to create the parent directory in the destination directory
#    - run the function to create each subdir in the parent directory
#    - run the function to create a list of randomly selected files from each subdir
#    - copy the selected files to each destination directory

