In [2]:
# Libraries
import os
import shutil

"""
Run this script after image_processing_script.ipynb

Organizes insect image data into class folders.

This script creates folders for each insect class and moves images to their respective folders based on filename patterns.
Expects directory structure with 'TEST_DATA' and 'VALIDATION_DATA' subfolders, and organizes the images within these folders.
"""

# Directory constants
PROCESSED_DATA = "PROCESSED_DATA"
TEST_DATA = os.path.join(PROCESSED_DATA, "TEST_DATA")
VALIDATION_DATA = os.path.join(PROCESSED_DATA, "VALIDATION_DATA")

# Map of filename patterns to the correct class name
INSECT_CLASS_MAPPING = {
    "weevil": "adult_rice_weevil",
    "centipede": "house_centipede",
    "house_spider": "american_house_spider",
    "house_": "american_house_spider",
    "bedbug": "bedbug",
    "stink": "brown_stink_bug",
    "carpenterant": "carpenter_ant",
    "cellar": "cellar_spider",
    "flea": "flea",
    "silverfish": "silverfish",
    "termite": "subterranean_termite",
    "tick": "tick",
}

def organize_data(data_dir):
    """
    Create insect class folders and organize images to their correct folders
    """
    if not os.path.exists(data_dir):
        print(f"Directory {data_dir} does not exist!")
        return
    
    # Create class folders if doesnt exist
    for class_name in INSECT_CLASS_MAPPING.values():
        class_dir = os.path.join(data_dir, class_name)
        os.makedirs(class_dir, exist_ok=True)
    
    # Move images to their respective class folders
    for file_name in os.listdir(data_dir):
        file_path = os.path.join(data_dir, file_name)
        
        # Skip if it's already a directory
        if os.path.isdir(file_path):
            continue
        
        # Find correct class based on filename pattern
        for keyword, class_name in INSECT_CLASS_MAPPING.items():
            if keyword in file_name.lower():
                dest_path = os.path.join(data_dir, class_name, file_name)
                shutil.move(file_path, dest_path)
                print(f"Moved {file_name} → {dest_path}")
                break  # Stop checking once a match is found

# Organize the data in the TEST_DATA and VALIDATION_DATA folders
organize_data(TEST_DATA)
organize_data(VALIDATION_DATA)

print("TEST_DATA and VALIDATION_DATA have been organized into class folders.")


Moved image_75_tick_processed.jpg → PROCESSED_DATA/TEST_DATA/tick/image_75_tick_processed.jpg
Moved reddit_cellar_36_processed.jpg → PROCESSED_DATA/TEST_DATA/cellar_spider/reddit_cellar_36_processed.jpg
Moved image_114_carpenterant_processed.jpg → PROCESSED_DATA/TEST_DATA/carpenter_ant/image_114_carpenterant_processed.jpg
Moved image_95_flea_google_processed.jpg → PROCESSED_DATA/TEST_DATA/flea/image_95_flea_google_processed.jpg
Moved image_77_carpenterant_processed.jpg → PROCESSED_DATA/TEST_DATA/carpenter_ant/image_77_carpenterant_processed.jpg
Moved image_54_carpenterant_google_processed.jpg → PROCESSED_DATA/TEST_DATA/carpenter_ant/image_54_carpenterant_google_processed.jpg
Moved image_78_flea_processed.jpg → PROCESSED_DATA/TEST_DATA/flea/image_78_flea_processed.jpg
Moved Image_5N_silverfish_processed.jpg → PROCESSED_DATA/TEST_DATA/silverfish/Image_5N_silverfish_processed.jpg
Moved image_16_silverfish_reddit_processed.jpg → PROCESSED_DATA/TEST_DATA/silverfish/image_16_silverfish_reddi