In [1]:
import os
import shutil
from collections import defaultdict
from tqdm import tqdm
import time


def is_image_file(file_name: str):
    """Tell whether ``file_name`` carries a recognised image extension.

    The extension is whatever ``os.path.splitext`` returns as the second
    element; it is lower-cased before being compared, so the check is
    case-insensitive.

    Args:
        file_name: A file name or path.

    Returns:
        bool: True when the extension is ``.jpg``, ``.jpeg`` or ``.png``.
    """
    suffix = os.path.splitext(file_name)[1].lower()
    return suffix in (".jpg", ".jpeg", ".png")

def create_image_mapping_and_stats(source_folder: str, mapping_files: dict):
    """Pair images found under ``source_folder`` with their target folders.

    Every key of ``mapping_files`` is the path of a text file holding one
    image file name per line; its value is the folder those images belong in.
    The source tree is walked recursively and each image whose bare file name
    appears in one of the list files is paired with that file's target folder.
    When a name is listed in several files, the file iterated last wins.

    Args:
        source_folder: Root directory searched recursively for images.
        mapping_files: ``{list_file_path: target_folder}``.

    Returns:
        tuple: ``(image_to_folder_mapping, folder_stats, unmatched_images)``

        * ``image_to_folder_mapping`` -- ``{full image path: target folder}``.
        * ``folder_stats`` -- ``defaultdict(int)`` counting matched images
          per target folder.
        * ``unmatched_images`` -- file names (without directory) of images
          that appear in no list file.

    Raises:
        SystemExit: With status 1 when a target folder is not an existing
            directory.
    """
    image_to_folder_mapping = {}
    folder_stats = defaultdict(int)
    unmatched_images = []

    file_to_folder = {}
    for mapping_file, target_folder in mapping_files.items():
        # isdir, not exists: a regular file sitting at this path is not a
        # usable copy destination either.
        if not os.path.isdir(target_folder):
            print(f"Target folder does not exist: {target_folder}. Halting program.")
            # Raise SystemExit directly with a failure status instead of
            # calling the interactive exit() helper, which reports status 0.
            raise SystemExit(1)

        with open(mapping_file, "r") as f:
            for line in f:
                image_file = line.strip()
                # Blank lines (e.g. a trailing newline) name no image.
                if image_file:
                    file_to_folder[image_file] = target_folder

    for root, _, files in os.walk(source_folder):
        for file in files:
            if not is_image_file(file):
                continue
            if file in file_to_folder:
                target_folder = file_to_folder[file]
                full_image_path = os.path.join(root, file)
                image_to_folder_mapping[full_image_path] = target_folder
                folder_stats[target_folder] += 1
            else:
                # Show where the unlisted image lives; only its bare name is
                # kept in the returned list.
                print(root, file)
                unmatched_images.append(file)

    return image_to_folder_mapping, folder_stats, unmatched_images

def copy_images(image_to_folder_mapping):
    """Copy every mapped image into its target folder.

    All target folders are validated before the first file is copied, so a
    missing folder halts the run without leaving a partial copy behind.

    Each image is copied with ``shutil.copy2`` to
    ``<target_folder>/<base name of image_path>``. Images that share a base
    name and a target folder therefore resolve to the same destination path;
    the later copy replaces the earlier one while the count still increments.

    Args:
        image_to_folder_mapping: ``{source image path: target folder}``.

    Returns:
        defaultdict: ``{target folder: number of copy operations performed}``.

    Raises:
        SystemExit: With status 1 when a target folder is not an existing
            directory.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # folder reported on failure is deterministic.
    for target_folder in dict.fromkeys(image_to_folder_mapping.values()):
        if not os.path.isdir(target_folder):
            print(f"Target folder does not exist: {target_folder}. Halting program.")
            # Raise SystemExit directly with a failure status instead of
            # calling the interactive exit() helper, which reports status 0.
            raise SystemExit(1)

    images_copied_per_folder = defaultdict(int)

    for image_path, target_folder in image_to_folder_mapping.items():
        target_path = os.path.join(target_folder, os.path.basename(image_path))
        shutil.copy2(image_path, target_path)
        images_copied_per_folder[target_folder] += 1

    return images_copied_per_folder

In [2]:
 
# Root directory that is searched recursively for images.
source_folder = "/data1/kuldeep_2/all_LCC_FSD"

# (list file, target folder) pairs: each list file names the images that
# belong in the folder paired with it.
mapping_files = dict(
    [
        ("/data1/kuldeep_2/LCC_FSD/test_fake.txt", "/data1/kuldeep_2/LCC_FSD/test/fake"),
        ("/data1/kuldeep_2/LCC_FSD/test_real.txt", "/data1/kuldeep_2/LCC_FSD/test/real"),
        ("/data1/kuldeep_2/LCC_FSD/train_fake.txt", "/data1/kuldeep_2/LCC_FSD/train/fake"),
        ("/data1/kuldeep_2/LCC_FSD/train_real.txt", "/data1/kuldeep_2/LCC_FSD/train/real"),
        ("/data1/kuldeep_2/LCC_FSD/valid_fake.txt", "/data1/kuldeep_2/LCC_FSD/valid/fake"),
        ("/data1/kuldeep_2/LCC_FSD/valid_real.txt", "/data1/kuldeep_2/LCC_FSD/valid/real"),
    ]
)

In [3]:
# Dry run: resolve which image goes to which target folder and collect
# per-folder counts plus the list of unmatched images. Nothing is copied here.
image_to_folder_mapping, folder_stats, unmatched_images = create_image_mapping_and_stats(source_folder, mapping_files)

In [4]:
# Check stats: matched-image count per target folder (sorted by path),
# followed by the number of images that appear in no list file.
for folder_path in sorted(folder_stats):
    image_count = folder_stats[folder_path]
    print(f"Path : {folder_path:<50}\tCount : {image_count}\n")
print(f"\n\nNumber of unmatched images : {len(unmatched_images)}")

Path : /data1/kuldeep_2/LCC_FSD/test/fake                	Count : 7266

Path : /data1/kuldeep_2/LCC_FSD/test/real                	Count : 314

Path : /data1/kuldeep_2/LCC_FSD/train/fake               	Count : 7076

Path : /data1/kuldeep_2/LCC_FSD/train/real               	Count : 1223

Path : /data1/kuldeep_2/LCC_FSD/valid/fake               	Count : 2543

Path : /data1/kuldeep_2/LCC_FSD/valid/real               	Count : 405



Number of unmatched images : 0


In [5]:
# Perform the actual copy using the mapping built earlier; the result holds
# the number of copy operations per target folder.
images_copied_per_folder = copy_images(image_to_folder_mapping)

In [6]:
# Verify stats: copy count per target folder (sorted by path), printed in the
# same layout as the pre-copy statistics so the two can be compared by eye.
for folder_path in sorted(images_copied_per_folder):
    image_count = images_copied_per_folder[folder_path]
    print(f"Path : {folder_path:<50}\tCount : {image_count}\n")

Path : /data1/kuldeep_2/LCC_FSD/test/fake                	Count : 7266

Path : /data1/kuldeep_2/LCC_FSD/test/real                	Count : 314

Path : /data1/kuldeep_2/LCC_FSD/train/fake               	Count : 7076

Path : /data1/kuldeep_2/LCC_FSD/train/real               	Count : 1223

Path : /data1/kuldeep_2/LCC_FSD/valid/fake               	Count : 2543

Path : /data1/kuldeep_2/LCC_FSD/valid/real               	Count : 405

