In [13]:
import os
import shutil
import matplotlib.pyplot as plt

# Default size for every figure created after this point: 12 wide by 8 tall
# (matplotlib measures `figure.figsize` in inches).
plt.rcParams['figure.figsize'] = (12, 8)

In [14]:
# Dataset locations. Each one can be overridden through an environment variable
# so the notebook can run on another machine without editing this cell; when
# the variable is not set, the original local path is used unchanged.
DATASETS_PATH = os.environ.get(
    'ORAL_CANCER_DATASETS_PATH', 'c:/Users/Groot/Code/oral_cancer/datasets'
)
MODIFIED_DATA = os.environ.get(
    'ORAL_CANCER_MODIFIED_DATA', 'c:/Users/Groot/Code/oral_cancer/modified'
)


In [15]:
def get_dataset_size(path):
    """Count the files in each immediate sub-folder of ``path``.

    Parameters
    ----------
    path : str
        Dataset root; every directory directly inside it is treated as one
        class folder.

    Returns
    -------
    dict
        Maps each sub-folder name to the number of regular files located
        directly inside it. Entries of ``path`` that are not directories are
        ignored, and nested sub-directories inside a class folder are not
        counted as images.

    Raises
    ------
    OSError
        If ``path`` (or one of its sub-folders) cannot be listed, e.g. because
        it does not exist.
    """
    num_of_images = {}

    # Sorted so the order of the returned dict does not depend on the
    # (arbitrary) order in which the filesystem lists entries.
    for folder in sorted(os.listdir(path)):
        folder_path = os.path.join(path, folder)

        # A stray file next to the class folders (Thumbs.db, a README, ...)
        # is not a class; calling listdir() on it would raise
        # NotADirectoryError and abort the whole count.
        if not os.path.isdir(folder_path):
            continue

        with os.scandir(folder_path) as entries:
            num_of_images[folder] = sum(1 for entry in entries if entry.is_file())

    return num_of_images

In [16]:
def copy_images_with_label(source_root_folder, destination_root_folder):
    """Copy the files of a dataset into per-label folders of a merged dataset.

    The source tree is walked recursively. Every sub-directory found is
    mirrored, by its own name only (not its full relative path), as a folder
    directly under ``destination_root_folder``, and the files it contains are
    copied there under the name ``<dataset>_<n>.jpg``, where ``<dataset>`` is
    the name of the source root folder and ``<n>`` is a running number.
    Files located directly in ``source_root_folder`` are not copied.

    Parameters
    ----------
    source_root_folder : str
        Root of the dataset to copy from. If it does not exist, a message is
        printed and nothing is copied.
    destination_root_folder : str
        Root of the merged dataset; created if missing.

    Returns
    -------
    None

    Notes
    -----
    * Every copy gets a ``.jpg`` extension regardless of the source file's
      extension.
    * A file that fails to copy is reported with ``print`` and skipped; the
      remaining files are still processed.
    """
    if not os.path.exists(source_root_folder):
        print(f"Source root folder '{source_root_folder}' does not exist.")
        return

    os.makedirs(destination_root_folder, exist_ok=True)

    # normpath drops a trailing separator. Without it basename() returns ''
    # for 'data/tongue_lip/', and every dataset would then produce the same
    # '_<n>.jpg' names and overwrite each other in the destination.
    parent_folder = os.path.basename(os.path.normpath(source_root_folder))

    # Next free number per destination folder. Nested source directories that
    # share a name land in the same destination folder; continuing the count
    # there keeps a later directory from overwriting an earlier one's copies.
    next_index = {}

    for root, dirs, files in os.walk(source_root_folder):
        # Sorting in place makes os.walk visit sub-directories in a stable
        # order, so the numbering is reproducible between runs.
        dirs.sort()

        # Files sitting directly in the dataset root carry no label folder.
        if root == source_root_folder:
            continue

        folder = os.path.basename(root)
        destination_parent_folder = os.path.join(destination_root_folder, folder)
        os.makedirs(destination_parent_folder, exist_ok=True)

        for file in sorted(files):
            i = next_index.get(destination_parent_folder, 1)
            next_index[destination_parent_folder] = i + 1

            source_path = os.path.join(root, file)
            new_filename = f"{parent_folder}_{i}.jpg"
            destination_path = os.path.join(destination_parent_folder, new_filename)
            try:
                shutil.copy(source_path, destination_path)
            except OSError as e:
                # Best effort: report the failure and keep copying the rest.
                print(f"Error: {e}")

In [17]:
# Merge the tongue_lip dataset into the combined "complete" dataset.
source_root_folder = f"{DATASETS_PATH}/tongue_lip"
destination_root_folder = f"{DATASETS_PATH}/complete"
copy_images_with_label(
    source_root_folder=source_root_folder,
    destination_root_folder=destination_root_folder,
)

In [18]:
# Merge the oral_cancer dataset into the combined "complete" dataset.
source_root_folder_2 = f"{DATASETS_PATH}/oral_cancer"
destination_root_folder = f"{DATASETS_PATH}/complete"
copy_images_with_label(
    source_root_folder=source_root_folder_2,
    destination_root_folder=destination_root_folder,
)

In [19]:
# Merge the mendley dataset into the combined "complete" dataset.
source_root_folder_3 = f"{DATASETS_PATH}/mendley"
destination_root_folder = f"{DATASETS_PATH}/complete"
copy_images_with_label(
    source_root_folder=source_root_folder_3,
    destination_root_folder=destination_root_folder,
)

In [20]:
# Report the per-class file counts of each source dataset and of the merged one.
for label, subfolder in (
    ('Tongue and Lip Dataset Size:', 'tongue_lip'),
    ('Oral Cancer Dataset Size   :', 'oral_cancer'),
    ('Mendley Dataset Size       :', 'mendley'),
    ('Complete Dataset Size      :', 'complete'),
):
    print(label, get_dataset_size(f"{DATASETS_PATH}/{subfolder}"))

Tongue and Lip Dataset Size: {'cancer': 87, 'non_cancer': 44}
Oral Cancer Dataset Size   : {'cancer': 500, 'non_cancer': 250}
Mendley Dataset Size       : {'cancer': 158, 'non_cancer': 165}
Complete Dataset Size      : {'cancer': 745, 'non_cancer': 459}
