Classification of the data set according to age and gender constraints

In [None]:
import os
import shutil
import pandas as pd


# Input locations: the folder holding the image subfolders and the metadata CSV
source_folder_path = r'Source_Images_Path'
csv_file_path = r'Csv_File_Path'

# Root folder under which the organized category/gender folders are created
base_drive_folder = r'Organized_Images_Path'  # Specify folder path here

# Load the metadata CSV and pull the columns used below into plain Python lists
anemia_data = pd.read_csv(csv_file_path)
_uuid, HB_Level, Age, Gender = (
    anemia_data[column].tolist()
    for column in ("_uuid", "HB Level", "Age", "Gender")
)

# Normalize gender values to ensure consistent folder naming
def normalize_gender(gender):
    """Map a raw gender value onto the canonical folder label.

    The comparison ignores letter case and surrounding whitespace, so values
    such as ``"male"``, ``" Male "`` or ``"FEMALE\n"`` all resolve to
    ``'Male'`` / ``'Female'``.

    Args:
        gender: Raw value from the CSV (string, NaN, None, ...).

    Returns:
        ``'Male'`` or ``'Female'`` when the value matches one of them;
        otherwise the original ``gender`` object unchanged (so missing values
        such as NaN stay detectable by the caller).
    """
    # Strip before comparing: stray spaces in the CSV would otherwise produce
    # separate, oddly named folders instead of landing in Male/Female.
    label = str(gender).strip().lower()
    if label == 'male':
        return 'Male'
    if label == 'female':
        return 'Female'
    return gender

# Define classification categories function
def classify(age, gender):
    """Return the ``(age_group, gender)`` pair for one record.

    The gender is passed through ``normalize_gender``. Age groups are
    "Children" for ``age <= 15``, "Adults" for ``15 < age <= 49`` and "Old"
    for any other value (this includes NaN, for which both comparisons are
    false).
    """
    normalized = normalize_gender(gender)
    if age <= 15:
        group = "Children"
    elif age <= 49:  # reaching this branch already implies age > 15
        group = "Adults"
    else:
        group = "Old"
    return group, normalized

# Build the "<category> <gender>" -> destination folder lookup, then make sure
# every destination folder exists on disk
categories = ["Children", "Adults", "Old"]
folder_mapping = {
    f"{category} {gender}": os.path.join(base_drive_folder, category, gender)
    for category in categories
    for gender in ("Male", "Female")
}
for folder_name in folder_mapping.values():
    os.makedirs(folder_name, exist_ok=True)

# Copy images to the respective folders in Google Drive
# Index the CSV rows by UUID once (first occurrence wins, matching list.index)
# so each source folder is resolved with a dict lookup instead of a list scan.
uuid_to_row = {}
for row_idx, uuid_value in enumerate(_uuid):
    uuid_to_row.setdefault(uuid_value, row_idx)

for folder_name in os.listdir(source_folder_path):
    sub_folder_path = os.path.join(source_folder_path, folder_name)
    idx = uuid_to_row.get(folder_name)

    # Only directories whose name is a known UUID are processed; stray files
    # in the source folder would make os.listdir below fail.
    if idx is None or not os.path.isdir(sub_folder_path):
        continue

    # Skip if age is missing: NaN fails both comparisons in classify() and
    # would otherwise be filed under "Old" without any warning.
    if pd.isnull(Age[idx]):
        print(f"Skipping {folder_name} due to missing age information.")
        continue

    category, gender = classify(Age[idx], Gender[idx])

    # Skip if gender is missing
    if pd.isnull(gender):
        print(f"Skipping {folder_name} due to missing gender information.")
        continue

    subfolder = f"{category} {gender}"  # E.g., "Children Male" or "Adults Female"

    # Ensure the subfolder exists in the mapping (gender values other than
    # Male/Female get their own folder, created on first use)
    if subfolder not in folder_mapping:
        destination_folder = os.path.join(base_drive_folder, category, str(gender))
        os.makedirs(destination_folder, exist_ok=True)
        folder_mapping[subfolder] = destination_folder

    for file_name in os.listdir(sub_folder_path):
        source_file_path = os.path.join(sub_folder_path, file_name)

        # shutil.copy only handles regular files; ignore nested directories.
        if not os.path.isfile(source_file_path):
            continue

        destination_file_path = os.path.join(folder_mapping[subfolder], file_name)

        # Copy the image to the new folder with the original name
        shutil.copy(source_file_path, destination_file_path)

        print(f"Copied {file_name} to {destination_file_path}")


Skipping 00a9b3d7-b055-488b-8e33-e13fe292d55b due to missing gender information.
Copied 1731133152042.jpg to C:\Users\Deepak\Documents\major projecct(rean)\Source code (VS)\preprocessed data\Old\Male\1731133152042.jpg
Copied 1731133157730.jpg to C:\Users\Deepak\Documents\major projecct(rean)\Source code (VS)\preprocessed data\Old\Male\1731133157730.jpg
Copied 1731132756172.jpg to C:\Users\Deepak\Documents\major projecct(rean)\Source code (VS)\preprocessed data\Old\Female\1731132756172.jpg
Copied 1731132763149.jpg to C:\Users\Deepak\Documents\major projecct(rean)\Source code (VS)\preprocessed data\Old\Female\1731132763149.jpg
Copied 1730959645979.jpg to C:\Users\Deepak\Documents\major projecct(rean)\Source code (VS)\preprocessed data\Adults\Female\1730959645979.jpg
Copied 1730959654461.jpg to C:\Users\Deepak\Documents\major projecct(rean)\Source code (VS)\preprocessed data\Adults\Female\1730959654461.jpg
Copied 1731132302187.jpg to C:\Users\Deepak\Documents\major projecct(rean)\Source c