In [1]:
import pandas as pd
import random, os, shutil

In [2]:
# Load the GANFD table (one row per face image).
ganfd = pd.read_csv('GANFD_Data_V1.csv')

# Split on 'gender_fem': rows below 0.10 are treated as men, rows above 0.90
# as women; everything in between is left out of both frames.
# NOTE(review): presumably 'gender_fem' is a 0-1 femininity score - confirm
# against the dataset codebook.
men_df = ganfd.loc[ganfd['gender_fem'].lt(0.10)]
women_df = ganfd.loc[ganfd['gender_fem'].gt(0.90)]

In [3]:
def filter_and_select_rows(df):
    """Pick one 'Black' and one 'White' row from every 'set' that has both.

    Within each group of rows sharing a 'set' value, the 'Black' row with the
    highest 'race_ba' and the 'White' row with the lowest 'race_ba' are kept.
    Sets lacking a usable row for either 'race_guess' value are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'set', 'race_guess' and 'race_ba'.

    Returns
    -------
    pandas.DataFrame
        The selected rows, ordered by 'set' (groupby's default key sort) with
        the 'Black' row before the 'White' row of each set, on a fresh
        0..n-1 index. Columns and dtypes match ``df``; when nothing is
        selected the result is empty but still carries every column of
        ``df``, so downstream column filters keep working.
    """
    # Work on positional labels so idxmax/idxmin always identify exactly one
    # row, even if the caller's index contains duplicates.
    rows = df.reset_index(drop=True)
    chosen_labels = []

    for _, group in rows.groupby('set'):
        # Rows with a missing 'race_ba' cannot be ranked, so they are dropped
        # up front; a sub-group with only missing scores counts as empty.
        black_scores = group.loc[group['race_guess'] == 'Black', 'race_ba'].dropna()
        white_scores = group.loc[group['race_guess'] == 'White', 'race_ba'].dropna()

        # Only sets that can contribute a full Black/White pair are used
        if black_scores.empty or white_scores.empty:
            continue

        chosen_labels.append(black_scores.idxmax())
        chosen_labels.append(white_scores.idxmin())

    # A single label-based slice keeps the original columns and dtypes
    return rows.loc[chosen_labels].reset_index(drop=True)

# Run the per-set selection for each gender and tag the result with its label
men_filtered_df = filter_and_select_rows(men_df).assign(gender='Men')
women_filtered_df = filter_and_select_rows(women_df).assign(gender='Women')

# Keep at most the first 15 rows of every gender x race cell, stacked in the
# order Men/Black, Men/White, Women/Black, Women/White. Cells holding fewer
# than 15 rows are kept as they are, so cell sizes are capped, not equalised.
final_df = pd.concat(
    [
        gender_df.loc[gender_df['race_guess'] == race].iloc[:15]
        for gender_df in (men_filtered_df, women_filtered_df)
        for race in ('Black', 'White')
    ],
    ignore_index=True,
)

In [4]:
# Source folder holding the candidate .jpg images
source_folder = 'GANFD - Cropped Grey Backgrounds'

# Destination folders, each mapped to the image IDs that should be copied there
dest_folders = {
    'Images': final_df['full_ID'].to_list()
}

# Create destination folders if they don't exist
for folder in dest_folders:
    os.makedirs(folder, exist_ok=True)

# Set copies of the ID lists give constant-time membership tests in the loop
ids_by_folder = {folder: set(id_list) for folder, id_list in dest_folders.items()}
copied_ids = set()

# Iterate through the files in the source folder
for filename in os.listdir(source_folder):
    if not filename.endswith('.jpg'):  # Only process .jpg files
        continue
    file_id = os.path.splitext(filename)[0]  # Extract the ID (without .jpg)

    # Copy the file into the first destination whose ID list contains it
    for dest_folder, wanted_ids in ids_by_folder.items():
        if file_id in wanted_ids:
            source_path = os.path.join(source_folder, filename)
            dest_path = os.path.join(dest_folder, filename)
            shutil.copy(source_path, dest_path)
            copied_ids.add(file_id)
            break  # Move to the next file once copied

# Surface selected IDs that had no matching image instead of skipping them
# silently; otherwise the saved lookup table can list images never copied.
missing_ids = [
    image_id
    for id_list in dest_folders.values()
    for image_id in id_list
    if image_id not in copied_ids
]
if missing_ids:
    print(
        f'WARNING: no .jpg found in {source_folder!r} for '
        f'{len(missing_ids)} selected ID(s): {missing_ids}'
    )

In [5]:
# Save the selected rows as the image lookup table (index column omitted)
final_df.to_csv(path_or_buf='image_lookup.csv', index=False)