In [9]:
import pandas as pd
from collections import Counter
import numpy as np
import ast
import matplotlib.pyplot as plt
import csv
import os
import shutil
import cv2

In [2]:
# Load the demographic data
demographicsData = pd.read_csv('./Extra/demographics.csv',header=None)
demographicsData.columns = ['video_tag','clip_id','person_id','person_global_id','race','gender','skin_color']

In [3]:
demographicsData

Unnamed: 0,video_tag,clip_id,person_id,person_global_id,race,gender,skin_color
0,aJKL0ahn1Dk,1,0,0,W,F,#e4a9ad
1,aJKL0ahn1Dk,2,4,0,W,F,#c88a92
2,UpFG5ZK62XM,2,4,0,W,F,#be866f
3,UpFG5ZK62XM,3,3,0,W,F,#c5917c
4,UpFG5ZK62XM,4,4,0,W,F,#cb9185
...,...,...,...,...,...,...,...
168,UpFG5ZK62XM,3,1,8,W,M,#c07f76
169,UpFG5ZK62XM,4,2,8,W,M,#e6968b
170,ngITkMvWuq8,1,1,9,W,M,#d29497
171,ngITkMvWuq8,2,3,9,W,M,#e29290


In [5]:
datafolder = './EiLA_data/'

In [19]:
#Store files for each emotion

typeEmotions=['Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Anger']

dataset={}
# base_path = 'Users/poojakishore/Downloads/CapitaSelectaData' + datafolder
for (idx,emotion) in zip(range(7),typeEmotions):
    folder_path = f'{datafolder}{idx}'
    files = [file for file in os.listdir(folder_path)
             if os.path.isfile(os.path.join(folder_path, file)) and file.lower() != '.ds_store']
    dataset[idx]=files
    
# dataset

In [26]:
#get file name, clip_id, person_id and frame from the file names
def parse_filename(file):
    # Split the file name from its extension
    filename, ext = os.path.splitext(file)

    # Split the filename by underscores to extract parts
    parts = filename.split('_')

    if len(parts) >= 4:
        # Handle cases where the first part contains underscores
        name = '_'.join(parts[:-4])
        clip_id = parts[-4]
        person_id = parts[-3]
        frame_id = parts[-2]

        return name, clip_id, person_id, frame_id
    else:
        raise ValueError("Filename format is not as expected")

In [27]:
output_base_path = './data'  # Base path for organized data
os.makedirs(output_base_path, exist_ok=True)

for idx, emotion in zip(range(7), typeEmotions):
    emotion_folder_male = os.path.join(output_base_path, 'male', str(idx))
    emotion_folder_female = os.path.join(output_base_path, 'female', str(idx))

    os.makedirs(emotion_folder_male, exist_ok=True)
    os.makedirs(emotion_folder_female, exist_ok=True)

    for file in dataset[idx]:
        try:
            video_name, clip_id, person_id, frame_id = parse_filename(file)
            print(video_name, clip_id, person_id, frame_id)

            # Filter demographicsData for matching video_name, clip_id, and person_id
            match = demographicsData[
                (demographicsData['video_tag'] == video_name) &
                (demographicsData['clip_id'] == int(clip_id)) &
                (demographicsData['person_id'] == int(person_id))
                ]

            if not match.empty:
                print(f"Match found for {file}")
                gender = match['gender'].values[0]
                src_file = os.path.join(datafolder, str(idx), file)

                if gender == 'M':
                    dest_file = os.path.join(emotion_folder_male, file)
                elif gender == 'F':
                    dest_file = os.path.join(emotion_folder_female, file)
                else:
                    continue  # Skip if gender is not 'M' or 'F'

                # Debug: Print file paths
                print(f"Copying {src_file} to {dest_file}")

                # Copy the file to the corresponding folder
                shutil.copy(src_file, dest_file)

        except FileNotFoundError:
            print(f"File not found: {file}")
        except ValueError as e:
            print(f"Error parsing file {file}: {e}")
        except Exception as ex:
            print(f"Error processing file {file}: {ex}")

ngITkMvWuq8 3 6 21067
Match found for ngITkMvWuq8_3_6_21067_21097.jpg
Copying ./EiLA_data/0/ngITkMvWuq8_3_6_21067_21097.jpg to ./data/male/0/ngITkMvWuq8_3_6_21067_21097.jpg
Ul53TVUR4NM 2 6 4933
Match found for Ul53TVUR4NM_2_6_4933_4957.jpg
Copying ./EiLA_data/0/Ul53TVUR4NM_2_6_4933_4957.jpg to ./data/male/0/Ul53TVUR4NM_2_6_4933_4957.jpg
Ul53TVUR4NM 8 3 34715
Match found for Ul53TVUR4NM_8_3_34715_34721.jpg
Copying ./EiLA_data/0/Ul53TVUR4NM_8_3_34715_34721.jpg to ./data/female/0/Ul53TVUR4NM_8_3_34715_34721.jpg
ngITkMvWuq8 2 1 20173
Match found for ngITkMvWuq8_2_1_20173_20269.jpg
Copying ./EiLA_data/0/ngITkMvWuq8_2_1_20173_20269.jpg to ./data/female/0/ngITkMvWuq8_2_1_20173_20269.jpg
Ul53TVUR4NM 10 0 63715
Match found for Ul53TVUR4NM_10_0_63715_63745.jpg
Copying ./EiLA_data/0/Ul53TVUR4NM_10_0_63715_63745.jpg to ./data/female/0/Ul53TVUR4NM_10_0_63715_63745.jpg
Uh00UIl7-bo 1 8 16249
Match found for Uh00UIl7-bo_1_8_16249_16291.jpg
Copying ./EiLA_data/0/Uh00UIl7-bo_1_8_16249_16291.jpg to ./dat

In [30]:
# Read all the file names inside a list of folder and save it in the same array
def read_files(folder):
    files = []
    for file in os.listdir(folder):
        if os.path.isfile(os.path.join(folder, file)) and file.endswith('.jpg'):
            files.append(file)
    return files

In [31]:
# Read all the files in the folder
files = []
emotion_files = {}
for emotion in range(7):
    files.extend(read_files("./EiLA_data/" + str(emotion)))
    emotion_files[emotion] = read_files("./EiLA_data/" + str(emotion))


In [32]:
#Count of male and female in each emotion
genderDistribution={}
for emotion in emotion_files.keys():
    m = 0
    f = 0
    for file in emotion_files[emotion]:
        try:
            video_name, clip_id, person_id, frame_id = parse_filename(file)

            # Filter demographicsData for matching video_name, clip_id, and person_id
            match = demographicsData[
                (demographicsData['video_tag'] == video_name) &
                (demographicsData['clip_id'] == int(clip_id)) &
                (demographicsData['person_id'] == int(person_id))
                ]

            if not match.empty:
                gender = match['gender'].values[0]
                if gender == 'M':
                    m += 1
                elif gender == 'F':
                    f += 1
        except ValueError as e:
            print(f"Error parsing file {file}: {e}")

    genderCount = {'M': m, 'F': f}
    genderDistribution[emotion] = genderCount

In [33]:
genderDistribution

{0: {'M': 1488, 'F': 971},
 1: {'M': 644, 'F': 876},
 2: {'M': 135, 'F': 47},
 3: {'M': 82, 'F': 107},
 4: {'M': 153, 'F': 70},
 5: {'M': 67, 'F': 103},
 6: {'M': 352, 'F': 568}}

In [38]:
# count the files inside the male and in each emotion
for i in os.listdir("./data/gender_distribution/male"):
    #ignore the .DS_Store file
    if i == ".DS_Store":
        continue
    print(i, len(os.listdir("./data/gender_distribution/male/" + i)))

0 1488
6 352
1 644
4 153
3 82
2 135
5 67
