# Get Ground Truth Labels for Plant Data
---
We use emotion labels for the participants, extracted from videos that were recorded in parallel with the plant recordings. The emotions have already been predicted from the participants' facial expressions; here we collect those labels and use them as ground truth for our plant experiments.

In [1]:
import pandas as pd
import os

In [2]:
# Root folder of the emotion label files; laid out as <emotions_dir>/<team>/<day>.
emotions_dir = "../data/teamwork-emotions"
# Root folder of the extracted plant recordings (.wav); laid out as <interim_data_dir>/<team>/<day>.
interim_data_dir = "../data/interim-plant-data-teamwork-extracted"

In [3]:
# NOTE(review): presumably toggles writing results to disk; it is not read in the cells visible here.
save_file: bool = True

# Team names "team_01" ... "team_22"; the numbers 14 and 21 are not part of the list.
_missing_team_numbers = {14, 21}
teams = [
    f"team_{number:02d}"
    for number in range(1, 23)
    if number not in _missing_team_numbers
]

# Teamworking days (used as the per-day sub-folder names under each team folder).
days = [
    "2023-01-10",
    "2023-01-12",
    "2023-01-13",
]

The teamwork-session interval extraction produced some "broken" `.wav` files, which I am not going to use. I manually renamed the affected folders so that it is clear which teams are excluded from the experiment. Valid folders keep the plain name, e.g. "team_01" (without any addition); folders to be ignored have a longer name that states the reason for exclusion, e.g. "team_03_broken_because_44100_samplingrate_on_day1".

In [86]:
# Sort key for emotion label files named like "clip_<clip id>_<start frame>_<end frame>.csv".
def custom_sort_emotion(item):
    """
    Return the sort key (clip id, start frame) for an emotion label file name.

    For "clip_0_11509_11908.csv" the key is (0, 11509), so labels are ordered by clip id first
    and by start frame within the same clip.
    """
    fields = item.split('_')
    clip_id = int(fields[1])
    start_frame = int(fields[2])
    return clip_id, start_frame

# Sort key for interim plant recordings named like "sdm_<date>_team_<nr>_<start frame>_<end frame>.wav".
def custom_sort_interim(item):
    """
    Return the start frame of an interim data file name as an integer sort key.

    For "sdm_2023-01-10_team_01_8333_9490.wav" the key is 8333 (the fifth "_"-separated field),
    so files are ordered numerically by start frame rather than alphabetically.
    """
    start_frame = item.split('_')[4]
    return int(start_frame)

In [95]:
# Quick check of the file-name parsing: drop the extension, split on "_" and
# subtract the start frame (field 4) from the end frame (field 5).
a = "sdm_2023-01-10_team_01_8333_9490.wav"
parts = a.partition('.')[0].split("_")
end_frame = int(parts[5])
start_frame = int(parts[4])
duration = end_frame - start_frame
parts, duration

(['sdm', '2023-01-10', 'team', '01', '8333', '9490'], 1157)

In [96]:
def get_duration_emotion(label):
    """
    Return the duration (in frames) encoded in an emotion label file name.

    The name has the form "clip_<clip id>_<start frame>_<end frame>.csv"; the duration is
    end frame minus start frame, e.g. "clip_0_11509_11908.csv" -> 11908 - 11509 = 399.
    """
    stem = label.split('.')[0]
    fields = stem.split("_")
    end_frame = int(fields[3])
    start_frame = int(fields[2])
    return end_frame - start_frame

def get_duration_interim(label):
    """
    Return the duration (in frames) encoded in an interim plant data file name.

    The name has the form "sdm_<date>_team_<nr>_<start frame>_<end frame>.wav"; the duration is
    end frame minus start frame, e.g. "sdm_2023-01-10_team_01_8333_9490.wav" -> 9490 - 8333 = 1157.
    """
    stem = label.split('.')[0]
    fields = stem.split("_")
    end_frame = int(fields[5])
    start_frame = int(fields[4])
    return end_frame - start_frame

In [102]:
# For every team/day that has extracted plant recordings, list the emotion label
# files and the plant .wav files, sort both chronologically, and report every
# pair whose durations (end frame - start frame) do not match.
for t in teams:
    for d in days:
        interim_data_path = os.path.join(interim_data_dir, t, d)
        # Only team/day combinations with an interim data folder are checked.
        if not os.path.exists(interim_data_path):
            continue

        emotions_path = os.path.join(emotions_dir, t, d)
        print(f"emotion: {emotions_path}")

        clip_files = os.listdir(emotions_path)
        clip_files = [item for item in clip_files if not item.startswith('team')] # remove item "team_1...csv"
        print(f"{len(clip_files)}: {clip_files}")
        clip_files = sorted(clip_files, key=custom_sort_emotion)

        print(f"interim: {interim_data_path}")
        interim_data_files = os.listdir(interim_data_path)
        print(f"{len(interim_data_files)}: {interim_data_files}\n")
        # Sort numerically by start frame. The previous key name `custom_sort` is
        # not defined in this notebook and raised a NameError on a fresh kernel.
        interim_data_files = sorted(interim_data_files, key=custom_sort_interim)

        # A differing number of files means the pairing below is unreliable;
        # report it instead of failing with an IndexError or silently ignoring extras.
        if len(clip_files) != len(interim_data_files):
            print(
                f"WARNING: {len(clip_files)} emotion files vs. "
                f"{len(interim_data_files)} interim files; comparing the first "
                f"{min(len(clip_files), len(interim_data_files))} pairs only"
            )

        for clip_file, interim_file in zip(clip_files, interim_data_files):
            emotion_duration = get_duration_emotion(clip_file)
            interim_duration = get_duration_interim(interim_file)
            if emotion_duration != interim_duration:
                print(clip_file, "\t", interim_file)
                print(emotion_duration, "\t\t\t", interim_duration)

        print("")
    print("___________________________________")

emotion: ../data/teamwork-emotions\team_01\2023-01-10
8: ['clip_0_8583_9740.csv', 'clip_1_0_880.csv', 'clip_1_1730_3160.csv', 'clip_1_7753_8955.csv', 'clip_1_9762_10255.csv', 'clip_2_0_548.csv', 'clip_2_2332_3245.csv', 'clip_2_3954_5383.csv']
interim: ../data/interim-plant-data-teamwork-extracted\team_01\2023-01-10
8: ['sdm_2023-01-10_team_01_10080_10960.wav', 'sdm_2023-01-10_team_01_11810_13240.wav', 'sdm_2023-01-10_team_01_17833_19035.wav', 'sdm_2023-01-10_team_01_19842_20335.wav', 'sdm_2023-01-10_team_01_20340_20888.wav', 'sdm_2023-01-10_team_01_22672_23585.wav', 'sdm_2023-01-10_team_01_24294_25723.wav', 'sdm_2023-01-10_team_01_8333_9490.wav']


emotion: ../data/teamwork-emotions\team_01\2023-01-12
3: ['clip_0_2468_3156.csv', 'clip_0_9200_10530.csv', 'clip_1_714_2696.csv']
interim: ../data/interim-plant-data-teamwork-extracted\team_01\2023-01-12
3: ['sdm_2023-01-12_team_01_13020_13708.wav', 'sdm_2023-01-12_team_01_19752_21082.wav', 'sdm_2023-01-12_team_01_22366_24348.wav']


_______