In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import time
import traceback
from datetime import datetime
from tqdm import tqdm

# Notebook-wide pandas settings: show every row/column when a frame is
# displayed, and turn off the chained-assignment check.
for _option_name, _option_value in {
    'display.max_rows': None,
    'display.max_columns': None,
    'mode.chained_assignment': None,
}.items():
    pd.set_option(_option_name, _option_value)

In [2]:
# Folder listed with os.listdir() by the processing cell; every entry whose
# name contains '.csv' is read with pd.read_csv().
# NOTE(review): absolute, user-specific path — must be edited to run on another machine.
input_directory = "/Users/sauravyadav/Documents/Repos/Datasets/BAR_FACS/All_Groups"
# Folder that receives MinuteBinnedResults.csv and CustomBinnedResults.csv.
# NOTE(review): this value already ends in '/', and the export cells append
# another '/' in their f-strings, so the final paths contain '//'.
output_directory = "/Users/sauravyadav/Documents/Repos/Datasets/BAR_FACS/output/"

In [3]:
# Per-file processing: read every CSV in `input_directory`, flag the frames in
# which the per-frame sum of Duchenne smiles / laughs equals GROUP_SIZE, and
# bin those frames per minute.
#
# Results collected for the following cells:
#   length_data      - one dict per CSV with its original and truncated row count
#   mdf_list         - one per-minute summary frame per processed file
#   mdf_grouped_list - same summary with the first COLLAPSED_MINUTES bins merged
#   group_sub        - one {'Group_ID', 'Sub_ID'} dict per subject per processed file

# Constants that were previously inline magic numbers.
EXPECTED_ROWS = 188811     # rows kept per file; files with fewer rows are skipped
GROUP_SIZE = 3             # per-frame sum that marks a "golden moment" / triadic laugh
FRAMES_PER_MINUTE = 1800   # bin width used to derive 'Minute' from 'Frame'
FRAMES_PER_SECOND = 30     # divisor used to turn frame counts into seconds
COLLAPSED_MINUTES = 3      # leading minute bins merged into one row for the custom output

# Header variants seen in the input files mapped onto one schema.
# NOTE(review): if a file contains more than one of the Sub_ID aliases, the
# rename produces duplicate 'Sub_ID' columns — confirm this cannot happen.
COLUMN_ALIASES = {
    'Unnamed: 0': 'Sub_ID', 'Unnamed: 1': 'Sub_ID', 'SubID': 'Sub_ID',
    '6': 'AU6', '12': 'AU12', '1': 'Frame', '88': 'Laugh',
}

length_data = []
mdf_list = []
mdf_grouped_list = []
group_sub = []

# Sorted so the progress bar and skip messages appear in a reproducible order.
files = sorted(os.listdir(input_directory))
for filename in tqdm(files):
    if '.csv' not in filename:
        continue

    # The group identifier is the part of the file name before the first '_'.
    group = filename.split('_')[0]

    df = pd.read_csv(os.path.join(input_directory, filename))

    original_length = len(df)
    df = df[:EXPECTED_ROWS]
    length_data.append({'Filename': filename,
                        'Original Length': original_length,
                        'Final Length': len(df)})

    if len(df) < EXPECTED_ROWS:
        print(f'Skipping file (length issue): {filename}')
        continue

    df = df.rename(columns=COLUMN_ALIASES)

    # The casts and the column selection are the steps that actually fail on
    # malformed files (NaN in an AU column, missing column), so they live
    # inside the try block together with the bitwise AND. A bad file is
    # reported and skipped instead of aborting the whole run.
    try:
        df['AU6'] = df['AU6'].astype(int)
        df['AU12'] = df['AU12'].astype(int)
        df['Duchenne_smile'] = df['AU6'] & df['AU12']
        df['Group_ID'] = group
        df = df[['Group_ID', 'Frame', 'Sub_ID', 'Duchenne_smile', 'Laugh']]
    except Exception as e:
        print(f'Skipping file (processing error): {filename}')
        display(df.head())
        display(df.tail())
        print(e)
        continue

    group_sub.extend({'Group_ID': group, 'Sub_ID': sub} for sub in df['Sub_ID'].unique())

    # One row per (Group_ID, Frame) holding the per-frame sums across subjects.
    per_frame = df.groupby(['Group_ID', 'Frame']).agg(
        Duchenne_smile_sum=('Duchenne_smile', 'sum'),
        Triadic_laughing_sum=('Laugh', 'sum'),
    ).reset_index()
    per_frame['GoldenMoment'] = (per_frame['Duchenne_smile_sum'] == GROUP_SIZE).astype(int)
    per_frame['TriadicLaughing'] = (per_frame['Triadic_laughing_sum'] == GROUP_SIZE).astype(int)

    # NOTE(review): with 1-based frame numbers, frame 1800 lands in minute 2
    # and minute 1 holds only 1799 frames — confirm whether 'Frame' is 0-based.
    per_frame['Minute'] = (per_frame['Frame'] // FRAMES_PER_MINUTE) + 1

    df_grouped = per_frame.groupby(['Group_ID', 'Minute']).agg(
        GoldenMoment_Frames=('GoldenMoment', 'sum'),
        TriadicLaughing_Frames=('TriadicLaughing', 'sum'),
    ).reset_index()

    df_grouped['GoldenMoment_Seconds'] = (df_grouped['GoldenMoment_Frames'] / FRAMES_PER_SECOND).round(2)
    df_grouped['TriadicLaughing_Seconds'] = (df_grouped['TriadicLaughing_Frames'] / FRAMES_PER_SECOND).round(2)

    mdf_list.append(df_grouped)

    # Custom binning: the first COLLAPSED_MINUTES rows of each group become a
    # single row whose 'Minute' is the comma-joined list of merged minutes.
    first_three = df_grouped.groupby('Group_ID').head(COLLAPSED_MINUTES)
    remaining = df_grouped[~df_grouped.index.isin(first_three.index)]

    agg_funcs = {'Minute': lambda x: ','.join(map(str, x)),  # Concatenate 'Minute' with commas
                 'GoldenMoment_Frames': 'sum',
                 'TriadicLaughing_Frames': 'sum',
                 'GoldenMoment_Seconds': 'sum',
                 'TriadicLaughing_Seconds': 'sum'}

    first_three_collapsed = first_three.groupby('Group_ID').agg(agg_funcs).reset_index()

    # Numeric sort key: the collapsed row sorts by its earliest minute, every
    # other row by its own minute. Computed from the numeric values so it does
    # not depend on the joined label being exactly '1,2,3'.
    first_minute = first_three.groupby('Group_ID')['Minute'].min()
    first_three_collapsed['Minute_Sort_Priority'] = first_three_collapsed['Group_ID'].map(first_minute)
    remaining = remaining.assign(Minute_Sort_Priority=remaining['Minute'])

    facs_custom_grouped_df = (
        pd.concat([first_three_collapsed, remaining])
        .sort_values(by=['Group_ID', 'Minute_Sort_Priority'])
        .reset_index(drop=True)
    )

    mdf_grouped_list.append(facs_custom_grouped_df)

100%|█████████████████████████████████████████| 127/127 [00:10<00:00, 11.78it/s]


In [4]:
# Lookup table of (Group_ID, Sub_ID) pairs collected while processing the files.
# Explicit columns keep the schema even when `group_sub` is empty, and
# de-duplicating prevents the later merge on 'Group_ID' from multiplying rows
# when the same pair was recorded more than once.
gsub_df = (
    pd.DataFrame(group_sub, columns=['Group_ID', 'Sub_ID'])
    .drop_duplicates()
    .reset_index(drop=True)
)

In [5]:
# Stack the per-file summaries: one frame for the per-minute bins and one for
# the custom bins.
minute_df, custom_df = (
    pd.concat(per_file_frames) for per_file_frames in (mdf_list, mdf_grouped_list)
)

In [6]:
# Attach subject IDs via a left join on 'Group_ID'; each binned row is repeated
# once for every matching row of gsub_df.
minute_df = pd.merge(minute_df, gsub_df, how='left', on=['Group_ID'])
custom_df = pd.merge(custom_df, gsub_df, how='left', on=['Group_ID'])

In [7]:
# Cast the identifier columns to integers. 'Minute' is cast only in minute_df;
# in custom_df it is left as it is.
minute_df = minute_df.astype({'Group_ID': int, 'Minute': int, 'Sub_ID': int})
custom_df = custom_df.astype({'Group_ID': int, 'Sub_ID': int})

In [8]:
# Column order shared by both result tables.
_export_columns = [
    'Group_ID',
    'Sub_ID',
    'Minute',
    'GoldenMoment_Frames',
    'TriadicLaughing_Frames',
    'GoldenMoment_Seconds',
    'TriadicLaughing_Seconds',
]

minute_df = minute_df.loc[:, _export_columns]
# custom_df additionally keeps its sort key, which the export cell still needs.
custom_df = custom_df.loc[:, _export_columns + ['Minute_Sort_Priority']]

In [9]:
# Order the per-minute results by group, minute and subject, then export them.
minute_df = minute_df.sort_values(by=['Group_ID', 'Minute', 'Sub_ID']).reset_index(drop=True)

# Create the output folder if it is missing (to_csv does not create it), and
# build the path with os.path.join so the trailing '/' in output_directory does
# not produce a doubled separator.
os.makedirs(output_directory, exist_ok=True)
minute_df.to_csv(os.path.join(output_directory, 'MinuteBinnedResults.csv'), index=False)

In [10]:
# Order the custom-binned results by group, bin position and subject, then drop
# the helper sort key before exporting.
custom_df = custom_df.sort_values(by=['Group_ID', 'Minute_Sort_Priority', 'Sub_ID']).reset_index(drop=True)
custom_df = custom_df.drop(columns=['Minute_Sort_Priority'])

# Create the output folder if it is missing (to_csv does not create it), and
# build the path with os.path.join so the trailing '/' in output_directory does
# not produce a doubled separator.
os.makedirs(output_directory, exist_ok=True)
custom_df.to_csv(os.path.join(output_directory, 'CustomBinnedResults.csv'), index=False)