In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import time
import traceback
from datetime import datetime

# Notebook-wide pandas settings: show every row and column when a frame is
# displayed, and turn off the chained-assignment warning.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.set_option('mode.chained_assignment', None)

In [2]:
# Data locations. The defaults are the original absolute paths; set the
# BAR_FACS_INPUT_DIR / BAR_FACS_OUTPUT_DIR environment variables to run the
# notebook on another machine without editing this cell.
input_directory = os.environ.get(
    "BAR_FACS_INPUT_DIR",
    "/Users/sauravyadav/Documents/Repos/Datasets/BAR_FACS/",
)
output_directory = os.environ.get(
    "BAR_FACS_OUTPUT_DIR",
    "/Users/sauravyadav/Documents/Repos/Datasets/BAR_FACS/output/",
)

In [4]:
# Read the sample file and keep only its first 14 columns (selected by
# position, so this depends on the column order of the CSV).
facs_df = pd.read_csv(f'{input_directory}/BAR_FACS_sample.csv').iloc[:, :14]

In [5]:
# Show one randomly chosen row (no seed is set, so the row differs per run).
facs_df.sample(n=1)

Unnamed: 0,Group_ID,Frame,Sub_ID,AU6,AU9,AU12,AU14,AU15,AU20,Speak,Sip,Sip_Fake,Cannot_Code,Laugh
72258,1,24087,2007,0,0,0,0,0,0,0,0,0,0,0


In [4]:
# Flag rows where AU6 and AU12 are both active (non-zero). Each column is
# compared against zero first so the `&` is a logical AND: a bitwise `&` on raw
# integer codes would give 0 for e.g. 1 & 2, and would raise on float columns.
# For 0/1-coded columns the result is the same 0/1 integer flag as before.
facs_df['Duchenne_smile'] = (facs_df['AU6'].gt(0) & facs_df['AU12'].gt(0)).astype(int)

In [5]:
# For every (Group_ID, Frame) pair, total the flag over the pair's rows and
# broadcast that total back onto each row; the event column is 1 when the
# total is exactly 3 and 0 otherwise.
# NOTE(review): the threshold 3 presumably means "every member of a
# three-person group" -- confirm each group has exactly three rows per frame.
facs_df['GoldenMoment'] = (
    facs_df.groupby(['Group_ID', 'Frame'])['Duchenne_smile']
    .transform('sum')
    .eq(3)
    .astype(int)
)

# Same rule applied to the Laugh flag.
facs_df['TriadicLaughing'] = (
    facs_df.groupby(['Group_ID', 'Frame'])['Laugh']
    .transform('sum')
    .eq(3)
    .astype(int)
)

In [19]:
# List the columns currently present in facs_df.
facs_df.columns

Index(['Group_ID', 'Frame', 'Sub_ID', 'AU6', 'AU9', 'AU12', 'AU14', 'AU15',
       'AU20', 'Speak', 'Sip', 'Sip_Fake', 'Cannot_Code', 'Laugh',
       'Duchenne_smile', 'GoldenMoment', 'TriadicLaughing'],
      dtype='object')

In [6]:
# Write the row-level frame to the output directory, without the index column.
facs_df.to_csv(path_or_buf=f'{output_directory}/BAR_FACS_V2.csv', index=False)

In [7]:
# Collapse to one row per (Group_ID, Frame), taking the first value of each
# event flag within the pair.
facs_grouped_df = (
    facs_df
    .groupby(['Group_ID', 'Frame'])[['GoldenMoment', 'TriadicLaughing']]
    .first()
    .reset_index()
)

In [8]:
# Assign each frame to a 1-based bin of 1800 frames: frames 0-1799 -> 1,
# 1800-3599 -> 2, and so on.
# NOTE(review): if Frame numbering starts at 1 rather than 0, frame 1800 lands
# in bin 2 and the first bin holds only 1799 frames -- confirm the numbering.
facs_grouped_df['Minute'] = facs_grouped_df['Frame'].floordiv(1800).add(1)

In [9]:
# Total the per-frame event flags within each (Group_ID, Minute) bin; each
# total is the number of frames in that bin on which the event was flagged.
facs_grouped_df = (
    facs_grouped_df
    .groupby(['Group_ID', 'Minute'])[['GoldenMoment', 'TriadicLaughing']]
    .sum()
    .rename(columns={
        'GoldenMoment': 'GoldenMoment_Frames',
        'TriadicLaughing': 'TriadicLaughing_Frames',
    })
    .reset_index()
)

In [10]:
# Express each frame count as frames / 30, rounded to two decimals, and append
# the results as the *_Seconds columns.
facs_grouped_df = facs_grouped_df.assign(
    GoldenMoment_Seconds=lambda binned: binned['GoldenMoment_Frames'].div(30).round(2),
    TriadicLaughing_Seconds=lambda binned: binned['TriadicLaughing_Frames'].div(30).round(2),
)

In [11]:
# Write the per-minute table to the output directory, without the index column.
facs_grouped_df.to_csv(path_or_buf=f'{output_directory}/MinuteBinnedResults.csv', index=False)

In [12]:
# Preview the first five rows of the per-minute table.
facs_grouped_df.head(n=5)

Unnamed: 0,Group_ID,Minute,GoldenMoment_Frames,TriadicLaughing_Frames,GoldenMoment_Seconds,TriadicLaughing_Seconds
0,1,1,239,0,7.97,0.0
1,1,2,0,0,0.0,0.0
2,1,3,0,0,0.0,0.0
3,1,4,0,0,0.0,0.0
4,1,5,0,0,0.0,0.0


In [13]:
# Rows whose position within their Group_ID (in current row order) is 0, 1 or 2,
# i.e. the first three rows of every group.
first_three = facs_grouped_df.loc[facs_grouped_df.groupby('Group_ID').cumcount() < 3]

# Everything else: all rows whose index label is not among those picked above.
remaining = facs_grouped_df.drop(index=first_three.index)

In [14]:
# Aggregations that collapse each group's rows in `first_three` into one row.
agg_funcs = {'Minute': lambda x: ','.join(map(str, x)),  # e.g. minutes 1, 2, 3 -> '1,2,3'
             'GoldenMoment_Frames': 'sum',
             'TriadicLaughing_Frames': 'sum',
             'GoldenMoment_Seconds': 'sum',
             'TriadicLaughing_Seconds': 'sum'}

first_three_collapsed = first_three.groupby('Group_ID').agg(agg_funcs).reset_index()

# Re-derive the seconds from the summed frame counts. The per-minute seconds
# were already rounded to two decimals, so adding them up compounds rounding
# error (frame counts 1 + 1 + 1 would give 0.09 s instead of 0.10 s) and can
# leave floating-point noise in the exported values.
first_three_collapsed['GoldenMoment_Seconds'] = (first_three_collapsed['GoldenMoment_Frames'] / 30).round(2)
first_three_collapsed['TriadicLaughing_Seconds'] = (first_three_collapsed['TriadicLaughing_Frames'] / 30).round(2)

In [15]:
# Stack the collapsed rows on top of the untouched per-minute rows.
facs_custom_grouped_df = pd.concat([first_three_collapsed, remaining])

# Sort key: the first minute named in each label ('1,2,3' -> 1, 4 -> 4).
# Deriving it from the label, instead of replacing the literal '1,2,3', also
# handles groups whose collapsed label differs (fewer than three minutes, or
# minutes that do not start at 1); those labels used to stay strings and made
# the sort fail on mixed str/int values.
facs_custom_grouped_df['Minute_Sort_Priority'] = facs_custom_grouped_df['Minute'].map(
    lambda minute: float(str(minute).split(',')[0])
)
facs_custom_grouped_df = facs_custom_grouped_df.sort_values(by=['Group_ID', 'Minute_Sort_Priority'])

# The helper column is only needed for ordering; drop it and renumber the rows.
facs_custom_grouped_df = facs_custom_grouped_df.drop(columns=['Minute_Sort_Priority']).reset_index(drop=True)

In [16]:
# Display facs_custom_grouped_df for inspection.
facs_custom_grouped_df

Unnamed: 0,Group_ID,Minute,GoldenMoment_Frames,TriadicLaughing_Frames,GoldenMoment_Seconds,TriadicLaughing_Seconds
0,1,123,239,0,7.97,0.0
1,1,4,0,0,0.0,0.0
2,1,5,0,0,0.0,0.0
3,1,6,0,0,0.0,0.0
4,1,7,0,0,0.0,0.0
5,1,8,0,0,0.0,0.0
6,1,9,21,0,0.7,0.0
7,1,10,0,0,0.0,0.0
8,1,11,24,20,0.8,0.67
9,1,12,0,0,0.0,0.0


In [17]:
# Write the custom-binned table to the output directory, without the index column.
facs_custom_grouped_df.to_csv(path_or_buf=f'{output_directory}/CustomBinnedResults.csv', index=False)