In [None]:
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats

#### Create DataFrame

In [None]:
# Gather every response CSV stored exactly one directory level below
# ./data/responses. A single glob pattern replaces the listdir/emptiness check:
# empty sub-directories simply match nothing, and stray plain files in
# ./data/responses no longer make os.listdir() raise NotADirectoryError.
# Sorting makes the row order of `df` reproducible across machines, because
# glob returns matches in arbitrary (filesystem-dependent) order.
csv_paths = sorted(glob.glob('./data/responses/*/*.csv'))
if not csv_paths:
    # pd.concat([]) would fail with the much vaguer "No objects to concatenate".
    raise ValueError("No response CSV files found under './data/responses/*/'")

lst_df = [pd.read_csv(csv_path) for csv_path in csv_paths]

# reset_index() without drop=True gives the combined frame a unique index and
# keeps each row's position inside its source file as an 'index' column.
df = pd.concat(lst_df).reset_index()

In [None]:
# Score every trial: 1 when the given response equals the expected one, else 0.
response_is_correct = df['CorrectResp'].eq(df['Resp'])
df['Accuracy'] = response_is_correct.astype(int)

# The dyad label is the fifth '/'-separated component of the audio path.
audio_path_parts = df['AudioPath'].str.split('/')
df['DisplayedDyad'] = audio_path_parts.str.get(4)

df

#### Calculate Signal Detection Theory (SDT) Statistics

In [None]:
def _safe_rate(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def _sdt_summary(df):
    """Compute the signal-detection counts and rates for a table of trials.

    A trial is a target trial when its 'Condition' is 'TRUE'; every other
    condition is treated as a non-target. A 'g' response on a target trial is
    a hit and an 'h' response a miss; on non-target trials 'g' is a false
    alarm and 'h' a correct rejection. Trials with any other response are
    left out of the four counts.

    Returns a dict with the keys 'hit_rate', 'fa_rate', 'd_prime' and
    'unbiased_hit_rate'. A rate whose denominator is zero is NaN.
    """
    # pandas.read_csv parses a column holding only TRUE/FALSE as booleans, so
    # accept both the string 'TRUE' and the boolean True (str(True) == 'True').
    is_target = df['Condition'].astype(str).isin(['TRUE', 'True'])
    answered_g = df['Resp'] == 'g'
    answered_h = df['Resp'] == 'h'

    hits = int((is_target & answered_g).sum())
    misses = int((is_target & answered_h).sum())
    correct_rejections = int((~is_target & answered_h).sum())
    false_alarms = int((~is_target & answered_g).sum())

    hit_rate = _safe_rate(hits, hits + misses)
    fa_rate = _safe_rate(false_alarms, false_alarms + correct_rejections)

    # d' = z(hit rate) - z(false-alarm rate). norm.ppf maps a rate of exactly
    # 0 or 1 to -inf / +inf, so d' is not finite at those extremes.
    d_prime = stats.norm.ppf(hit_rate) - stats.norm.ppf(fa_rate)

    # Unbiased hit rate: (hits / target trials) * hits / ('g' responses).
    # Unlike the hit rate above, the target-trial count here includes trials
    # whose response was neither 'g' nor 'h'.
    num_target_stim = int(is_target.sum())
    num_uses_response = int(answered_g.sum())
    if num_target_stim and num_uses_response:
        unbiased_hit_rate = (hits / num_target_stim) * hits / num_uses_response
    else:
        unbiased_hit_rate = float('nan')

    return {
        'hit_rate': hit_rate,
        'fa_rate': fa_rate,
        'd_prime': d_prime,
        'unbiased_hit_rate': unbiased_hit_rate,
    }


def get_stats(df):
    """Print hit rate, false-alarm rate, d' and unbiased hit rate for `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per trial, with a 'Condition' column ('TRUE' marks a target
        trial) and a 'Resp' column ('g' / 'h' responses).

    Returns
    -------
    None
        The statistics are only printed. A statistic that cannot be computed
        (no target trials, no non-target trials, or no 'g' responses) is
        printed as nan instead of raising ZeroDivisionError / KeyError.
    """
    summary = _sdt_summary(df)

    # Hit Rate, False Alarm Rate & d'
    print('Hit rate =', summary['hit_rate'])
    print('False Alarm rate =', summary['fa_rate'])
    print('***********************************************')
    print('d_prime =', summary['d_prime'])

    # Unbiased Hit Rate
    print('Unbiased Hit Rate =', summary['unbiased_hit_rate'])

In [None]:
# Print the signal-detection summary for the complete, unfiltered set of trials.
get_stats(df)

#### Calculate Accuracy per Dyad

In [None]:
def calculate_accuracy(df):
    """Return the proportion of trials in `df` that were answered correctly.

    A trial is correct when it is a hit ('Condition' is 'TRUE' and 'Resp' is
    'g') or a correct rejection (any other condition and 'Resp' is 'h').
    Trials with any other response stay in the denominator, so they count as
    incorrect.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per trial with 'Condition' and 'Resp' columns.

    Returns
    -------
    float
        (hits + correct rejections) / number of trials, or NaN for an empty
        frame (instead of raising ZeroDivisionError).
    """
    n_trials = len(df)
    if n_trials == 0:
        return float('nan')

    # pandas.read_csv parses a column holding only TRUE/FALSE as booleans, so
    # accept both the string 'TRUE' and the boolean True (str(True) == 'True').
    is_target = df['Condition'].astype(str).isin(['TRUE', 'True'])

    hits = int((is_target & (df['Resp'] == 'g')).sum())
    correct_rejections = int((~is_target & (df['Resp'] == 'h')).sum())

    return (hits + correct_rejections) / n_trials

In [None]:
# A single groupby object feeds all three per-dyad summaries below.
by_dyad = df.groupby(df['DisplayedDyad'])

# Accuracy of each dyad, one row per dyad.
df_dyad = by_dyad.apply(calculate_accuracy).reset_index(name='Accuracy')

# Number of trials recorded for each dyad.
trials = by_dyad.apply(len).reset_index(name='NumTrials')['NumTrials']
df_dyad['NumTrials'] = trials

# Array of the video paths shown for each dyad.
stims = by_dyad.apply(
    lambda dyad_trials: dyad_trials['VideoPath'].to_numpy()
).reset_index(name='Stims')['Stims']
df_dyad['Stims'] = stims

df_dyad

In [None]:
# Histogram (with KDE overlay) of accuracy, one observation per dyad.
ax = sns.histplot(data=df_dyad, x='Accuracy', kde=True)
# Title and axis labels let the figure be read without the surrounding cells;
# the trailing ';' keeps the cell from echoing the return value of ax.set().
ax.set(
    title='Distribution of accuracy across dyads',
    xlabel='Accuracy (proportion of correct trials)',
    ylabel='Number of dyads',
);

#### Calculate SDT Statistics After Excluding Dyads

In [None]:
# Dyads removed before recomputing the statistics.
# Dyads '10', '19', '33' and '42' are further candidates that are currently
# kept in; add them to this list to exclude them as well.
excluded_dyads = ['29', '51']

keep_trial = ~df['DisplayedDyad'].isin(excluded_dyads)
df_mod = df[keep_trial]

get_stats(df=df_mod)