In [47]:
import os, sys
import glob
import json
import re
from tqdm import tqdm 
from itertools import product

import numpy as np
import pandas as pd

sys.path.append('/dartfs/rc/lab/F/FinnLab/tommy/isc_asynchrony_behavior/code/utils/')

from config import *
import analysis_utils as utils

In [None]:
# Experiment identifier and directory layout used by this notebook.
# BASE_DIR is not defined in the notebook itself; presumably it is provided by
# `from config import *` -- TODO confirm.
EXPERIMENT_NAME = 'pilot-multimodal-0'
# Directory of preprocessed stimuli (under BASE_DIR)
preproc_dir = os.path.join(BASE_DIR, 'stimuli/preprocessed')
behavioral_dir = os.path.join(BASE_DIR, 'derivatives/results/behavioral/') # where we will write our data

# Cleaned per-subject results for EXPERIMENT_NAME, sourced for aggregating data across subjects.
# NOTE(review): `p` is not defined anywhere in the visible notebook; unless the
# config star-import supplies it, this line raises NameError -- confirm.
# NOTE(review): `results_dir` is reassigned in the aggregation cell further
# down, so the value built here is not the one used for the demographics.
results_dir = os.path.join(BASE_DIR, 'experiments',  EXPERIMENT_NAME, 'cleaned-results', p.experiment_version)

In [126]:
def get_subject_demographics(results_dir, task, modality, max_age=65):
    """Collect the demographic responses of every subject in one task/modality.

    Subject folders are discovered with the glob pattern
    ``<results_dir>/<version>/<task>/<modality>/sub*``, where ``<version>`` is
    ``'pilot-multimodal-01'`` for the ``'black'`` task and
    ``'final-multimodal-01'`` for every other task.

    Parameters
    ----------
    results_dir : str
        Root directory that contains the per-version cleaned results.
    task : str
        Task name; also selects the results version as described above.
    modality : str
        Modality sub-folder to read (the notebook uses 'video', 'audio', 'text').
    max_age : float, default 65
        Exclusive upper bound for a usable age response. Ages at or above this
        value, and responses that cannot be parsed as a number, are replaced
        with NaN.

    Returns
    -------
    pandas.DataFrame
        The transposed demographics of all subjects stacked together, with
        columns named after the first column of each loaded demographics frame
        plus a ``prolific_id`` column. An empty DataFrame is returned when no
        subject folder matches.
    """
    # Only the 'black' task lives under the pilot version folder
    version = 'pilot-multimodal-01' if task == 'black' else 'final-multimodal-01'

    # Get subject directories
    sub_dirs = sorted(glob.glob(os.path.join(results_dir, version, task, modality, 'sub*')))

    print(f'Total of {len(sub_dirs)} subjects')

    subject_frames = []

    for sub_dir in tqdm(sub_dirs):
        sub = os.path.basename(sub_dir)
        current_id, demographics, _, _ = utils.load_participant_results(sub_dir, sub)

        # Work on a copy so the loader's frame is never mutated, and force an
        # object column so numeric ages can sit next to the text responses.
        demographics = demographics.copy()
        demographics['response'] = demographics['response'].astype(object)

        age_filter = demographics['experiment_phase'] == 'demographics-age'

        if age_filter.any():
            # Unparseable answers become NaN instead of raising, which replaces
            # the previous bare `except:` that hid every kind of error.
            ages = pd.to_numeric(
                demographics.loc[age_filter, 'response'], errors='coerce'
            ).astype(float)
            # NaN compares False, so missing values stay NaN here as well.
            # `.to_numpy()` assigns by position and avoids index alignment.
            demographics.loc[age_filter, 'response'] = ages.where(ages < max_age).to_numpy()

        # Long -> wide: after transposing, the first row holds the field names.
        # This assumes the first column of `demographics` is the field label
        # (presumably 'experiment_phase') -- TODO confirm against the loader.
        wide = demographics.T.reset_index(drop=True)
        wide.columns = wide.iloc[0]

        # Remove the header row; copy so adding a column does not write to a slice
        wide = wide.iloc[1:].copy()
        wide['prolific_id'] = current_id

        subject_frames.append(wide)

    # pd.concat raises on an empty list; return an empty frame instead
    if not subject_frames:
        return pd.DataFrame()

    return pd.concat(subject_frames).reset_index(drop=True)

In [134]:
# NOTE(review): `all_demographics` is only assigned in the aggregation cell
# further down, so running the notebook top to bottom on a fresh kernel fails
# here with NameError (unless something outside this notebook defines it).
# That later cell already ends with this same cast, so this cell looks
# redundant -- consider removing it or moving it below the aggregation.
all_demographics['demographics-age'] = all_demographics['demographics-age'].astype(float)

In [None]:
# Every task is crossed with every modality below
task_list = ['black', 'wheretheressmoke', 'howtodraw']
modality_list = ['video', 'audio', 'text']

results_dir = os.path.join(BASE_DIR, 'experiments',  'next-word-prediction', 'cleaned-results')

# One demographics frame per (task, modality) combination
condition_frames = []
for task, modality in product(task_list, modality_list):
    demographics = get_subject_demographics(results_dir, task, modality)
    condition_frames.append(demographics)

# Stack all conditions under a fresh 0..n-1 index and make the age column numeric
all_demographics = pd.concat(condition_frames, ignore_index=True)
all_demographics = all_demographics.astype({'demographics-age': float})

Total of 200 subjects


100%|██████████| 200/200 [00:04<00:00, 40.71it/s]


Total of 200 subjects


100%|██████████| 200/200 [00:05<00:00, 39.05it/s]


Total of 200 subjects


100%|██████████| 200/200 [00:04<00:00, 40.71it/s]


Total of 150 subjects


100%|██████████| 150/150 [00:03<00:00, 42.88it/s]


Total of 150 subjects


100%|██████████| 150/150 [00:03<00:00, 43.48it/s]


Total of 150 subjects


100%|██████████| 150/150 [00:03<00:00, 43.58it/s]


Total of 150 subjects


100%|██████████| 150/150 [00:04<00:00, 36.59it/s]


Total of 150 subjects


100%|██████████| 150/150 [00:03<00:00, 39.95it/s]


Total of 150 subjects


100%|██████████| 150/150 [00:04<00:00, 34.59it/s]


### Gender demographics

In [137]:
# Number of participants per reported gender response, most frequent first.
# value_counts() leaves out missing (NaN) responses by default.
all_demographics['demographics-gender'].value_counts()

Female                   743
Male                     727
Do not wish to report     14
Non-Binary                10
Other                      6
Name: demographics-gender, dtype: int64

### Age demographics

In [135]:
# Summarise participant age; pandas skips NaN entries in both statistics
ages = all_demographics['demographics-age']
mean_age, std_age = ages.mean(), ages.std()

print(f"Mean age: {mean_age}")
print(f"STD age: {std_age}")

Mean age: 37.78812415654521
STD age: 11.791443096895717
