In [2]:
import ast
import os
import shutil
import warnings
import zipfile
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import ttest_ind
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.multicomp import MultiComparison
from statsmodels.stats.multicomp import pairwise_tukeyhsd

warnings.filterwarnings("ignore")

In [4]:
# Directory the eye-tracking CSV files are read from.
et_path = '../data/et'

# os.listdir returns entries in arbitrary order; sort them so that files are
# processed (and later concatenated) in the same order on every run.
file_list = sorted(os.listdir(et_path))


In [8]:
# Load every eye-tracking CSV, tag it with its participant id, and stack the
# results into one frame. Frames are collected in a list and concatenated once
# at the end instead of re-copying the growing frame on every iteration.
et_frames = []
success_parsed_participant_et = []
for file in file_list:
    if not file.endswith('.csv'):
        continue
    try:
        # The text before the first '_' is the participant number; the int
        # round-trip drops any leading zeros.
        participant_id = str(int(file.split('_')[0]))
        df_et = pd.read_csv(os.path.join(et_path, file))
        df_et['participant'] = participant_id
        df_et['event'] = df_et['event'].astype(str)
        # Drop samples whose event is '-1' and expose the event as 'marker_id'.
        # rename() returns a new frame, so nothing is written into a slice.
        df_et = df_et[df_et['event'] != '-1'].rename(columns={'event': 'marker_id'})
    except Exception as e:
        # Best effort: report the failing file and keep going with the rest.
        print(e)
        print(file)
        continue
    et_frames.append(df_et)
    success_parsed_participant_et.append(participant_id)

# pd.concat raises on an empty list, so fall back to an empty frame.
df_et_parsed = pd.concat(et_frames, axis=0) if et_frames else pd.DataFrame()

In [9]:
# Display the combined eye-tracking samples from all successfully parsed files.
df_et_parsed

Unnamed: 0,device_time_stamp,system_time_stamp,gaze_point_x,gaze_point_y,left_gaze_point_on_display_area_x,left_gaze_point_on_display_area_y,right_gaze_point_on_display_area_x,right_gaze_point_on_display_area_y,left_gaze_point_validity,right_gaze_point_validity,left_pupil_diameter,right_pupil_diameter,left_pupil_validity,right_pupil_validity,marker_id,participant
3424,493771620703,493757589631,0.572417,0.516521,0.546936,0.524540,0.597897,0.508502,1,1,2.759796,2.903702,1,1,1124_que,10
3425,493771637358,493757606286,0.574980,0.515769,0.547883,0.520609,0.602077,0.510929,1,1,2.747055,2.882965,1,1,1124_que,10
3426,493771654012,493757622940,0.576869,0.519115,0.550389,0.523084,0.603348,0.515145,1,1,2.759094,2.860168,1,1,1124_que,10
3427,493771670667,493757639595,0.577982,0.523845,0.549652,0.527064,0.606312,0.520627,1,1,2.753647,2.849640,1,1,1124_que,10
3428,493771687322,493757656250,0.575631,0.519208,0.547587,0.525023,0.603675,0.513393,1,1,2.757065,2.843231,1,1,1124_que,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254351,150968570735,150951304098,,,,,,,0,0,,,0,0,24_vivid,7
254352,150968587389,150951320753,,,,,,,0,0,,,0,0,24_vivid,7
254353,150968604044,150951337408,,,,,,,0,0,,,0,0,24_vivid,7
254354,150968620699,150951354063,,,,,,,0,0,,,0,0,24_vivid,7


In [10]:
# Function definitions

# Columns copied from each trial row of the raw frame into the parsed frame.
shared_columns = [
    'idx',
    'dimension', 'rot_type', 'angle', 'mirror', 'wm',
    'pair_id', 'obj_id',
    'orientation1', 'orientation2',
    'image_path_1', 'image_path_2',
    'marker_id',
    'correctAns',
    'vivid_response',
    'key_resp_vivid_slider_control.keys',
    'key_resp_vivid_slider_control.rt',
    'participant',
    'condition_file',
]

def get_ans_key(row):
    """Return the answer key and reaction time recorded for one trial row.

    The three response components ('key_resp', 'key_resp_3', 'key_resp_6')
    are checked in that order; the first one whose '.keys' value is neither
    missing nor an empty string wins.

    Parameters
    ----------
    row : mapping
        Anything indexable by column name (e.g. a DataFrame row).

    Returns
    -------
    tuple
        ``(key, rt)`` from the first populated component, or
        ``(np.nan, np.nan)`` when none of them holds a key.
    """
    response_components = ('key_resp', 'key_resp_3', 'key_resp_6')
    for component in response_components:
        pressed = row[component + '.keys']
        if pd.isna(pressed) or pressed == '':
            continue
        return pressed, row[component + '.rt']
    return np.nan, np.nan

# Rating (1-4) assigned to the last key pressed on a rating screen.
_RATING_BY_KEY = {'comma': 1, 'period': 2, 'slash': 3, 'rshift': 4}


def _rating_from_keys(raw_keys):
    """Convert the stored text of a key-press list into a 1-4 rating.

    Parameters
    ----------
    raw_keys : str or missing value
        Text of a Python list literal of key names, e.g. "['comma', 'slash']".

    Returns
    -------
    int or float
        The rating mapped from the last key in the list, or ``np.nan`` when
        the value is missing, 'None', empty, unparsable, an empty list, or
        ends in a key that has no rating.
    """
    if pd.isna(raw_keys) or raw_keys == 'None' or raw_keys == '':
        return np.nan
    try:
        # literal_eval only accepts Python literals, so text read from the
        # data file can never be executed as code (unlike eval).
        pressed = ast.literal_eval(raw_keys)
    except (ValueError, SyntaxError, TypeError):
        # Show the offending value so malformed cells can be inspected.
        print(raw_keys)
        return np.nan
    if isinstance(pressed, (list, tuple)) and len(pressed) > 0:
        last_key = pressed[-1]
        if isinstance(last_key, str):
            return _RATING_BY_KEY.get(last_key, np.nan)
    return np.nan


def get_strategy_response(row):
    """Return the 1-4 strategy rating encoded in 'key_resp_strat_control.keys'.

    The rating comes from the last key in the stored key list
    (comma=1, period=2, slash=3, rshift=4); ``np.nan`` is returned when no
    usable rating is present.
    """
    return _rating_from_keys(row['key_resp_strat_control.keys'])


def get_vivid_response(row):
    """Return the 1-4 vividness rating encoded in 'key_resp_vivid_slider_control.keys'.

    The rating comes from the last key in the stored key list
    (comma=1, period=2, slash=3, rshift=4); ``np.nan`` is returned when no
    usable rating is present.
    """
    return _rating_from_keys(row['key_resp_vivid_slider_control.keys'])

def get_block(row):
    """Name the experimental block a trial row belongs to.

    The label is '<stimulus>_<load>' where stimulus is '2D', '3Dp' or '3Dd'
    (taken from 'dimension' and, for 3D trials, 'rot_type') and load is
    'single' when 'wm' is False or 'wm' when it is True.

    Returns
    -------
    str or None
        The block label, or None when the row matches none of the known
        combinations.
    """
    dimension = row['dimension']
    if dimension == '2D':
        stimulus = '2D'
    elif dimension == '3D':
        rotation = row['rot_type']
        if rotation == 'p':
            stimulus = '3Dp'
        elif rotation == 'd':
            stimulus = '3Dd'
        else:
            return None
    else:
        return None

    # Compare with == (not `is`) so numpy booleans coming out of a DataFrame
    # are recognised as well as Python bools.
    working_memory = row['wm']
    if working_memory == False:
        return stimulus + '_single'
    if working_memory == True:
        return stimulus + '_wm'
    return None

def get_corr(row):
    """Score one trial: 1 if the answer matches 'correctAns', 0 if not.

    Parameters
    ----------
    row : mapping
        Must provide 'ans_key' and 'correctAns'.

    Returns
    -------
    int or float
        1 for a correct answer, 0 for an incorrect one, and ``np.nan`` when
        no answer was recorded.
    """
    # pd.isna recognises every missing-value representation (np.nan, None,
    # float('nan'), re-boxed floats); an identity test against np.nan only
    # matches the singleton and would score other missing answers as wrong.
    if pd.isna(row['ans_key']):
        return np.nan
    return 1 if row['correctAns'] == row['ans_key'] else 0


def parse_excel(df):
    """Turn one raw behavioral DataFrame into a per-trial table.

    Rows with a non-missing 'dimension' are treated as trials. For each trial
    the columns in ``shared_columns`` are kept and the following are derived:
    'ans_key' / 'rt' (via ``get_ans_key``), 'vivid_response', 'block', 'corr',
    'strategy_response' and 'mini_block'. Strategy key presses are taken from
    the rows where 'key_resp_strat_control.keys' is present and joined onto
    the trials through 'condition_file'.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data containing at least 'dimension',
        'key_resp_strat_control.keys', 'key_resp_strat_control.rt', the
        response-key columns read by ``get_ans_key``, and every column in
        ``shared_columns`` except 'idx' (which is created here).

    Returns
    -------
    pandas.DataFrame
        One row per trial/strategy-row match, without 'condition_file'.
    """
    # reset_index returns a new frame, so adding 'idx' does not write into a
    # slice of the caller's DataFrame.
    df_blocks = df[~df['dimension'].isna()].reset_index(drop=True)
    df_blocks['idx'] = df_blocks.index

    strat_columns = ['condition_file', 'key_resp_strat_control.keys', 'key_resp_strat_control.rt']
    df_strat = df.loc[~df['key_resp_strat_control.keys'].isna(), strat_columns]

    # Copy the shared columns in one step instead of filling the frame cell by
    # cell, then append the answer key / reaction time of each trial.
    df_parsed = df_blocks[shared_columns].copy()
    answers = [get_ans_key(row) for _, row in df_blocks.iterrows()]
    # Keep 'ans_key' as an object column so missing answers stay as stored.
    df_parsed['ans_key'] = pd.Series([key for key, _ in answers],
                                     index=df_parsed.index, dtype=object)
    df_parsed['rt'] = [rt for _, rt in answers]

    # replace all 'None' values with np.nan
    df_parsed = df_parsed.replace('None', np.nan)
    df_parsed['vivid_response'] = df_parsed.apply(get_vivid_response, axis=1)

    # fill na values in 'rot_type', 'pair_id', 'orientation1', 'orientation2', 'image_path_2' with not applicable
    # Assign the result back: chained `df[col].fillna(..., inplace=True)` does
    # not update the frame when pandas copy-on-write is active.
    for col in ['rot_type', 'pair_id', 'orientation1', 'orientation2', 'image_path_2']:
        df_parsed[col] = df_parsed[col].fillna('na')

    df_parsed['block'] = df_parsed.apply(get_block, axis=1)
    df_parsed['corr'] = df_parsed.apply(get_corr, axis=1)

    # NOTE(review): this is an inner join, so trials whose 'condition_file'
    # has no strategy row are dropped and duplicates multiply rows — confirm
    # that each condition file has exactly one strategy row.
    df_parsed = df_parsed.merge(df_strat, on='condition_file')
    df_parsed['strategy_response'] = df_parsed.apply(get_strategy_response, axis=1)

    # 'mini_block' is the second '/'-separated path component with its
    # extension removed; this requires at least one '/' in 'condition_file'.
    df_parsed['mini_block'] = df_parsed['condition_file'].apply(lambda x: x.split('/')[1].split('.')[0])
    return df_parsed.drop(columns=['condition_file'])

In [11]:
# Directory the behavior CSV is read from.
behavior_path = '../data/behavior'

# Single file used to try out the parsing functions.
test_file = '005_ps_2024-05-30_13h59.48.753.csv'

df_test = pd.read_csv(os.path.join(behavior_path, test_file))


In [14]:
# Parse the test file and write the result to disk (without the index column).
df_parsed = parse_excel(df_test)
df_parsed.to_csv('../test.csv', index=False)