In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None  # default='warn'
from pathlib import Path
from IPython.display import display

In [2]:
# Participant group to process. The value selects the input folder
# "Data/<group>" and is embedded in the output file name "all_<group>.csv".
# Possible values: "control", "test", "all".
group = "control"  
#group = "test"
#group = "all" 

In [3]:
def event_label(row):
    """Translate a row's 'Eye movement type' into its numeric label.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the key 'Eye movement type'.

    Returns
    -------
    int
        2 for 'Saccade', 3 for 'Fixation', and 1 for 'Unclassified' or any
        other value.
    """
    movement = row['Eye movement type']
    # Only the two classified movement types get their own code.
    for movement_name, code in (('Saccade', 2), ('Fixation', 3)):
        if movement == movement_name:
            return code
    # 'Unclassified' and everything unrecognised share label 1.
    return 1

def fix_left_based_on_right(left, right):
    """Return the left pupil diameter, falling back to the right one.

    Parameters
    ----------
    left, right : float
        Scalar pupil diameters; NaN marks a missing measurement.

    Returns
    -------
    float
        `right` when `left` is NaN and `right` is not; otherwise `left`
        (which is NaN when both measurements are missing).
    """
    # Logical `not` instead of bitwise `~`: `~` only acts as negation on NumPy
    # booleans and would yield a truthy integer for a plain Python bool.
    if np.isnan(left) and not np.isnan(right):
        return right
    return left

def fix_right_based_on_left(left, right):
    """Return the right pupil diameter, falling back to the left one.

    Parameters
    ----------
    left, right : float
        Scalar pupil diameters; NaN marks a missing measurement.

    Returns
    -------
    float
        `left` when `right` is NaN and `left` is not; otherwise `right`
        (which is NaN when both measurements are missing).
    """
    # Logical `not` instead of bitwise `~`: `~` only acts as negation on NumPy
    # booleans and would yield a truthy integer for a plain Python bool.
    if np.isnan(right) and not np.isnan(left):
        return left
    return right

def create_file(path, name):
    """Make sure the file `name` exists inside directory `path`.

    The joined path is printed. Missing parent directories are created, and
    an already existing file is left untouched (its content is not
    truncated).

    Parameters
    ----------
    path : str
        Directory that should contain the file.
    name : str
        File name.

    Returns
    -------
    str
        The result of ``os.path.join(path, name)``.
    """
    path_created = os.path.join(path, name)
    print(path_created)
    myfile = Path(path_created)
    # touch() raises FileNotFoundError when the directory does not exist yet,
    # so create the directory tree first.
    myfile.parent.mkdir(parents=True, exist_ok=True)
    myfile.touch(exist_ok=True)
    return path_created
    

In [4]:
all_participants = []

# Input folder is derived from the `group` variable set at the top of the
# notebook (possible groups: control, test, all).
folder_path = "Data/" + group

# A participant file is only used when it contains exactly this many distinct
# "Recording start time UTC" values.
# NOTE(review): an earlier comment here spoke of 8 tasks per participant while
# the code has always compared against 4 - confirm which number is intended.
EXPECTED_RECORDINGS = 4

# Columns carried over from the raw export into the per-participant frame.
RESULT_COLUMNS = [
    "Participant name",
    "Recording timestamp",
    "Eye movement type",
    "Event",
    "Pupil diameter left",
    "Pupil diameter right",
    "Label",
]

for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)

    # Skip sub-directories (e.g. .ipynb_checkpoints); read_csv cannot open them.
    if not os.path.isfile(file_path):
        continue

    # Read the tab-separated participant file; "." marks a missing value.
    # `sep` is passed by keyword because pandas 2 no longer accepts it
    # positionally.
    df_file_content = pd.read_csv(file_path, sep="\t", na_values='.', low_memory=False)

    number_of_attemps = df_file_content["Recording start time UTC"].nunique()
    if number_of_attemps != EXPECTED_RECORDINGS:
        continue

    # Drop columns that contain nothing but NaN.
    df_file_content = df_file_content.dropna(how='all', axis=1)

    # Treat "EyesNotFound" and missing movement types as "Unclassified".
    # The fillna result is assigned back instead of using a chained
    # `inplace=True`, which is not guaranteed to modify the frame.
    df_file_content = df_file_content.replace("EyesNotFound", "Unclassified")
    df_file_content['Eye movement type'] = df_file_content['Eye movement type'].fillna("Unclassified")

    # Numeric label for the eye movement type. Only the column event_label
    # reads is passed, so the row-wise apply does not build full-width rows.
    df_file_content['Label'] = df_file_content[['Eye movement type']].apply(event_label, axis=1)

    # Work on an explicit copy so the column assignments below do not write
    # into a view of the raw frame.
    df_result = df_file_content[RESULT_COLUMNS].copy()

    # Pupil diameters use a decimal comma; convert them to float.
    for pupil_column in ("Pupil diameter left", "Pupil diameter right"):
        df_result[pupil_column] = (
            df_result[pupil_column].str.replace(',', '.', regex=False).astype(float)
        )

    # When only one eye was measured, reuse that value for the other eye.
    # Rows where both eyes are missing stay NaN.
    df_result["Pupil diameter left fixed"] = df_result["Pupil diameter left"].fillna(df_result["Pupil diameter right"])
    df_result["Pupil diameter right fixed"] = df_result["Pupil diameter right"].fillna(df_result["Pupil diameter left"])

    # Mean, absolute difference and relative difference of the two fixed
    # diameters. A row with a single measured eye therefore has a difference
    # of 0; a row with no measurement is NaN in all three columns.
    df_result["Pupil_diameter_mean_fixed"] = (df_result["Pupil diameter left fixed"] + df_result["Pupil diameter right fixed"]) / 2
    df_result["Pupil_diameter_D_fixed"] = (df_result["Pupil diameter left fixed"] - df_result["Pupil diameter right fixed"]).abs()
    df_result["Pupil_diameter_p_fixed"] = df_result["Pupil_diameter_D_fixed"] / df_result["Pupil_diameter_mean_fixed"]

    # Add to the list with all the participants
    all_participants.append(df_result)

In [5]:
# Columns read from each questionnaire file: the participant id plus the two
# score columns that file provides.
columns_A = ['Participant nr', 'Total Score original', 'Total Score extended']
columns_B = ['Participant nr', 'SUM STANDARD', 'Sum']

# Questionnaire file IA (default comma delimiter); rows that are completely
# empty are discarded. Participant ids are kept as strings.
empathy_before = pd.read_csv(
    os.path.join('empathy_scores', 'Questionnaire_datasetIA.csv'),
    usecols=columns_A,
    dtype={'Participant nr': str},
).dropna(how='all')

# Questionnaire file IB is semicolon-delimited.
empathy_after = pd.read_csv(
    os.path.join('empathy_scores', 'Questionnaire_datasetIB.csv'),
    usecols=columns_B,
    dtype={'Participant nr': str},
    delimiter=';',
)
def _participant_number(participant_name):
    """Return the number encoded in a name such as 'Participant0007' as '7'."""
    # Strip only the textual prefix: removing "Participant00" left names with
    # three or more significant digits (e.g. 'Participant0100') unparsable.
    return str(int(float(participant_name.replace("Participant", ""))))


def _scores_for(scores, participant_number, label):
    """Return the rows of `scores` whose 'Participant nr' equals the number."""
    matches = scores.loc[scores['Participant nr'] == participant_number]
    if matches.empty:
        # Same exception type as the bare `.iloc[0]` lookup used to raise,
        # but with a message that names the missing participant.
        raise IndexError(
            "participant " + participant_number
            + " not found in the '" + label + "' questionnaire scores"
        )
    return matches


def add_empathy_scores(all_participants, scores_before=None, scores_after=None):
    """Attach questionnaire scores to every participant frame, in place.

    Each frame gets the columns 'number', 'empathy_before_std',
    'empathy_before_ext', 'empathy_after_std' and 'empathy_after_ext', all
    filled with one constant value per participant.

    Parameters
    ----------
    all_participants : iterable of pandas.DataFrame
        Frames with a 'Participant name' column; the first row's name
        identifies the participant.
    scores_before : pandas.DataFrame, optional
        Table with 'Participant nr', 'Total Score original' and
        'Total Score extended'. Defaults to the module-level
        `empathy_before`.
    scores_after : pandas.DataFrame, optional
        Table with 'Participant nr', 'SUM STANDARD' and 'Sum'. Defaults to
        the module-level `empathy_after`.

    Raises
    ------
    IndexError
        If a participant number is absent from either score table.
    """
    if scores_before is None:
        scores_before = empathy_before
    if scores_after is None:
        scores_after = empathy_after

    for participant in all_participants:
        participant_number = _participant_number(participant['Participant name'].iloc[0])
        participant["number"] = participant_number

        # Look each table up once; the first matching row provides the scores.
        before_matches = _scores_for(scores_before, participant_number, "before")
        after_matches = _scores_for(scores_after, participant_number, "after")

        participant['empathy_before_std'] = before_matches['Total Score original'].iloc[0]
        participant['empathy_before_ext'] = before_matches['Total Score extended'].iloc[0]
        participant['empathy_after_std'] = after_matches['SUM STANDARD'].iloc[0]
        participant['empathy_after_ext'] = after_matches['Sum'].iloc[0]
    


In [6]:
def write_to_file(all_participants):
    """Summarise every stimulus interval per participant and write a CSV.

    For each participant frame the 'ImageStimulusStart' / 'ImageStimulusEnd'
    events are taken in order and paired two by two. Every pair delimits one
    attempt; the rows strictly between the two timestamps are summarised
    into one record (pupil mean/std, saccade and fixation counts and
    fractions, empathy scores). Two further columns, 'total_mean' and
    'total_std', hold the mean and standard deviation of the attempt means
    of the same participant.

    The table is written, semicolon-separated and without index, to
    "Data/result_matrix/all_<group>.csv", where `group` is the module-level
    variable and the file is prepared by `create_file`.

    Parameters
    ----------
    all_participants : iterable of pandas.DataFrame
        Frames providing 'Participant name', 'Event', 'Recording timestamp',
        'Label', 'Pupil_diameter_mean_fixed', 'empathy_before_ext' and
        'empathy_after_ext'.

    Notes
    -----
    A trailing event without a partner is ignored, and an interval that
    contains no rows gets NaN fractions instead of raising.
    """
    start = "ImageStimulusStart"
    stop = "ImageStimulusEnd"
    options = [start, stop]

    # Column order of the output file, kept identical to the layout produced
    # before the records were collected in a list.
    summary_columns = [
        "participant", "attempt", "attempt_mean", "attempt_std_d",
        "count_sacc", "count_fix", "empathy_before", "empathy_after",
        "fraction_sacc", "fraction_fix", "empathy_diff", "empathy_mean",
    ]

    # Records are gathered in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas 2 and was quadratic here.
    records = []

    for participant_df in all_participants:
        # Strip only the textual prefix so numbers with three or more
        # significant digits (e.g. 'Participant0100') parse as well.
        participant_number = str(int(float(participant_df['Participant name'].iloc[0].replace("Participant", ""))))

        # The empathy scores are constant per participant; read them once.
        empathy_before_ext = participant_df['empathy_before_ext'].iloc[0]
        empathy_after_ext = participant_df['empathy_after_ext'].iloc[0]
        empathy_diff = empathy_after_ext - empathy_before_ext
        empathy_mean = (empathy_before_ext + empathy_after_ext) / 2

        df_attempts = participant_df.loc[
            participant_df['Event'].isin(options), ['Recording timestamp', 'Event']
        ].reset_index(drop=True)
        event_timestamps = df_attempts['Recording timestamp']

        # Walk the events in (first, second) pairs; stopping at len - 1 skips
        # an unpaired trailing event instead of indexing past the end.
        for attempt, i in enumerate(range(0, len(event_timestamps) - 1, 2), start=1):
            from_ts = event_timestamps.iloc[i]
            to_ts = event_timestamps.iloc[i + 1]

            in_interval = participant_df['Recording timestamp'].between(from_ts, to_ts, inclusive='neither')
            timeseries_df = participant_df[in_interval]

            count_sacc = int((timeseries_df['Label'] == 2).sum())
            count_fix = int((timeseries_df['Label'] == 3).sum())
            count_events = len(timeseries_df)
            # An empty interval has no meaningful fractions.
            fraction_sacc = count_sacc / count_events if count_events else np.nan
            fraction_fix = count_fix / count_events if count_events else np.nan

            records.append({
                "participant": participant_number,
                "attempt": attempt,
                "attempt_mean": timeseries_df['Pupil_diameter_mean_fixed'].mean(),
                "attempt_std_d": timeseries_df['Pupil_diameter_mean_fixed'].std(),
                "count_sacc": count_sacc,
                "count_fix": count_fix,
                "fraction_sacc": fraction_sacc,
                "fraction_fix": fraction_fix,
                "empathy_before": empathy_before_ext,
                "empathy_after": empathy_after_ext,
                "empathy_diff": empathy_diff,
                "empathy_mean": empathy_mean,
            })

    all_participant_attempt_summary = pd.DataFrame(records, columns=summary_columns)

    # Mean and standard deviation of the attempt means, repeated on every row
    # of the same participant.
    if records:
        attempt_means = all_participant_attempt_summary.groupby('participant')['attempt_mean']
        all_participant_attempt_summary['total_mean'] = attempt_means.transform('mean')
        all_participant_attempt_summary['total_std'] = attempt_means.transform('std')
    else:
        all_participant_attempt_summary['total_mean'] = np.nan
        all_participant_attempt_summary['total_std'] = np.nan

    # Write the summary table to the result file.
    path = "Data/result_matrix/"
    all_participant_attempt_summary.to_csv(create_file(path, 'all_' + str(group) + '.csv'), sep=";", index=False)

In [7]:

# Order matters: add_empathy_scores adds the 'empathy_before_ext' and
# 'empathy_after_ext' columns that write_to_file reads from each frame.
add_empathy_scores(all_participants)
write_to_file(all_participants)

Data/result_matrix/all_test.csv
