In [1]:
import pandas as pd
import numpy as np
import os
import glob
from hrvanalysis import get_time_domain_features
from hrvanalysis import get_frequency_domain_features
import neurokit2 as nk
# Notebook display setting: with max_rows=None pandas prints every row of a
# DataFrame instead of truncating long outputs.
pd.set_option('display.max_rows', None)

In [None]:
#Final Analysis
#==============

# Threshold for the gap between two consecutive 'BuzzWireHit' markers, in the
# unit of the 'Timestamp' column (timestamps are divided by 1000 to obtain
# seconds, so presumably milliseconds). A gap below the threshold is counted
# in full as contact time; any larger gap contributes exactly the threshold.
minTimeDiff = 25

# Columns kept from the iMotions export; all but 'MarkerName' are numeric.
_SENSOR_COLUMNS = ['Timestamp', 'Heart rate', 'R-R interval', 'MarkerName',
                   'GSR Conductance CAL']
_NUMERIC_COLUMNS = ['Timestamp', 'Heart rate', 'R-R interval',
                    'GSR Conductance CAL']

# Output column order (note: pre/post-test list GSR before HR, levels list
# HR before GSR). Kept explicit so the written CSV header never changes.
_RESULT_COLUMNS = [
    'Participant_ID', 'Baseline_HR', 'Baseline_GSR',
    'Pretest_Time', 'Pretest_Mistake_Time', 'Pretest_GSR', 'Pretest_HR',
    'Level_1_Time', 'Level_1_Mistake_Time', 'Level_1_HR', 'Level_1_GSR',
    'Level_2_Time', 'Level_2_Mistake_Time', 'Level_2_HR', 'Level_2_GSR',
    'Level_3_Time', 'Level_3_Mistake_Time', 'Level_3_HR', 'Level_3_GSR',
    'Level_4_Time', 'Level_4_Mistake_Time', 'Level_4_HR', 'Level_4_GSR',
    'Posttest_Time', 'Posttest_Mistake_Time', 'Posttest_GSR', 'Posttest_HR',
]

# (result-column prefix, marker opening the phase, marker closing the phase)
_MARKER_BOUNDED_PHASES = (
    ('Pretest', 'pre_test_started', 'level_1_started'),
    ('Level_1', 'level_1_started', 'level_2_started'),
    ('Level_2', 'level_2_started', 'level_3_started'),
    ('Level_3', 'level_3_started', 'level_4_started'),
    ('Level_4', 'level_4_started', 'post_test_started'),
)


def _load_sensor_data(csv_path):
    """Read one sensor CSV and return the typed columns used by the analysis."""
    raw = pd.read_csv(csv_path)
    raw = raw.iloc[27:]  # Ignore iMotions metadata in the first 27 rows
    header_row = raw.iloc[0]  # The first remaining row holds the real column names
    data = pd.DataFrame(raw.values[1:], columns=list(header_row))
    data = data[_SENSOR_COLUMNS].copy()
    for column in _NUMERIC_COLUMNS:
        data[column] = data[column].astype(float)
    return data


def _marker_time(sensor_df, marker):
    """Return the timestamp of the first row whose MarkerName equals ``marker``."""
    times = sensor_df.loc[sensor_df['MarkerName'] == marker, 'Timestamp'].values
    if len(times) == 0:
        raise IndexError("marker '{}' not found in sensor data".format(marker))
    return times[0]


def _rows_between_markers(sensor_df, start_marker, end_marker):
    """Return the rows whose timestamp lies between two markers (inclusive)."""
    start_time = _marker_time(sensor_df, start_marker)
    end_time = _marker_time(sensor_df, end_marker)
    return sensor_df[sensor_df['Timestamp'].between(start_time, end_time)]


def _trial_rows(segment_df, drop_left_after_right, label):
    """Cut a phase segment down to the rows from switch-left to switch-right.

    The trial starts at the last 'LeftSwitchPressed' row and ends at the first
    'RightSwitchPressed' row at or after it. With ``drop_left_after_right``
    every left press located at or after the first right press is ignored.
    When no left press qualifies, the trial starts at the first row of the
    segment.
    """
    segment = segment_df.reset_index(drop=True)
    markers = segment['MarkerName']
    left_positions = np.flatnonzero((markers == 'LeftSwitchPressed').to_numpy())
    right_positions = np.flatnonzero((markers == 'RightSwitchPressed').to_numpy())
    if right_positions.size == 0:
        raise ValueError(
            "no 'RightSwitchPressed' marker found in {}".format(label))

    if drop_left_after_right:
        # Selecting by position replaces the former in-place ``replace`` on an
        # ``iloc`` slice, which is not guaranteed to modify the parent frame.
        left_positions = left_positions[left_positions < right_positions[0]]

    start = int(left_positions[-1]) if left_positions.size else 0
    finishing_presses = right_positions[right_positions >= start]
    if finishing_presses.size == 0:
        raise ValueError(
            "no 'RightSwitchPressed' marker after the last "
            "'LeftSwitchPressed' in {}".format(label))
    end = int(finishing_presses[0])
    return segment.iloc[start:end + 1]


def _contact_time_secs(trial_df, threshold):
    """Sum the capped gaps between consecutive 'BuzzWireHit' rows, in seconds.

    Only gaps between successive hits are counted, so zero or one hit gives 0.
    """
    hit_times = trial_df.loc[
        trial_df['MarkerName'] == 'BuzzWireHit', 'Timestamp'
    ].to_numpy(dtype=float)
    gaps = np.diff(hit_times)
    capped_gaps = np.where(gaps < threshold, gaps, threshold)
    return float(capped_gaps.sum()) / 1000


def _trial_metrics(prefix, segment_df, drop_left_after_right, threshold):
    """Return the four ``<prefix>_*`` result values for one phase segment."""
    trial = _trial_rows(segment_df, drop_left_after_right, prefix)
    timestamps = trial['Timestamp']
    return {
        prefix + '_Time': (timestamps.iloc[-1] - timestamps.iloc[0]) / 1000,
        prefix + '_Mistake_Time': _contact_time_secs(trial, threshold),
        prefix + '_GSR': trial['GSR Conductance CAL'].mean(),
        prefix + '_HR': trial['Heart rate'].mean(),
    }


def analyzeData(condition):
    """Compute per-participant summary metrics for one study condition.

    Every CSV in ``<condition>_BuzzwireStudy/BuzzwireStudy/Sensor Data/
    FinalRemoved`` is analysed and one row per file is written to
    ``finalResults_removed_<condition>.csv`` in the working directory.

    For each file the result row holds the mean heart rate and GSR of the
    baseline window, and for the pre-test, levels 1-4 and the post-test the
    trial duration, the wire-contact ("mistake") time, and the mean GSR and
    heart rate of the trial. The contact-time threshold is read from the
    module-level ``minTimeDiff`` when the function is called.

    Parameters:
        condition: prefix of the study folder and suffix of the output file.

    Raises:
        IndexError: a required phase marker is missing from a file, or the
            file has fewer rows than the metadata block plus header.
        ValueError: a phase contains no usable 'RightSwitchPressed' marker.
    """
    pattern = os.path.join(condition + "_BuzzwireStudy", "BuzzwireStudy",
                           "Sensor Data", "FinalRemoved", "*.csv")
    threshold = minTimeDiff
    result_rows = []

    # Sorted so the row order of the output does not depend on the file system.
    for file in sorted(glob.glob(pattern)):
        file_name = os.path.basename(file)
        participant_id = file_name[file_name.find('_') + 1:file_name.find('.')]
        print('partipant_id - ', participant_id)

        sensor_df = _load_sensor_data(file)

        # Baseline: plain means over the whole marker window, no trial cut.
        baseline_df = _rows_between_markers(
            sensor_df, 'baseline_started', 'baseline_over')
        row = {
            'Participant_ID': participant_id,
            'Baseline_HR': baseline_df['Heart rate'].mean(),
            'Baseline_GSR': baseline_df['GSR Conductance CAL'].mean(),
        }

        # Pre-test and levels 1-4: window bounded by the next phase marker;
        # left presses after the first right press are discarded.
        for prefix, start_marker, end_marker in _MARKER_BOUNDED_PHASES:
            segment_df = _rows_between_markers(
                sensor_df, start_marker, end_marker)
            row.update(_trial_metrics(prefix, segment_df, True, threshold))

        # Post-test: everything from its start marker to the end of the file.
        # Late left presses are NOT discarded here, as in the original analysis.
        post_start = np.flatnonzero(
            (sensor_df['MarkerName'] == 'post_test_started').to_numpy())
        if post_start.size == 0:
            raise IndexError(
                "marker 'post_test_started' not found in sensor data")
        posttest_segment_df = sensor_df.iloc[int(post_start[0]):]
        row.update(
            _trial_metrics('Posttest', posttest_segment_df, False, threshold))

        result_rows.append(row)

    # Build the frame once; DataFrame.append no longer exists in pandas >= 2.0.
    finalResultsDF = pd.DataFrame(result_rows, columns=_RESULT_COLUMNS)
    finalResultsDF.to_csv(('finalResults_removed_' + condition + '.csv'), sep=',')
    print("Analysis complete")
