This script goes through the dataframe generated from the MATLAB file and pulls out the pupil data we are interested in analyzing. For example, we are interested in what happens after a stimulus onset, so this script collects the pupil data around each onset and puts it into a dataframe to be analyzed later.

In [2]:
import pandas as pd 
from ast import literal_eval
import math 

In [None]:
# Load the two input tables from CSV. The paths are relative to the directory
# the notebook is run from.
#   eye_df      - eye samples; later cells read its 'Task Type', 'SID', 'Trial',
#                 'Time', 'Pupil Diameter', 'Pupil X' and 'Pupil Y' columns.
#   behavior_df - behavioral records; later cells read its 'Task Type', 'SID',
#                 'Trial' and 'Stimulus Time' columns.
eye_df = pd.read_csv("data/eye_data.csv")
behavior_df = pd.read_csv("data2/behavior_data_2.csv")

In [4]:
# Show which columns the behavioral table provides.
print(behavior_df.columns)

Index(['Task Type', 'SID', 'Trial', 'Stimulus Time', 'Reaction Time',
       'ProbeOnsetTime', 'isCorrectResponse'],
      dtype='object')


In [5]:


# Per-task window length: the smallest gap between consecutive stimulus times
# found within any single (subject, block) of that task, floored to a whole
# number. The extraction cell further down reads `l[task]` to decide how much
# data after each stimulus onset to keep.
l = {}
taskTypes = behavior_df["Task Type"].unique()
# Not needed by the grouped computation below; still assigned because notebook
# cells share one namespace.
SIDS = behavior_df["SID"].unique()

# Only blocks 0-9 are considered.
block_ids = list(range(10))

for task in taskTypes:
    subject_behav_df = behavior_df[
        (behavior_df['Task Type'] == task) & behavior_df['Trial'].isin(block_ids)
    ]

    # Difference between each stimulus time and the previous one inside the
    # same (subject, block). The first row of every group has no predecessor
    # and comes back as NaN.
    onset_gaps = subject_behav_df.groupby(['SID', 'Trial'])['Stimulus Time'].diff()

    # Take the absolute value of the gap itself *before* searching for the
    # minimum. The previous version wrapped the whole comparison in abs(), so
    # a negative gap always replaced the running minimum, even when its
    # magnitude was larger. min() skips NaN entries.
    lowest_sample_rate = onset_gaps.abs().min()

    if pd.isna(lowest_sample_rate):
        # No block of this task has two or more stimulus times, so there is
        # no gap to measure. Fail with a clear message instead of the
        # TypeError that math.floor(None) used to raise.
        raise ValueError(
            f"Task {task!r} has no consecutive stimulus times in blocks 0-9; "
            "cannot determine a window length."
        )

    l[task] = math.floor(lowest_sample_rate)

In [6]:
# Display the per-task values stored in `l` by the previous cell.
print(l)

{'DPT': 3, 'MA': 14, 'PVT': 3, 'VWM': 3}


In [19]:
# Extract, for every stimulus of every (task, subject, block), the pupil samples
# in the window [stimulus time - OFFSET, stimulus time + l[task]) and write one
# row per stimulus to "Extracted_Data.csv".
#
# Progress output: one header line per (task, subject), followed by one symbol
# per block:
#   o  block has stimuli and every stimulus window contains eye samples
#   *  block has stimuli but at least one window contains no eye samples
#   x  block has no stimuli for this subject

OFFSET = 0.5  # how much data we should grab before stimulus onset time

taskTypes = behavior_df["Task Type"].unique()
SIDS = behavior_df["SID"].unique()
total_subs = len(SIDS)

output_columns = ['SID', 'Task Type', 'Block', 'Stimulus Index', 'Pupil Diameter', 'Pupil X', 'Pupil Y']

# One entry per stimulus:
# [SID, Task, Block/Trial, Stimulus Index, diameter list, x list, y list].
# Rows for all tasks are gathered in this single list and the DataFrame is
# built once at the end, instead of concatenating onto an initially empty
# DataFrame after every task.
all_data = []

for task in taskTypes:
    lowest_sample_rate = l[task]

    for subject_id in SIDS:
        # NOTE(review): the denominator is len(SIDS) + 1, which looks off by
        # one if SIDs are contiguous and zero-based - confirm before changing.
        print(task, ":", str(subject_id + 1), "/", str(total_subs + 1))

        # rows belonging to the target subject for this task
        subject_eye_df = eye_df[(eye_df['Task Type'] == task) & (eye_df['SID'] == subject_id)]
        subject_behav_df = behavior_df[(behavior_df['Task Type'] == task) & (behavior_df['SID'] == subject_id)]

        # go through all the trials/blocks
        for trial in range(10):
            stim_times = subject_behav_df.loc[subject_behav_df['Trial'] == trial, 'Stimulus Time']
            if stim_times.empty:
                print('x', end=' ')
                continue

            # The block filter does not depend on the stimulus, so apply it
            # once per block rather than once per stimulus.
            trial_eye_df = subject_eye_df[subject_eye_df['Trial'] == trial]
            missing_time_series_data = False

            for stim_index, stim_onset in enumerate(stim_times):
                # Window of interest: OFFSET before the onset up to (but not
                # including) lowest_sample_rate after it.
                start_time = stim_onset - OFFSET
                end_time = start_time + lowest_sample_rate + OFFSET

                in_window = (trial_eye_df['Time'] >= start_time) & (trial_eye_df['Time'] < end_time)
                interested_pupil_data = trial_eye_df[in_window]

                # The row is still recorded (with empty lists) when no eye
                # samples fall in the window; the block is just flagged.
                if interested_pupil_data.empty:
                    missing_time_series_data = True

                all_data.append([
                    subject_id,
                    task,
                    trial,
                    stim_index,
                    interested_pupil_data['Pupil Diameter'].to_list(),
                    interested_pupil_data['Pupil X'].to_list(),
                    interested_pupil_data['Pupil Y'].to_list(),
                ])

            print('*' if missing_time_series_data else 'o', end=' ')

        print()

# note: the pupil columns hold lists; they are stored in the csv as strings,
# but can be easily converted back
final_df = pd.DataFrame(all_data, columns=output_columns)

final_df.to_csv("Extracted_Data.csv")

DPT : 1 / 58
o o o o o o o o x x 
DPT : 2 / 58
o o o x o o o o o x 
DPT : 3 / 58
o x x x x x x x x x 
DPT : 4 / 58
o x x x x x x x x x 
DPT : 5 / 58
o o o o o x x x x x 
DPT : 6 / 58
x x x o o o o o x x 
DPT : 7 / 58
o o o o o o o o x x 
DPT : 8 / 58
o x x x x x x x x x 
DPT : 9 / 58
o o o o o o o o x x 
DPT : 10 / 58
o x x x x x x x x x 
DPT : 11 / 58
o x x x x x x x x x 
DPT : 12 / 58
o x x x x x x x x x 
DPT : 13 / 58
o o o x o o o o o o 
DPT : 14 / 58
o o o o o o o o x x 
DPT : 15 / 58
o o o o o o o o x x 
DPT : 16 / 58
o o o o o o o o x x 
DPT : 17 / 58
o o x o o o o o x x 
DPT : 18 / 58
o o x x x x x x x x 
DPT : 19 / 58
o x x x x x x x x x 
DPT : 20 / 58
o o o o o o o o x x 
DPT : 21 / 58
o x x x x x x x x x 
DPT : 22 / 58
o x x x x x x x x x 
DPT : 23 / 58
o o o o o o o o x x 
DPT : 24 / 58
o x o o o o o o o x 
DPT : 25 / 58
o x x x x x x x x x 
DPT : 26 / 58
x x o o o o o o o x 
DPT : 27 / 58
o x x x x x x x x x 
DPT : 28 / 58
o o o o o o o o x x 
DPT : 29 / 58
o o o o o o o o