In [1]:
import scipy
from scipy.io import loadmat
import h5py 
import numpy as np
import pandas as pd
import mat73



In [2]:
def getSubjectDataFromDataBlock(datablock):
    """
        Re-index one task's data block so that it is keyed by subject first.

        `datablock` maps each data category to a list holding one entry per
        subject; each of those entries is itself a sequence with one item per
        trial. The returned nested dictionary is laid out as:

        "subject<N>" (numbered from 1, in list order):
            data category (the keys of `datablock`, e.g. 'Behavior', 'eyeposX'):
                "trial<M>" (numbered from 0, in list order):
                    the trial's data, stored exactly as found
    """
    by_subject = {}
    for category, per_subject in datablock.items():
        for position in range(len(per_subject)):
            subject_trials = per_subject[position]

            # Create the subject / category levels on first sight, reuse them afterwards.
            category_trials = (
                by_subject
                .setdefault("subject" + str(position + 1), {})
                .setdefault(category, {})
            )

            for trial_number in range(len(subject_trials)):
                category_trials["trial" + str(trial_number)] = subject_trials[trial_number]
    return by_subject



def _build_trial_frame(columns, arrays_by_column, constants_by_column):
    """Build one trial's rows: per-sample columns first, then the broadcast constants."""
    trial_frame = pd.DataFrame(columns=columns)
    # The first array assigned to the still-empty frame determines its row count;
    # the scalar assignments that follow are broadcast to every row.
    for column_name, values in arrays_by_column.items():
        trial_frame[column_name] = values
    for column_name, value in constants_by_column.items():
        trial_frame[column_name] = value
    return trial_frame


def _combine_frames(frames, columns):
    """Concatenate the non-empty frames in one pass; fall back to an empty frame with `columns`."""
    non_empty = [frame for frame in frames if not frame.empty]
    if not non_empty:
        return pd.DataFrame(columns=columns)
    return pd.concat(non_empty, ignore_index=True)


def GetExperiementDataFrames(block_data_dict, taskType, max_num_trials=10):
    """
    Flatten the per-subject / per-trial dictionaries of one task into two tables.

    Parameters
    ----------
    block_data_dict : dict
        Output of getSubjectDataFromDataBlock: subject name -> category ->
        "trial<N>" -> data. The categories read here are "Behavior",
        "pupil_diam", "time_ms", "eyeposX" and "eyeposY".
    taskType : str
        Label written to the 'Task Type' column. For "PVT" the correctness
        column is derived as 1 - isLapse; for every other task it is taken
        from the trial's "isCorrectResponse" field.
    max_num_trials : int, optional
        Trials "trial0" .. "trial<max_num_trials - 1>" are looked up for every
        subject (default 10).

    Returns
    -------
    (eye_data_df, behavioral_data_df) : tuple of pandas.DataFrame
        eye_data_df columns: 'Task Type', 'SID', 'Trial', 'Time',
        'Pupil Diameter', 'Pupil X', 'Pupil Y' -- one row per eye sample, with
        'Time' converted from milliseconds to seconds.
        behavioral_data_df columns: 'Task Type', 'SID', 'Trial',
        'Stimulus Time', 'Reaction Time', 'isCorrectResponse' -- one row per
        stimulus, with 'isCorrectResponse' multiplied by 1 so booleans become
        integers.
        'SID' is the zero-based position of the subject in `block_data_dict`
        and 'Trial' is the zero-based trial number.

    Notes
    -----
    A trial is skipped when any of its five pieces of data is None or the
    trial key is absent. Rows are collected per trial and concatenated once at
    the end rather than growing a DataFrame inside the loops. Progress is
    printed once per subject.
    """
    eye_columns = ['Task Type', 'SID', "Trial", 'Time', 'Pupil Diameter', "Pupil X", "Pupil Y"]
    behavior_columns = ['Task Type', 'SID', 'Trial', 'Stimulus Time', "Reaction Time",
                        "isCorrectResponse"]

    eye_frames = []
    behavior_frames = []

    for subject_index, subjectID in enumerate(block_data_dict.keys()):
        print("At subject: ", subject_index)
        data = block_data_dict[subjectID]

        behave_data = data["Behavior"]
        pupil_diam = data["pupil_diam"]
        time_stamp = data["time_ms"]
        eyeposx = data["eyeposX"]
        eyeposy = data["eyeposY"]

        for i in range(max_num_trials):
            trial = "trial" + str(i)

            # .get() so that a subject with fewer recorded trials is skipped
            # instead of raising KeyError.
            curr_behav = behave_data.get(trial)
            curr_pupil_diam = pupil_diam.get(trial)
            curr_time_stamp = time_stamp.get(trial)
            curr_eyepos_x = eyeposx.get(trial)
            curr_eyepos_y = eyeposy.get(trial)

            trial_parts = (curr_pupil_diam, curr_time_stamp, curr_eyepos_x, curr_eyepos_y, curr_behav)
            if any(part is None for part in trial_parts):
                # data may not have been recorded for this trial
                continue

            trial_constants = {'SID': subject_index, 'Task Type': taskType, 'Trial': i}

            # Eye samples: time stamp, pupil diameter and gaze position are
            # parallel arrays, one element per sample.
            eye_frames.append(_build_trial_frame(
                eye_columns,
                {
                    'Time': curr_time_stamp,
                    'Pupil Diameter': curr_pupil_diam,
                    'Pupil X': curr_eyepos_x,
                    'Pupil Y': curr_eyepos_y,
                },
                trial_constants,
            ))

            if taskType == "PVT":
                # flip this to get correct value (In Dr. Brooks notes)
                is_correct = 1 - curr_behav["isLapse"]
            else:
                is_correct = curr_behav["isCorrectResponse"]

            behavior_frames.append(_build_trial_frame(
                behavior_columns,
                {
                    'Stimulus Time': curr_behav["StimOnsetTime"],
                    'Reaction Time': curr_behav["ReactionTime"],
                    'isCorrectResponse': is_correct,
                },
                trial_constants,
            ))

    eye_data_df = _combine_frames(eye_frames, eye_columns)
    behavioral_data_df = _combine_frames(behavior_frames, behavior_columns)

    eye_data_df['Time'] = eye_data_df['Time'] * 0.001  # milliseconds -> seconds
    # Multiplying by 1 turns boolean values into integers.
    behavioral_data_df["isCorrectResponse"] = behavioral_data_df["isCorrectResponse"] * 1
    return eye_data_df, behavioral_data_df


In [3]:
# Load the recording file with mat73; the path is relative to the working directory.
mat_file_path = 'data/NewPup_withGazeData.mat'
data = mat73.loadmat(mat_file_path)

In [5]:
# Walk down the nested structure one level at a time, printing the type of each
# level together with its keys (dict levels) or its length (list levels).
print('0: type:{:},    key:{}'.format(type(data), data.keys()))

new_pup = data['NewPup']
print('1: type:{:},    key:{}'.format(type(new_pup), new_pup.keys()))

dyn_task = new_pup['DYN']
print('2: type:{:},    key:{}'.format(type(dyn_task), dyn_task.keys()))

dyn_blocks = dyn_task['block']
print('3: type:{:},    len:{}'.format(type(dyn_blocks), len(dyn_blocks)))

first_block = dyn_blocks[0]
print('4: type:{:},    key:{}'.format(type(first_block), first_block.keys()))

behavior_entries = first_block['Behavior']
print('5: type:{:},    len:{}'.format(type(behavior_entries), len(behavior_entries)))

first_entry = behavior_entries[0]
print('6: type:{:},    len:{}'.format(type(first_entry), len(first_entry)))

first_trial = first_entry[0]
print('7: type:{:},    key:{}'.format(type(first_trial), first_trial.keys()))

# Step 8 (length of the DPT 'DifficultyLevel' field) was commented out in the original cell.
print('9: print info:', first_trial['info'])

0: type:<class 'dict'>,    key:dict_keys(['NewPup'])
1: type:<class 'dict'>,    key:dict_keys(['DPT', 'DYN', 'MA', 'PVT', 'REST', 'VWM'])
2: type:<class 'dict'>,    key:dict_keys(['block'])
3: type:<class 'list'>,    len:4
4: type:<class 'dict'>,    key:dict_keys(['Behavior', 'eyeposX', 'eyeposY', 'pupil_diam', 'pupil_raw', 'time_ms'])
5: type:<class 'list'>,    len:58
6: type:<class 'list'>,    len:10
7: type:<class 'dict'>,    key:dict_keys(['Aud', 'BlockNumber', 'FaceNumber', 'Image1', 'Image2', 'Rnumber', 'StimOnset', 'StimOnsetTime', 'StimTRTime', 'TRTime', 'Target', 'TrialNumber', 'info'])
9: print info: ['BlockNumber is integer label of block up to 4', 'TrialNumber is integer label trial num within block', 'FaceNumber is...contact James Elliott at UCSB to fill in rest of details. Sorry!']


In [4]:
# print('0: type:{:},    key:{}'.format(type(data)                                                                 , data.keys()   ))
# print('1: type:{:},    key:{}'.format(type(data)                                                       , data.keys()   ))
# print('2: type:{:},    key:{}'.format(type(data['REST'])                                                 , data['REST'].keys()   ))
# print('3: type:{:},    len:{}'.format(type(data['REST']['block'])                                        , len(data['REST']['block'])   ))
# print('4: type:{:},    key:{}'.format(type(data['REST']['block'][0])                                     , data['REST']['block'][0].keys()   ))
# print('5: type:{:},    len:{}'.format(type(data['REST']['block'][0]['Behavior'])                         , len(data['REST']['block'][0]['Behavior'])   ))
# print('6: type:{:},    len:{}'.format(type(data['REST']['block'][0]['Behavior'][0])                      , len(data['REST']['block'][0]['Behavior'][0])   ))
# print('7: type:{:},    key:{}'.format(type(data['REST']['block'][0]['Behavior'][0][0])                   , data['REST']['block'][0]['Behavior'][0][0].keys()   ))
# #print('8: type:{:},    len:{}'.format(type(data['NewPup']['DPT']['block'][0]['Behavior'][0][0]['DifficultyLevel']), len(data['NewPup']['DPT']['block'][0]['Behavior'][0][0]['DifficultyLevel'])   ))
# print('9: print info:', data['REST']['block'][0]['Behavior'][0][0]['info'])

In [5]:
# Everything in the loaded file sits under the single top-level key "NewPup".
# Unwrap it only if that has not been done yet, so that re-running this cell
# does not fail with KeyError: 'NewPup'.
if "NewPup" in data:
    data = data["NewPup"]

# all the data folders in our experiments are ['DPT', 'DYN', 'MA', 'PVT', 'REST', 'VWM']
# to access data do [taskType]["block"][0]
# Per-task dictionaries kept available for ad-hoc inspection; the loop below
# rebuilds the same structure for every task it processes.
MA_data = getSubjectDataFromDataBlock(data["MA"]["block"][0])
PVT_data = getSubjectDataFromDataBlock(data["PVT"]["block"][0])
DPT_data = getSubjectDataFromDataBlock(data["DPT"]["block"][0])
VWM_data = getSubjectDataFromDataBlock(data["VWM"]["block"][0])

eye_columns = ['Task Type', 'SID', "Trial", 'Time', 'Pupil Diameter', "Pupil X", "Pupil Y"]
behavioral_columns = ['Task Type', 'SID', 'Trial', 'Stimulus Time', "Reaction Time",
                      "isCorrectResponse"]

# Collect one frame per task and concatenate once at the end, instead of
# repeatedly concatenating onto an (initially empty) frame.
eye_frames = []
behavioral_frames = []

for taskType in data.keys():
    print(taskType)
    if taskType == 'DYN':
        # NOTE(review): presumably skipped because the DYN Behavior records shown in the
        # structure dump have no 'ReactionTime' / 'isCorrectResponse' fields -- confirm.
        continue
    task_data = getSubjectDataFromDataBlock(data[taskType]["block"][0])  # gets organized dictionary with task data
    eye_data_df, behavioral_data_df = GetExperiementDataFrames(task_data, taskType)  # gets the actual dataframes

    if not eye_data_df.empty:
        eye_frames.append(eye_data_df)
    if not behavioral_data_df.empty:
        behavioral_frames.append(behavioral_data_df)

eye_df = (
    pd.concat(eye_frames, ignore_index=True)
    if eye_frames else pd.DataFrame(columns=eye_columns)
)
behavioral_df = (
    pd.concat(behavioral_frames, ignore_index=True)
    if behavioral_frames else pd.DataFrame(columns=behavioral_columns)
)



DPT
At subject:  0
At subject:  1
At subject:  2
At subject:  3
At subject:  4
At subject:  5
At subject:  6
At subject:  7
At subject:  8
At subject:  9
At subject:  10
At subject:  11
At subject:  12
At subject:  13
At subject:  14
At subject:  15
At subject:  16
At subject:  17
At subject:  18
At subject:  19
At subject:  20
At subject:  21
At subject:  22
At subject:  23
At subject:  24
At subject:  25
At subject:  26
At subject:  27
At subject:  28
At subject:  29
At subject:  30
At subject:  31
At subject:  32
At subject:  33
At subject:  34
At subject:  35
At subject:  36
At subject:  37
At subject:  38
At subject:  39
At subject:  40
At subject:  41
At subject:  42
At subject:  43
At subject:  44
At subject:  45
At subject:  46
At subject:  47
At subject:  48
At subject:  49
At subject:  50
At subject:  51
At subject:  52
At subject:  53
At subject:  54
At subject:  55
At subject:  56
At subject:  57
DYN
MA
At subject:  0
At subject:  1
At subject:  2
At subject:  3
At subject:

In [6]:
# Write the combined tables under data/, the directory the read-back cells
# further down load them from (previously they were written to the working
# directory, so a fresh run could not find them there).
eye_df.to_csv("data/eye_data.csv", index=False)
behavioral_df.to_csv("data/behavior_data.csv", index=False)

In [8]:
# Display the eye-tracking frame of the last task processed by the per-task loop.
# This is the loop's leftover variable, not the combined eye_df.
eye_data_df

Unnamed: 0,Task Type,SID,Trial,Time,Pupil Diameter,Pupil X,Pupil Y
0,PVT,0,0,0.004,4.637736,571.0,426.700012
1,PVT,0,0,0.008,4.637219,571.5,426.299988
2,PVT,0,0,0.012,4.636733,571.700012,425.600006
3,PVT,0,0,0.016,4.636274,571.900024,424.299988
4,PVT,0,0,0.02,4.635839,571.900024,423.799988
...,...,...,...,...,...,...,...
41684964,PVT,57,7,623.076,2.579153,493.399994,322.0
41684965,PVT,57,7,623.08,2.576649,494.299988,321.899994
41684966,PVT,57,7,623.084,2.574309,497.0,319.399994
41684967,PVT,57,7,623.088,2.572498,493.299988,320.700012


In [14]:
# Rows of the last processed task's behavioural frame where isCorrectResponse is 0.
behavioral_data_df[behavioral_data_df['isCorrectResponse'] == 0]

Unnamed: 0,Task Type,SID,Trial,Stimulus Time,Reaction Time,isCorrectResponse
74,PVT,0,0,584.151325,516.061068,0.0
146,PVT,0,1,529.061862,516.039848,0.0
163,PVT,0,2,66.824301,549.766064,0.0
187,PVT,0,2,241.046948,515.371799,0.0
353,PVT,0,4,312.620581,699.307919,0.0
...,...,...,...,...,...,...
21237,PVT,57,6,127.78541,599.560022,0.0
21243,PVT,57,6,162.683259,566.347837,0.0
21246,PVT,57,6,188.165036,549.58415,0.0
21252,PVT,57,6,239.878526,565.603971,0.0


In [2]:
# Read the behavioural table back from its CSV file and display it.
# Note the name: behavior_df is distinct from the in-memory behavioral_df built earlier.
behavior_df = pd.read_csv("data/behavior_data.csv")
behavior_df

Unnamed: 0,Task Type,SID,Trial,Stimulus Time,Reaction Time,isCorrectResponse
0,DPT,0,0,1.402413,605.970,1.0
1,DPT,0,0,4.852193,486.175,1.0
2,DPT,0,0,8.401990,443.381,1.0
3,DPT,0,0,11.735102,546.256,1.0
4,DPT,0,0,15.518205,487.143,1.0
...,...,...,...,...,...,...
112195,VWM,57,7,220.884844,322.137,1.0
112196,VWM,57,7,228.184393,379.615,1.0
112197,VWM,57,7,235.350625,352.227,1.0
112198,VWM,57,7,243.966769,353.536,1.0


In [None]:
# Read the eye-tracking table back from its CSV file and display it.
# This rebinds eye_df, replacing the in-memory frame built earlier in the notebook.
eye_df = pd.read_csv("data/eye_data.csv")
eye_df