In [1]:
import pyxdf
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 
import wave
#import pyaudio
import numpy as np
#import sounddevice as sd
import cv2
from scipy.signal import iirnotch, filtfilt
from glob import glob

In [2]:
subject_dirs = glob('/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/*')
subject_dirs

['/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_task-CUNY_run-001_physio.h5',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_task-CUNY_run-001_mic.wav',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_task-CUNY_run-001_eyes.csv',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_task-CUNY_run-001_behavior.csv',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_task-CUNY_run-001_behavior.log',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_ses-S001_task-CUNY_run-001_mobi.xdf',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_task-CUNY_run-001_video.avi',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_task-CUNY_run-001_behavior.psydat',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/P5070899_20250217_042741.mff']

# Load Data

In [3]:
xdf_path =  '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_ses-S001_task-CUNY_run-001_mobi.xdf'
data, header = pyxdf.load_xdf(xdf_path)
streams_collected = [stream['info']['name'][0] for stream in data]
streams_collected

['EGI NetAmp 0',
 'OpenSignals',
 'Stimuli_Markers',
 'WebcamStream',
 'Tobii',
 'Microphone']

# Stimulus

In [4]:
stim_dat = data[streams_collected.index('Stimuli_Markers')]
stim_df = pd.DataFrame(stim_dat['time_series'])
stim_df.rename(columns={0: 'trigger'}, inplace=True)
events = {
    200: 'Onset_ExperimentStart',
    10: 'Onset_RestingState',
    11: 'Offset_RestingState',
    500: 'Onset_StoryListening',
    501: 'Offset_StoryListening',
    100: 'Onset_10second_rest',
    101: 'Offset_10second_rest', 
    20: 'Onset_CampFriend',
    21: 'Offset_CampFriend',
    30: 'Onset_FrogDissection',
    31: 'Offset_FrogDissection',
    40: 'Onset_DanceContest',
    41: 'Offset_DanceContest',
    50: 'Onset_ZoomClass',
    51: 'Offset_ZoomClass',
    60: 'Onset_Tornado',
    61: 'Offset_Tornado',
    70: 'Onset_BirthdayParty',
    71: 'Offset_BirthdayParty',
    300: 'Onset_subjectInput',
    301: 'Offset_subjectInput',
    302: 'Onset_FavoriteStory',
    303: 'Offset_FavoriteStory',
    304: 'Onset_WorstStory',
    305: 'Offset_WorstStory',
    400: 'Onset_impedanceCheck',
    401: 'Offset_impedanceCheck',
    80: 'Onset_SocialTask',
    81: 'Offset_SocialTask',
    201: 'Offset_ExperimentEnd',
}

story_onsets = [10, 20, 30, 40, 50, 60, 70, 80, 100, 300, 400]

# relabel the event if the trigger is in the events dictionary, else if 
stim_df['event'] = stim_df['trigger'].apply(lambda x: events[x] if x in events.keys() else 'Bx_input')

# relabel the event as a psychopy timestamp if the trigger is greater than 5 digits
stim_df.loc[stim_df.trigger.astype(str).str.len() > 5, 'event'] = 'psychopy_time_stamp'
stim_df['lsl_time_stamp'] = stim_dat['time_stamps']
stim_df['time'] = (stim_dat['time_stamps'] - stim_dat['time_stamps'][0])/1000
stim_df

Unnamed: 0,trigger,event,lsl_time_stamp,time
0,2.000000e+02,Onset_ExperimentStart,486663.454963,0.000000e+00
1,1.739828e+09,psychopy_time_stamp,486663.454971,7.899944e-09
2,1.000000e+01,Onset_RestingState,486714.152246,5.069728e-02
3,1.739828e+09,psychopy_time_stamp,486714.152254,5.069729e-02
4,1.100000e+01,Offset_RestingState,487014.150930,3.506960e-01
...,...,...,...,...
254,1.739830e+09,psychopy_time_stamp,488481.156269,1.817701e+00
255,8.100000e+01,Offset_SocialTask,488781.651469,2.118197e+00
256,1.739830e+09,psychopy_time_stamp,488781.651480,2.118197e+00
257,2.010000e+02,Offset_ExperimentEnd,488786.651354,2.123196e+00


In [5]:
# Function to calculate time between two triggeres

def get_secs_between_triggers(trigger1, trigger2):
    return stim_df.loc[stim_df.trigger == trigger1, 'lsl_time_stamp'].values[0] - stim_df.loc[stim_df.trigger == trigger2, 'lsl_time_stamp'].values[0]

In [6]:
stim_df.loc[stim_df.trigger == 301, 'lsl_time_stamp'].values[0] - stim_df.loc[stim_df.trigger == 300, 'lsl_time_stamp'].values[0]

11.041776231781114

In [7]:
stim_df.loc[stim_df.trigger == 301, 'lsl_time_stamp'].values[3] - stim_df.loc[stim_df.trigger == 300, 'lsl_time_stamp'].values[3]

5.503423618618399

In [8]:
def get_event_data(event, df, stim_df=stim_df):
    return df.loc[(df.lsl_time_stamp >= stim_df.loc[stim_df.event == 'Onset_'+event, 'lsl_time_stamp'].values[0]) & 
                  (df.lsl_time_stamp <= stim_df.loc[stim_df.event == 'Offset_'+event, 'lsl_time_stamp'].values[0])]




In [9]:
# confirm all events are in the stim_df
evs = stim_df.loc[stim_df.event != 'psychopy_time_stamp']
evs

Unnamed: 0,trigger,event,lsl_time_stamp,time
0,200.0,Onset_ExperimentStart,486663.454963,0.000000
2,10.0,Onset_RestingState,486714.152246,0.050697
4,11.0,Offset_RestingState,487014.150930,0.350696
6,500.0,Onset_StoryListening,487014.150970,0.350696
8,100.0,Onset_10second_rest,487032.818604,0.369364
...,...,...,...,...
249,400.0,Onset_impedanceCheck,488125.102564,1.461648
251,401.0,Offset_impedanceCheck,488460.067837,1.796613
253,80.0,Onset_SocialTask,488481.156257,1.817701
255,81.0,Offset_SocialTask,488781.651469,2.118197


### Check if markers are missing

In [10]:
def missing_markers(events, stim_df):
    missing_markers=[]
    for event in events:
        if event in stim_df.event:
            return None
        else:
            missing_markers = missing_markers + [event]
        return missing_markers

result = missing_markers(events, stim_df)
print(result)

None


### Checking durations of all story listening, resting state and social script

In [11]:
# Get durations of all story listening sessions

durations = pd.DataFrame({
    'trigger':story_onsets,
    'story':[events[x] for x in story_onsets],
    'lsl_duration': [get_secs_between_triggers(x+1, x) for x in story_onsets]})
durations

Unnamed: 0,trigger,story,lsl_duration
0,10,Onset_RestingState,299.998684
1,20,Onset_CampFriend,140.445913
2,30,Onset_FrogDissection,114.270418
3,40,Onset_DanceContest,127.112438
4,50,Onset_ZoomClass,90.393858
5,60,Onset_Tornado,149.991003
6,70,Onset_BirthdayParty,147.228141
7,80,Onset_SocialTask,300.495212
8,100,Onset_10second_rest,9.997758
9,300,Onset_subjectInput,11.041776


### Checking whether duration of resting state and social script is ~300s

In [12]:
def rest_and_social_duration(trigger, durations):
    trial_duration = durations.loc[durations['trigger'] == trigger].lsl_duration.to_list()[0]
    print(trial_duration)
    if trial_duration <= 305.0  and trial_duration >= 298.0:
        return True
    else:
        return False

resting_state_duration_bool = rest_and_social_duration(10, durations)
print('Is duration of resting state ~300s? ', resting_state_duration_bool)
social_script_duration_bool = rest_and_social_duration(80, durations)
print('Is duration of social script ~300s? ', social_script_duration_bool)



299.99868435016833
Is duration of resting state ~300s?  True
300.49521212867694
Is duration of social script ~300s?  True


### Duration of Impedance check

In [13]:
impedance_check_duration = durations.loc[durations.trigger == 400].lsl_duration.to_list()[0]
print('Duration of Impedance check: ', impedance_check_duration)

Duration of Impedance check:  334.9652733117109


### Plot

In [16]:
color_codes = {
    'Onset_impedanceCheck': 'gold',
    'Onset_SocialTask': 'indianred',
    'Onset_BirthdayParty': 'saddlebrown',
    'Onset_Tornado': 'forestgreen',
    'Onset_ZoomClass':'teal',
    'Onset_DanceContest': 'slateblue',
    'Onset_FrogDissection': 'rebeccapurple',
    'Onset_CampFriend': 'palevioletred',
    'Onset_RestingState' : 'coral'
}

In [None]:
# Plot for story listening and social task durations
colors = durations['story'].map(color_codes)
durations = durations.iloc[::-1].reset_index(drop=True)
colors = colors.iloc[::-1].reset_index(drop=True)

plt.figure(figsize=(15, 5))
bars = plt.barh(durations['story'], durations['lsl_duration'], color=colors)
plt.ylabel('Task')
plt.xlabel('LSL Duration (seconds)')
plt.title('Task Durations')
plt.yticks(rotation=0)
#plt.gca().spines['right'].set_visible(False)

for bar in bars:
    plt.text(bar.get_width(), bar.get_y() + bar.get_height() / 2, f'{bar.get_width():.1f}', va='center')

plt.show()

### Checking 10 s rest durations 

In [14]:
# Count the number of occurrences of trigger value 100 using sum
trigger_count = (evs['trigger'] == 100).sum()

print(f"Number of trigger 100: {trigger_count}")

#rest_onsets = 
ten_secs_rest_durations = pd.DataFrame({
    'trigger':[x for x in range(trigger_count)],
    'story': ['Onset_10second_rest' for x in range(trigger_count)],
    'lsl_duration': [get_secs_between_triggers(x+1, x) for x in evs['trigger'] if x == 100]})
#print(ten_secs_rest_durations)

# Check if rest durations are equal
equal_rest_durations = all(x == ten_secs_rest_durations['lsl_duration'][0] for x in ten_secs_rest_durations['lsl_duration'])
print("Are all 10 seconds rest equal?", equal_rest_durations)

Number of trigger 100: 6
Are all 10 seconds rest equal? True


### Check response time of each question

In [None]:
# Count the number of occurrences of trigger value 100 using sum
trigger_count = (evs['trigger'] == 300).sum()

print(f"Number of trigger 300: {trigger_count}")

qa_durations = pd.DataFrame({
    'trigger':[x for x in range(trigger_count)],
    'story': ['Onset_subjectInput' for x in range(trigger_count)],
    'lsl_duration': [get_secs_between_triggers(x+1, x) for x in evs['trigger'] if x == 300]})
#print(qa_durations)
print([x for x in evs['trigger'] if x == 300])
print(qa_durations.lsl_duration)

### Average response time of questions across all trials

In [None]:
response_times = []
trial_response_times = []
trigger_idx = 0
for idx, x in enumerate(stim_df['trigger']):
    if idx in stim_df.loc[stim_df.trigger == 300,'lsl_time_stamp'].index:
        response_times = response_times + [stim_df.loc[stim_df.trigger == 301, 'lsl_time_stamp'].values[trigger_idx] - stim_df.loc[stim_df.trigger == 300, 'lsl_time_stamp'].values[trigger_idx]]
        trigger_idx = trigger_idx + 1
    elif x == 100 or x == 400:
        trial_response_times = trial_response_times + [sum(response_times)]
        response_times = []
average_response_time = sum(trial_response_times)/len(trial_response_times)        
print('Response time for all questions for each story listing task: ', trial_response_times) 
print('Average response time for all question across all trials', average_response_time)     
#average_response_times = sum(response_times)/len(response_times)
#average_response_times

[0, 34.440039259789046, 28.54655083612306, 26.384979754628148, 22.774184299923945, 26.332958660728764, 24.475186491385102]
23.279128471796867
