In [4]:
import pyxdf
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 
import wave
#import pyaudio
import numpy as np
#import sounddevice as sd
from utils import *
import cv2
from scipy.signal import iirnotch, filtfilt
from glob import glob

In [18]:
subject_dirs = glob('/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/*')
subject_dirs

['/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/sub-P5029423_task-CUNY_run-001_eyes.csv',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/sub-P5029423_ses-S001_task-CUNY_run-001_mobi.xdf',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/P5029423_20250129_111625.mff',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/sub-P5029423_task-CUNY_run-001_behavior.log',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/sub-P5029423_task-CUNY_run-001_behavior.csv',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/sub-P5029423_task-CUNY_run-001_physio.h5',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/sub-P5029423_task-CUNY_run-001_video.avi',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/sub-P5029423_task-CUNY_run-001_behavior.psydat',
 '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5029423/sub-P5029423_task-CUNY_run-001_mic.wav']

# Load Data

In [8]:
#xdf_path =  '/Users/apurva.gokhe/Documents/CUNY_QC/data/sub-P5070899/sub-P5070899_ses-S001_task-CUNY_run-001_mobi.xdf'
#data, header = pyxdf.load_xdf(xdf_path)
data, header = load_xdf_from_zip('/Users/apurva.gokhe/Documents/CUNY_QC/sub-P5029423.tar.gz')
streams_collected = [stream['info']['name'][0] for stream in data]
streams_collected

['Tobii',
 'WebcamStream',
 'Stimuli_Markers',
 'EGI NetAmp 0',
 'OpenSignals',
 'Microphone']

# Stimulus

In [9]:
stim_dat = data[streams_collected.index('Stimuli_Markers')]
stim_df = pd.DataFrame(stim_dat['time_series'])
stim_df.rename(columns={0: 'trigger'}, inplace=True)
events = {
    200: 'Onset_ExperimentStart',
    10: 'Onset_RestingState',
    11: 'Offset_RestingState',
    500: 'Onset_StoryListening',
    501: 'Offset_StoryListening',
    100: 'Onset_10second_rest',
    101: 'Offset_10second_rest', 
    20: 'Onset_CampFriend',
    21: 'Offset_CampFriend',
    30: 'Onset_FrogDissection',
    31: 'Offset_FrogDissection',
    40: 'Onset_DanceContest',
    41: 'Offset_DanceContest',
    50: 'Onset_ZoomClass',
    51: 'Offset_ZoomClass',
    60: 'Onset_Tornado',
    61: 'Offset_Tornado',
    70: 'Onset_BirthdayParty',
    71: 'Offset_BirthdayParty',
    300: 'Onset_subjectInput',
    301: 'Offset_subjectInput',
    302: 'Onset_FavoriteStory',
    303: 'Offset_FavoriteStory',
    304: 'Onset_WorstStory',
    305: 'Offset_WorstStory',
    400: 'Onset_impedanceCheck',
    401: 'Offset_impedanceCheck',
    80: 'Onset_SocialTask',
    81: 'Offset_SocialTask',
    201: 'Offset_ExperimentEnd',
}

story_onsets = [10, 20, 30, 40, 50, 60, 70, 80, 100, 300, 400, 200]

# relabel the event if the trigger is in the events dictionary, else if 
stim_df['event'] = stim_df['trigger'].apply(lambda x: events[x] if x in events.keys() else 'Bx_input')

# relabel the event as a psychopy timestamp if the trigger is greater than 5 digits
stim_df.loc[stim_df.trigger.astype(str).str.len() > 5, 'event'] = 'psychopy_time_stamp'
stim_df['lsl_time_stamp'] = stim_dat['time_stamps']
stim_df['time'] = (stim_dat['time_stamps'] - stim_dat['time_stamps'][0])/1000
stim_df

Unnamed: 0,trigger,event,lsl_time_stamp,time
0,2.000000e+02,Onset_ExperimentStart,185787.714273,0.000000e+00
1,1.738168e+09,psychopy_time_stamp,185787.714296,2.259971e-08
2,1.000000e+01,Onset_RestingState,185814.229936,2.651566e-02
3,1.738168e+09,psychopy_time_stamp,185814.229944,2.651567e-02
4,1.100000e+01,Offset_RestingState,186114.235558,3.265213e-01
...,...,...,...,...
256,1.738170e+09,psychopy_time_stamp,187994.410770,2.206696e+00
257,2.010000e+02,Offset_ExperimentEnd,187999.411616,2.211697e+00
258,1.738170e+09,psychopy_time_stamp,187999.411631,2.211697e+00
259,4.000000e+00,Bx_input,187999.411756,2.211697e+00


In [10]:
# Function to calculate time between two triggeres

def get_secs_between_triggers(trigger1, trigger2):
    return stim_df.loc[stim_df.trigger == trigger1, 'lsl_time_stamp'].values[0] - stim_df.loc[stim_df.trigger == trigger2, 'lsl_time_stamp'].values[0]

In [11]:
# confirm all events are in the stim_df
evs = stim_df.loc[stim_df.event != 'psychopy_time_stamp']
evs

Unnamed: 0,trigger,event,lsl_time_stamp,time
0,200.0,Onset_ExperimentStart,185787.714273,0.000000
2,10.0,Onset_RestingState,185814.229936,0.026516
4,11.0,Offset_RestingState,186114.235558,0.326521
6,500.0,Onset_StoryListening,186114.235596,0.326521
8,100.0,Onset_10second_rest,186150.712077,0.362998
...,...,...,...,...
251,401.0,Offset_impedanceCheck,187676.164065,1.888450
253,80.0,Onset_SocialTask,187693.919192,1.906205
255,81.0,Offset_SocialTask,187994.410744,2.206696
257,201.0,Offset_ExperimentEnd,187999.411616,2.211697


### Check if markers are missing

In [12]:
def missing_markers(events, stim_df):
    missing_markers=[]
    for event in events:
        if event in stim_df.event:
            return None
        else:
            missing_markers = missing_markers + [event]
        return missing_markers

result = missing_markers(events, stim_df)
print(result)

None


### Checking durations of all story listening, resting state and social script

In [13]:
# Get durations of all story listening sessions

durations = pd.DataFrame({
    'trigger':story_onsets,
    'story':[events[x] for x in story_onsets],
    'lsl_duration': [get_secs_between_triggers(x+1, x) for x in story_onsets]})
durations

Unnamed: 0,trigger,story,lsl_duration
0,10,Onset_RestingState,300.005622
1,20,Onset_CampFriend,140.436595
2,30,Onset_FrogDissection,114.257204
3,40,Onset_DanceContest,127.101432
4,50,Onset_ZoomClass,90.38403
5,60,Onset_Tornado,149.983831
6,70,Onset_BirthdayParty,147.208981
7,80,Onset_SocialTask,300.491552
8,100,Onset_10second_rest,10.001131
9,300,Onset_subjectInput,30.988169


### Checking whether duration of resting state and social script is ~300s

In [14]:
def rest_and_social_duration(trigger, durations):
    trial_duration = durations.loc[durations['trigger'] == trigger].lsl_duration.to_list()[0]
    print(trial_duration)
    if trial_duration <= 305.0  and trial_duration >= 298.0:
        return True
    else:
        return False

resting_state_duration_bool = rest_and_social_duration(10, durations)
print('Is duration of resting state ~300s? ', resting_state_duration_bool)
social_script_duration_bool = rest_and_social_duration(80, durations)
print('Is duration of social script ~300s? ', social_script_duration_bool)


300.00562166958116
Is duration of resting state ~300s?  True
300.4915517952759
Is duration of social script ~300s?  True


### Duration of Impedance check

In [15]:
impedance_check_duration = durations.loc[durations.trigger == 400].lsl_duration.to_list()[0]
print('Duration of Impedance check: ', impedance_check_duration)

Duration of Impedance check:  383.5995829163585


### Checking 10 s rest durations 

In [16]:
# Count the number of occurrences of trigger value 100 using sum
trigger_count = (evs['trigger'] == 100).sum()

print(f"Number of trigger 100: {trigger_count}")

#rest_onsets = 
ten_secs_rest_durations = pd.DataFrame({
    'trigger':[x for x in range(trigger_count)],
    'story': ['Onset_10second_rest' for x in range(trigger_count)],
    'lsl_duration': [get_secs_between_triggers(x+1, x) for x in evs['trigger'] if x == 100]})
#print(ten_secs_rest_durations)

# Check if rest durations are equal
equal_rest_durations = all(x == ten_secs_rest_durations['lsl_duration'][0] for x in ten_secs_rest_durations['lsl_duration'])
print("Are all 10 seconds rest equal?", equal_rest_durations)

Number of trigger 100: 6
Are all 10 seconds rest equal? True


### Average response time of questions across all trials

In [17]:
# Get sequence of story listening 
sequence = pd.DataFrame({
    'trigger': [x for x in evs.trigger if x in story_onsets],
    'story':[events[x] for x in evs.trigger if x in story_onsets],
    'lsl_duration': [get_secs_between_triggers(x+1, x) for x in evs.trigger if x in story_onsets]
})
#print(sequence)
# Number of questions per story listening
qa_duration = []
for idx, x in enumerate(sequence.trigger):
    if x == 300:
        one_story_qa_duration = one_story_qa_duration + sequence.lsl_duration[idx]
        continue
    elif (x == 100 or x ==400) and one_story_qa_duration != 0:
        qa_duration = qa_duration + [one_story_qa_duration]
    else:
        one_story_qa_duration = 0
average_response_time = sum(qa_duration)/len(qa_duration)
print(qa_duration)
print('Average response time across all story listening task: ', average_response_time)


[185.92901480896398, 185.92901480896398, 185.92901480896398, 185.92901480896398, 185.92901480896398, 185.92901480896398]
Average response time across all story listening task:  185.92901480896398
