In [1]:
import scipy.io
import numpy as np
import csv
import librosa
import random
from scipy.stats import rankdata

In [2]:
# Collection of patient IDs: the driver loop at the bottom of the notebook
# iterates over it and concatenates each element into file paths, so the
# elements must be strings.
# NOTE(review): the name suggests a mapping from patient ID to electrode count,
# but the value was redacted before sharing -- confirm. As written this line is
# not valid Python; restore the real collection before running the notebook.
id_eleccount = # Removed to protect patient anonymity

# Generating Word Onset Vectors

In [3]:
def segment_transcript_by_movie(patient_id, ecog1_t, ecog2_t,
                                transcript_dir='/scratch/gpfs/mayaar/GravityECoG/sourcedata/interview-transcripts/cleaned'):
    """Split a patient's transcript word onsets by ECoG recording.

    Reads ``<transcript_dir>/cleaned_transcript_ny<patient_id>.csv`` and parses
    column index 2 of every row as a word onset time. An onset is assigned to a
    recording when it lies strictly between the first and last value of that
    recording's time axis.

    Parameters
    ----------
    patient_id : str
        Patient identifier; concatenated into the transcript file name.
    ecog1_t, ecog2_t : sequence of float
        Time axes of the first and second ECoG recording. Only the first and
        last elements are used.
    transcript_dir : str, optional
        Directory containing the cleaned transcripts. Defaults to the original
        hard-coded location.

    Returns
    -------
    onsets_1, onsets_2 : numpy.ndarray of float
        Onset times falling inside recording 1 and recording 2, in file order.

    Raises
    ------
    IndexError
        If a non-blank row has fewer than three columns.
    ValueError
        If column index 2 of a row cannot be parsed as a float.
    """
    transcript_path = transcript_dir + '/cleaned_transcript_ny' + patient_id + '.csv'
    with open(transcript_path, newline='') as f:
        # csv.reader yields an empty list for a blank line; skip those instead
        # of crashing on row[2]. Malformed non-blank rows still raise.
        rows = [row for row in csv.reader(f) if row]

    # Parse each onset exactly once.
    onset_times = [float(row[2]) for row in rows]

    # Movie 1: strictly inside the first recording's time span.
    onsets_1 = [t for t in onset_times if ecog1_t[0] < t < ecog1_t[-1]]
    # Movie 2: strictly inside the second recording's time span.
    onsets_2 = [t for t in onset_times if ecog2_t[0] < t < ecog2_t[-1]]

    return np.array(onsets_1, dtype=float), np.array(onsets_2, dtype=float)

In [4]:
# Sampling rates handed to librosa.resample as orig_sr / target_sr.
original_rate = 512  # Hz
downsampled_rate = 100  # Hz


def downsample_ecog(ecog):
    """Resample ``ecog`` from ``original_rate`` to ``downsampled_rate`` using librosa."""
    return librosa.resample(ecog, orig_sr=original_rate, target_sr=downsampled_rate)

def save_downsampled_ecog(patient_id, ecog1_t, ecog2_t):
    """Downsample both ECoG recordings of a patient, rank them, and save the results.

    Loads ``nr-ecog1-<patient_id>.npy`` and ``nr-ecog2-<patient_id>.npy`` from
    ``../../sourcedata/non-ranked-ecog/``, applies ``downsample_ecog`` along
    axis 1, and builds a matching time axis for each recording with
    ``np.linspace`` between the first and last value of the original time axis.
    The downsampled data, the time axes, and the data ranked along axis 1 with
    ``scipy.stats.rankdata`` are written as ``.npy`` files to the current
    working directory.

    Returns the two downsampled time axes ``(ds_ecog1_t, ds_ecog2_t)``.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only safe as long as these files come from a trusted source.
    raw_1 = np.load("../../sourcedata/non-ranked-ecog/nr-ecog1-" + patient_id + ".npy", allow_pickle = True)
    raw_2 = np.load("../../sourcedata/non-ranked-ecog/nr-ecog2-" + patient_id + ".npy", allow_pickle = True)

    # Resample each slice along axis 1.
    resampled_1, resampled_2 = (
        np.apply_along_axis(downsample_ecog, 1, raw) for raw in (raw_1, raw_2)
    )

    # New time axes: same endpoints as the originals, one value per resampled column.
    time_axis_1 = np.linspace(ecog1_t[0], ecog1_t[-1], resampled_1.shape[1])
    time_axis_2 = np.linspace(ecog2_t[0], ecog2_t[-1], resampled_2.shape[1])

    # Save downsampled ECoG + time axis.
    for filename, array in (
        ("ds-ecog1-" + patient_id + ".npy", resampled_1),
        ("ds-ecog1-" + patient_id + "-t.npy", time_axis_1),
        ("ds-ecog2-" + patient_id + ".npy", resampled_2),
        ("ds-ecog2-" + patient_id + "-t.npy", time_axis_2),
    ):
        np.save(filename, array)

    # Rank the downsampled ECoG along axis 1 (rankdata's default tie handling).
    ranked_1 = rankdata(resampled_1, axis=1)
    ranked_2 = rankdata(resampled_2, axis=1)
    np.save("tr-ds-ecog1-" + patient_id + ".npy", ranked_1)
    np.save("tr-ds-ecog2-" + patient_id + ".npy", ranked_2)

    return time_axis_1, time_axis_2

In [5]:
def get_ecog_index_of_onset(word_onset, ecog_time_axis):
    """Return the index of the time-axis sample closest to ``word_onset``.

    When several samples are equally close, the first one is returned.
    """
    return np.argmin(np.abs(ecog_time_axis - word_onset))

def get_word_onset_markers(patient_id, onsets_1, onsets_2, ds_ecog1_t, ds_ecog2_t):
    """Build and save 0/1 word-onset marker vectors for both recordings.

    For each recording a zero vector as long as its downsampled time axis is
    created, and the sample closest to every word onset is set to 1. The two
    vectors are saved as ``ecog1-<patient_id>-onset-marker.npy`` and
    ``ecog2-<patient_id>-onset-marker.npy`` in the current working directory
    and then returned.
    """
    # Recording 1
    markers_1 = np.zeros(ds_ecog1_t.shape[0])
    for onset in onsets_1:
        markers_1[get_ecog_index_of_onset(onset, ds_ecog1_t)] = 1

    # Recording 2
    markers_2 = np.zeros(ds_ecog2_t.shape[0])
    for onset in onsets_2:
        markers_2[get_ecog_index_of_onset(onset, ds_ecog2_t)] = 1

    np.save("ecog1-" + patient_id + "-onset-marker.npy", markers_1)
    np.save("ecog2-" + patient_id + "-onset-marker.npy", markers_2)

    return markers_1, markers_2

In [6]:
def robust_marker_shuffle(word_onsets):
    """Return a surrogate onset-marker vector with the gaps between onsets shuffled.

    The positions of the 1s in ``word_onsets`` are converted into a list of
    gaps: the distance from sample 0 to the first onset, followed by the
    distances between consecutive onsets. The gaps are shuffled in place with
    ``random.shuffle`` and accumulated back into positions, so the surrogate
    has the same number of onsets and the same set of gaps in a random order.
    The last onset therefore always stays at its original position, and the
    samples after it stay 0.

    An onset at index 0 has no preceding gap. It is kept fixed at index 0 in
    the surrogate; otherwise it would be silently dropped and the surrogate
    would contain one onset fewer than the input.

    Parameters
    ----------
    word_onsets : array-like
        1-D vector in which onsets are marked by the value 1.

    Returns
    -------
    numpy.ndarray
        Float vector of the same length as ``word_onsets`` containing 0s and 1s.
    """
    markers = np.asarray(word_onsets)
    onset_positions = np.flatnonzero(markers == 1)

    surrogate = np.zeros(len(markers))

    # Pin an onset at sample 0 in place; it has no leading gap to shuffle.
    if onset_positions.size > 0 and onset_positions[0] == 0:
        surrogate[0] = 1
        onset_positions = onset_positions[1:]

    # gaps[0] is the distance from sample 0 to the first remaining onset; the
    # rest are inter-onset intervals. Every gap is >= 1, so the accumulated
    # positions are distinct and never collide with index 0.
    gaps = np.diff(onset_positions, prepend=0).tolist()

    # Shuffle a plain Python list so the draw from the `random` module is the
    # same as shuffling any other list of that length.
    random.shuffle(gaps)

    # Explicit integer dtype keeps the empty case usable as an index array.
    shuffled_positions = np.cumsum(np.asarray(gaps, dtype=np.intp))
    surrogate[shuffled_positions] = 1

    return surrogate

def shuffle_onset_markers(word_onsets_1, word_onsets_2, patient_id=None, n_shuffles=1000):
    """Generate and save shuffled surrogates of both onset-marker vectors.

    Each row of the saved arrays is one call to ``robust_marker_shuffle`` on
    the corresponding marker vector. The arrays are written to
    ``ecog1-<patient_id>-shuffled-markers.npy`` and
    ``ecog2-<patient_id>-shuffled-markers.npy`` in the current working
    directory. Nothing is returned.

    Parameters
    ----------
    word_onsets_1, word_onsets_2 : array-like
        Onset-marker vectors of the first and second recording.
    patient_id : str, optional
        Patient identifier used in the output file names. When omitted, the
        notebook-global ``patient_id`` (the driver loop variable) is used, which
        is what this function implicitly relied on before.
    n_shuffles : int, optional
        Number of surrogates to generate per recording (default 1000).

    Raises
    ------
    NameError
        If ``patient_id`` is not passed and no global ``patient_id`` exists.
    """
    if patient_id is None:
        # Backward compatibility for callers that rely on the global loop
        # variable. Resolve it up front so a missing ID fails before the
        # shuffles are computed rather than after.
        try:
            patient_id = globals()["patient_id"]
        except KeyError:
            raise NameError("name 'patient_id' is not defined") from None

    shuff_markers_1 = np.zeros((n_shuffles, len(word_onsets_1)))
    shuff_markers_2 = np.zeros((n_shuffles, len(word_onsets_2)))

    # Alternate between the two recordings so the sequence of draws from the
    # `random` module is unchanged.
    for i in range(n_shuffles):
        shuff_markers_1[i, :] = robust_marker_shuffle(word_onsets_1)
        shuff_markers_2[i, :] = robust_marker_shuffle(word_onsets_2)

    np.save("ecog1-" + patient_id + "-shuffled-markers.npy", shuff_markers_1)
    np.save("ecog2-" + patient_id + "-shuffled-markers.npy", shuff_markers_2)

In [7]:
for patient_id in id_eleccount:
    # Folder holding this patient's preprocessed ECoG files
    preprocessing_dir = '/scratch/gpfs/mayaar/GravityECoG/derivatives/preprocessing/sub-ny' + patient_id

    # Time axes of the two ECoG recordings (first entry of the 'trial' field), as float arrays
    ecog1_t = np.array(
        scipy.io.loadmat(preprocessing_dir + '/eeg1_manualica_notch_time.mat')['trial'][0],
        dtype=float,
    )
    ecog2_t = np.array(
        scipy.io.loadmat(preprocessing_dir + '/eeg2_manualica_notch_time.mat')['trial'][0],
        dtype=float,
    )

    # Assign each transcript word onset to recording 1 or recording 2
    onsets_1, onsets_2 = segment_transcript_by_movie(patient_id, ecog1_t, ecog2_t)

    # Downsample the ECoG to 100 Hz, rank it, and get the downsampled time axes
    ds_ecog1_t, ds_ecog2_t = save_downsampled_ecog(patient_id, ecog1_t, ecog2_t)

    # Zero vectors as long as the downsampled time axes, set to one at word onsets
    word_onsets_1, word_onsets_2 = get_word_onset_markers(
        patient_id, onsets_1, onsets_2, ds_ecog1_t, ds_ecog2_t
    )

    # Shuffle the onset markers 1000 times and save the surrogates
    shuffle_onset_markers(word_onsets_1, word_onsets_2)