In [None]:
import mne
import os
import scipy.io
import listen_italian_functions
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import pickle
import warnings
warnings.filterwarnings('ignore')
from itertools import permutations,combinations
from IPython.display import clear_output

# Project root: start from the notebook's working directory and climb two levels.
data_path = os.path.dirname(os.getcwd())
data_path = os.path.dirname(data_path)

# Subject identifiers; they are used verbatim when building the per-subject file names.
subject_name = [
    'Alice', 'Andrea', 'Daniel', 'Elena', 'Elenora', 'Elisa', 'Federica', 'Francesca',
    'Gianluca1', 'Giada', 'Giorgia', 'Jonluca', 'Laura', 'Leonardo', 'Linda', 'Lucrezia',
    'Manu', 'Marco', 'Martina', 'Pagani', 'Pasquale', 'Sara', 'Silvia', 'Silvia2', 'Tommaso',
]

# Offset in seconds that the coherence helpers add to the start of every crop window.
remove_first = 0.5


# epoching and saving

In [None]:
# Epoch every subject's recording between Tmin and Tmax seconds and store one
# epochs file per subject. The epoching itself is delegated to
# listen_italian_functions.epoch; per the original note it also removes the
# wrong-answer trials and separates the three conditions (not visible here).
Tmin, Tmax = 0, 3.51
trial_len = 2

raw_dir = data_path + '/python/data/'
behaviour_dir = data_path + '/behaviour/data/subject/'
epochs_suffix = '-coh-epo-' + str(Tmin) + '-' + str(Tmax) + '-trialLen-' + str(trial_len) + '.fif'

for s in subject_name:
    # Continuous recording plus the matching behavioural .mat file.
    raw = mne.io.read_raw_fif(raw_dir + s + '_raw.fif', preload=True)
    mat = scipy.io.loadmat(behaviour_dir + s + '_behaviour.mat')

    epochs = listen_italian_functions.epoch(raw, mat, Tmin, Tmax)

    save_path = raw_dir + 'coherence_epochs/' + s + epochs_suffix
    epochs.save(save_path)
    print('----------------------------------------------------------------------------------------------------------------' + s)



# Read the epochs

In [None]:
# Same epoching parameters as used when the files were written; they are part
# of the file names.
Tmin, Tmax, trial_len = 0, 3.51, 2

epochs_dir = data_path + '/python/data/coherence_epochs/'
epochs_suffix = '-coh-epo-' + str(Tmin) + '-' + str(Tmax) + '-trialLen-' + str(trial_len) + '.fif'

# GA_epoches[i] holds the epochs of subject_name[i].
GA_epoches = []
for s in subject_name:
    save_path = epochs_dir + s + epochs_suffix
    epochs = mne.read_epochs(save_path)
    GA_epoches.append(epochs)
    print('----------------------------------------------------------------------------------------------------------------' + s)


In [None]:
# Number of trials per subject and condition.
# The display labels below map to the lower-case keys used to select a
# condition from the epochs ('Hyper' -> 'hyper', ...).
condition = ['Hyper','Normal','Hypo']
frames = []
for s in range(0,len(subject_name)):
    trial_counts = [GA_epoches[s][label.lower()].get_data().shape[0] for label in condition]
    # Build the three rows of a subject in one constructor call:
    # DataFrame.append was removed in pandas 2.0. The index (subject position
    # repeated for every row) and the column order match the previous result.
    df = pd.DataFrame({'Condition': condition,
                       'Subject': subject_name[s],
                       'noTrials': trial_counts},
                      index=[s] * len(condition))
    frames.append(df)

data = pd.concat(frames, axis=0)

In [None]:
# Grouped bar chart of the trial counts: subjects on the x axis, one bar per condition.
trial_totals = data.groupby(['Subject', 'Condition']).sum()
a = trial_totals['noTrials'].unstack().plot(kind='bar', figsize=(20, 5), grid=True)

# Measuring the coherence

In [None]:
def coherence_preprocess_delay(epochs,remove_first,d,trial_len,extra_channels,eeg_channles,condition):
    """Cut an EEG window and a speech window and stack them channel-wise.

    Parameters
    ----------
    epochs : epochs object supporting ``copy``, ``[condition]``, ``pick_channels``,
        ``crop`` and ``get_data`` (mne.Epochs in this notebook). Not modified.
    remove_first : offset in seconds added to the start of both windows.
    d : start of the EEG window in seconds, before ``remove_first`` is added.
    trial_len : length of both windows in seconds.
    extra_channels : names of the speech-feature channels.
    eeg_channles : names of the EEG channels.
    condition : key used to select a subset of ``epochs``; ``'All'`` keeps everything.

    Returns
    -------
    numpy array with the EEG channels followed by the speech channels along
    axis 1. The speech window always starts at ``0.5 + remove_first`` seconds,
    so only the EEG window moves with ``d``.
    """
    selected = epochs if condition == 'All' else epochs[condition]

    eeg_start = d + remove_first
    speech_start = 0.5 + remove_first

    # Work on private copies: pick_channels and crop are chained on the copy,
    # never on the caller's epochs.
    eeg_window = selected.copy().pick_channels(eeg_channles).crop(eeg_start, eeg_start + trial_len)
    speech_window = selected.copy().pick_channels(extra_channels).crop(speech_start, speech_start + trial_len)

    return np.concatenate((eeg_window.get_data(), speech_window.get_data()), axis=1)


def get_coherence(epochs,sfreq,fmin,fmax,indices):
    """Run mne.connectivity.spectral_connectivity and return only the connectivity values.

    ``epochs`` is forwarded as the data argument, ``sfreq``/``fmin``/``fmax``/
    ``indices`` as the keywords of the same name. The estimate uses the 'coh'
    method in adaptive multitaper mode with ``faverage=True``; callers in this
    notebook index the result as ``con[:, band]``.
    """
    settings = dict(
        method='coh',
        mode='multitaper',
        sfreq=sfreq,
        fmin=fmin,
        fmax=fmax,
        indices=indices,
        tmin=0,
        mt_adaptive=True,
        faverage=True,
        block_size=1000,
        verbose='ERROR',
    )
    # The call returns five values; only the connectivity array is used.
    coherence, _freqs, _times, _n_epochs, _n_tapers = mne.connectivity.spectral_connectivity(epochs, **settings)
    return coherence

In [None]:
# Frequency bands as (label, lower edge, upper edge); the edges are passed to
# get_coherence as fmin/fmax.
iter_freqs = [
    ('fr', 1, 3),
    ('fr', 4, 8),
    ('fr', 8, 12),
    ('fr', 12, 18),
    ('fr', 18, 24),
    ('fr', 24, 40)
]

fmin = [band[1] for band in iter_freqs]
fmax = [band[2] for band in iter_freqs]

features = ['envelop','jawaopening','lipaparature','lipProtrusion','TTCD','TMCD','TBCD']
eeg_channles = GA_epoches[0].ch_names[0:59]
sfreq = GA_epoches[0].info['sfreq']

# Only the pooled condition is analysed here.
# Use ['hyper','normal','hypo','All'] to get per-condition values as well.
condition = ['All']

# Single EEG delay in seconds. Use np.arange(-5,6) / 10 to scan -0.5 .. 0.5 s.
delay = [0.2]

# Connectivity pairs: every speech-feature channel (stacked after the EEG
# channels by coherence_preprocess_delay) against every EEG channel.
n_eeg = len(eeg_channles)
indices = (np.repeat(np.arange(n_eeg, n_eeg + len(features)), n_eeg),
           np.tile(np.arange(0, n_eeg), len(features)))

frames = []
for s in range(0,len(subject_name)):
    for d in delay:
        for con in condition:
            # d is shifted by 0.5 s before being handed to the helper.
            c = coherence_preprocess_delay(GA_epoches[s],remove_first,d+0.5,trial_len,features,eeg_channles,con)
            coh = get_coherence(c,sfreq,fmin,fmax,indices)

            # Trials that entered this estimate (first axis of c). The previous
            # code stored the subject's total epoch count for every condition,
            # which is only right for 'All'.
            n_trials = c.shape[0]

            for fr, (label, low, high) in enumerate(iter_freqs):
                a = str(label)+ ' '+str(low)+' - '+str(high)+'Hz'
                # One chunk of EEG-channel values per feature, in `features` order.
                cc = np.split(coh[:,fr], len(features))
                for f, feature in enumerate(features):
                    df = pd.DataFrame({'Condition':con,'Freq':a,'Delay':d,'Subject':subject_name[s],'Feature':feature,
                                       'Data':[cc[f].flatten()],'noTrials':n_trials})
                    frames.append(df)

        print(str(d)+'-'+subject_name[s])

data = pd.concat(frames, axis=0)
save_path = data_path + '/python/data/coherence/Coh-removedFirst-'+str(remove_first)+'.pkl'
data.to_pickle(save_path)

# Bootstrapping: create a surrogate coherence distribution for each subject, then subtract the average of that distribution from the original coherence value.

In [None]:
def Coherence_preprocess_delay_surrogate(epochs,remove_first,d,trial_len,eeg_channles,keep_feat,condition,iter_freqs,
                                         max_surrogates=500):
    """Coherence between EEG and speech taken from *mismatched* trials.

    Each surrogate pairs the EEG of every trial with the speech of a different
    trial (pairs come from ``permutations(trials, 2)`` and are handed out by
    ``get_combinations`` so that no pair is reused across surrogates).

    Parameters
    ----------
    epochs : epochs object (mne.Epochs in this notebook); not modified.
    remove_first : offset in seconds added to the start of both windows.
    d : start of the EEG window in seconds, before ``remove_first`` is added.
    trial_len : length of both windows in seconds.
    eeg_channles : names of the EEG channels.
    keep_feat : names of the speech-feature channels.
    condition : key used to select a subset of ``epochs``; ``'All'`` keeps everything.
    iter_freqs : sequence of ``(label, fmin, fmax)`` bands.
    max_surrogates : upper bound on the number of surrogates (default 500, the
        value that used to be hard-coded).

    Returns
    -------
    frames : array of shape (n_eeg * n_features, n_bands, no_surrogates); rows
        are grouped feature by feature.
    no_surrogates : number of surrogates actually built (can be lower than
        ``max_surrogates`` when the pool of unused pairs runs out).
    """
    selected = epochs if condition == 'All' else epochs[condition]

    eeg = selected.copy().pick_channels(eeg_channles)
    speech = selected.copy().pick_channels(keep_feat)

    # Both objects are private copies, so cropping in place is safe.
    eeg.crop(d+remove_first, d+remove_first+trial_len)
    speech.crop(0.5+remove_first, 0.5+remove_first+trial_len)

    sfreq = eeg.info['sfreq']
    E = eeg.get_data()
    S = speech.get_data()

    n_trials = S.shape[0]
    n_eeg = E.shape[1]
    n_feat = S.shape[1]

    if n_trials < 2:
        # No trial can be paired with a different one.
        return np.zeros((n_eeg * n_feat, len(iter_freqs), 0)), 0

    # Every ordered (eeg_trial, speech_trial) pair with eeg_trial != speech_trial.
    pool = np.asarray(list(permutations(np.arange(0, n_trials), 2))).reshape(-1, 2)
    order = np.arange(0, n_trials)

    pairings = []
    exhausted = False
    for _ in range(max_surrogates):
        if exhausted:
            break
        order = np.roll(order, 1)
        failed_rotations = 0
        while True:
            pairing, pool = get_combinations(order, pool)
            if pairing.shape[0] == n_trials:
                pairings.append(pairing)
                break
            failed_rotations += 1
            # get_combinations leaves the pool untouched on failure, so once
            # every rotation of `order` has failed no further surrogate can be
            # built. Without this bound the loop would never terminate.
            if len(pool) == 0 or failed_rotations >= n_trials:
                exhausted = True
                break
            order = np.roll(order, 1)
            print('.',end=' ')

    no_surrogates = len(pairings)

    fmin = [band[1] for band in iter_freqs]
    fmax = [band[2] for band in iter_freqs]

    # Every speech channel (stacked after the EEG channels) against every EEG
    # channel. Derived from the data so that any number of features works; the
    # previous version hard-coded channels 59 and 60.
    indices = (np.repeat(np.arange(n_eeg, n_eeg + n_feat), n_eeg),
               np.tile(np.arange(0, n_eeg), n_feat))

    frames = np.zeros((n_eeg * n_feat, len(iter_freqs), no_surrogates))
    for i, pairing in enumerate(pairings):
        print('--------------------'+str(i))
        # Column 0 selects the EEG trials, column 1 the speech trials.
        c = np.concatenate((E[pairing[:, 0]], S[pairing[:, 1]]), axis=1)
        frames[:, :, i] = get_coherence(c, sfreq, fmin, fmax, indices)
        clear_output()

    return frames,no_surrogates

def get_combinations(X,a):
    """Greedily draw one (first, second) pair from ``a`` for every entry of ``X``.

    ``X`` is walked in order. For each value the first still-unused row of
    ``a`` whose first column equals it and whose second column has not been
    picked yet is taken.

    Returns ``(pairs, remaining)``. ``pairs`` holds the rows that were picked.
    ``remaining`` is ``a`` without those rows when a pair was found for every
    entry of ``X``; otherwise it is ``a`` itself, unchanged.
    """
    available = np.ones(len(a), dtype=bool)
    taken_second = []
    picked = []

    for wanted in X:
        candidates = np.flatnonzero(available & (a[:, 0] == wanted))
        for row in candidates:
            if a[row, 1] not in taken_second:
                picked.append(a[row, :])
                taken_second.append(a[row, 1])
                available[row] = False
                break

    pairs = np.asarray(picked)
    if len(picked) == len(X):
        return pairs, a[available]
    return pairs, a

def get_coherence(epochs,sfreq,fmin,fmax,indices):
    """Return the connectivity array from mne.connectivity.spectral_connectivity.

    Same definition as the ``get_coherence`` in the "Measuring the coherence"
    section: 'coh' method, adaptive multitaper mode, ``faverage=True``, with
    ``epochs`` passed as the data and ``sfreq``/``fmin``/``fmax``/``indices``
    forwarded unchanged.
    """
    options = {
        'method': 'coh',
        'mode': 'multitaper',
        'sfreq': sfreq,
        'fmin': fmin,
        'fmax': fmax,
        'indices': indices,
        'tmin': 0,
        'mt_adaptive': True,
        'faverage': True,
        'block_size': 1000,
        'verbose': 'ERROR',
    }
    # Five values come back; everything except the connectivity array is discarded.
    con, _freqs, _times, _n_epochs, _n_tapers = mne.connectivity.spectral_connectivity(epochs, **options)
    return con

In [None]:
eeg_chan = GA_epoches[0].ch_names[0:59]
sfreq = GA_epoches[0].info['sfreq']

# Single EEG delay in seconds (np.arange(-5,6) / 10 would scan -0.5 .. 0.5 s).
delay = [0.2]

condition = ['All']

features = ['envelop','lipaparature']

# Frequency bands as (label, lower edge, upper edge).
iter_freqs = [
    ('fr', 0.25, 1),
    ('fr', 0.5, 2),
    ('fr', 1, 3),
    ('fr', 1, 4),
    ('fr', 2, 6),
    ('fr', 4, 8),
    ('fr', 8, 12),
    ('fr', 12, 18),
    ('fr', 18, 24),
    ('fr', 24, 40)
]

# One pickle per subject, holding the full surrogate distribution for every
# band and feature (no averaging is done here).
for s, subject in enumerate(subject_name):
    frame = []
    for delay_value in delay:
        for con in condition:
            # The delay is shifted by 0.5 s before being handed to the helper.
            coh, no_surrogates = Coherence_preprocess_delay_surrogate(
                GA_epoches[s], remove_first, delay_value + 0.5, trial_len,
                eeg_chan, features, con, iter_freqs)

            for fr, (band_label, low, high) in enumerate(iter_freqs):
                band_name = str(band_label) + ' ' + str(low) + ' - ' + str(high) + 'Hz'
                # One chunk per feature, in `features` order.
                per_feature = np.split(coh[:, fr, :], len(features))
                for feature, surrogate_values in zip(features, per_feature):
                    frame.append(pd.DataFrame({'Condition':con,'Freq':band_name,'Delay':delay_value,
                                               'Subject':subject,'Feature':feature,
                                               'Data':[surrogate_values],'no_surrogates':no_surrogates}))

            print(str(delay_value)+'-'+subject)

    data = pd.concat(frame, axis=0)
    save_path = (data_path + '/python/data/SurrogateCoherence/SurrogateCoherence-removedFirst-'
                 + str(remove_first) + '-' + ('-').join(features) + '-' + subject + '.pkl')
    data.to_pickle(save_path)

# Put the per-subject surrogate files into one file.
a = ('-').join(features)
surrogate_prefix = (data_path + '/python/data/SurrogateCoherence/SurrogateCoherence-removedFirst-'
                    + str(remove_first) + '-' + a)

# Per-subject files are named <prefix>-<subject>.pkl; the merged file is <prefix>.pkl.
A = [pd.read_pickle(surrogate_prefix + '-' + s + '.pkl') for s in subject_name]

data = pd.concat(A, axis=0)
save_path = surrogate_prefix + '.pkl'
data.to_pickle(save_path)


# Measure the coherence at specific delays while shifting the EEG and speech windows relative to speech onset

In [None]:
# tqdm_notebook drives the progress bars below but is not imported in the
# notebook's import cell, so bring it into scope here.
from tqdm import tqdm_notebook

# NOTE(review): this rebinds `remove_first`, which the earlier cells use as a
# scalar (0.5). Re-run the configuration cell before re-running those cells.
remove_first = [0,0.1,0.2,0.3,0.4,0.5] #seconds

for rf in remove_first:
    # let's explore some frequency bands
    iter_freqs = [
        ('Delta', 1, 3),
        ('Theta', 4, 7),
        ('Alpha', 8, 12),
        ('Beta', 13, 25),
        ('Gamma', 30, 40)
    ]
    features = ['envelop','jawaopening','lipaparature','lipProtrusion','TTCD','TMCD','TBCD']
    condition = ['Hyper','Normal','Hypo']
    # Delays in seconds, shifted by 0.5 to match the helper's convention
    # (np.arange(0,1.1,0.1) would scan 0 .. 1 s).
    # NOTE(review): the shifted value is what ends up in the 'Delay' column,
    # unlike the other coherence cells, which store the unshifted delay -- confirm.
    delay = np.add([0,0.1,0.2],0.5)
    indices = (np.repeat([np.arange(59,len(features)+59)],59),np.tile(np.arange(0,59),len(features)))
    extra_channels = ['envelop','jawaopening','lipaparature','lipProtrusion','TTCD','TMCD','TBCD']
    eeg_channles = np.setdiff1d(GA_epoches[0].ch_names, extra_channels)
    event_id = {'hyper': 1,'normal': 2,'hypo': 3}
    ch_types = np.repeat('eeg', len(features)+59)
    ch_names = np.hstack((eeg_channles,features))
    info = mne.create_info(ch_names = ch_names.tolist(),ch_types = ch_types,sfreq = GA_epoches[0].info['sfreq'])
    ch_names = np.setdiff1d(extra_channels,features)

    for s in tqdm_notebook(range(0,len(subject_name)), desc='Subjects'):
        # Trial counts depend only on the subject, so compute them once instead
        # of calling get_data() for every delay, band and feature.
        n_trials = {
            'Hyper': GA_epoches[s]['hyper'].get_data().shape[0],
            'Normal': GA_epoches[s]['normal'].get_data().shape[0],
            'Hypo': GA_epoches[s]['hypo'].get_data().shape[0],
            'All': GA_epoches[s].get_data().shape[0],
        }

        frames = []
        for d in tqdm_notebook(delay, desc='Delay'):

            epoch = listen_italian_functions.coherence_preprocess_delay(GA_epoches[s],rf,d,trial_len,
                                                                        extra_channels,eeg_channles,info,ch_names,event_id)

            for band, fmin, fmax in iter_freqs:
                a = band+ ' '+str(fmin)+' - '+str(fmax)+'Hz'

                # Coherence per condition label, split into one chunk per feature.
                # A key of None means "all trials".
                coherence = {}
                for label, key in (('Hyper', 'hyper'), ('Normal', 'normal'), ('Hypo', 'hypo'), ('All', None)):
                    selection = epoch if key is None else epoch[key]
                    con, freqs, times, n_epochs, n_tapers = listen_italian_functions.coherence_measure(selection,fmin, fmax,indices)
                    coherence[label] = np.split(con, len(features))

                for f, feature in enumerate(features):
                    rows = [pd.DataFrame({'Condition':label,'Freq':a,'Delay':d,'Feature':feature,
                                          'noTrials':n_trials[label],
                                          'Subject':subject_name[s],'Data':[coherence[label][f].flatten()]})
                            for label in ('Hyper', 'Normal', 'Hypo', 'All')]
                    # pd.concat replaces DataFrame.append, which was removed in
                    # pandas 2.0; row order and index are unchanged.
                    frames.append(pd.concat(rows, axis=0))
            print(d)

        data=pd.concat((frames),axis=0)
        save_path = data_path + '/analysis/python/data/shift/coherence-trialLen-'+str(trial_len)+'-removedFirst-'+str(rf)+'s-'+subject_name[s]
        data.to_pickle(save_path)