In [39]:
#
from scipy.signal import hilbert
from scipy.signal import butter, lfilter
from scipy import signal
from scipy import spatial
import os
import warnings
from matplotlib import pyplot as plt

import pandas as pd
import pickle
import scipy.io
import numpy as np
from IPython.display import clear_output

data_path = os.path.dirname(os.path.dirname(os.getcwd()))


In [40]:
# function 

def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

# ChimeraSoftware
def equal_xbm_bands(fmin, fmax, N):
# EQUAL_XBM_BANDS - Divide frequency interval into N bands of equal width
# along the human basilar membrane.
# Based on M.C. Liberman's cochlear frequency map for the cat 
# scaled to match human frequency range of hearing.
#
# fmin minimum frequency in Hz
# fmax maximum frequency in Hz
# N number of frequency bands
# fco Vector of band cutoff frequencies in Hz (size [1 X (N+1)])
    xmin = cochlear_map(fmin, 20000)
    xmax = cochlear_map(fmax, 20000)
    dx = (xmax-xmin)/N
    x = np.arange(xmin,xmax+dx,dx)
    fco = inv_cochlear_map(x, 20000)    
    return fco
    
def cochlear_map(f, Fmax):
# INV_COCHLEAR_MAP - Convert frequency to distance along the basilar membrane
# 		     using M.C. Liberman's cochlear frequency map for the cat.
#
#	f		Frequency in Hz
#	Fmax	Maximum frequency represented on the basilar membrane in Hz.
#			By default, this is 57 kHz, the value for the cat.
#			Setting Fmax to 20,000 Hz gives a map appropriate for the human cochlea.
#	x		Percent distance from apex of basilar membrane

    f = f * inv_cochlear_map(100,0)/Fmax
    x = np.log10(f/456 + .8)/.021

    return x
    
def inv_cochlear_map(x, Fmax):
# INV_COCHLEAR_MAP - Convert distance along the basilar membrane to frequency
# 		     using M.C. Liberman's cochlear frequency map for the cat.
#
# Usage: f = inv_cochlear_map(x, Fmax)
#	x		Percent distance from apex of basilar membrane
#	Fmax	Maximum frequency represented on the basilar membrane in Hz.
#			By default, this is 57 kHz, the value for the cat.
#			Setting Fmax to 20 kHz gives a map appropriate for the human cochlea.
#	f		Frequency in Hz

    f = 456 * 10.**(.021 *x) - 364.8
    if not(Fmax==0):
        f = f * Fmax/inv_cochlear_map(100,0)
    return f

# ema features

def get_ema_feaures(ema,palate_trace):
    jawaopening = []
    lipaparature = []
    lipProtrusion = []
    TBCD = []
    TMCD = []
    TTCD = []
    for i in range(0,ema.shape[1]):
        xui = ema[6,i]
        zui = ema[8,i]
        xli = ema[9,i]
        zli = ema[11,i]
        jawaopening.append(abs(zui - zli))
        
        xul = ema[0,i]
        zul = ema[2,i]
        xll = ema[3,i]
        zll = ema[5,i]
        lipaparature.append(abs(zul - zll))
        
        lipProtrusion.append(abs(xul - xll)/2)
        
        a = np.zeros((2,1))
        
        a[0],a[1] = A[12,i],A[14,i]        
        x = spatial.distance.cdist(palate_trace, a.T, 'euclidean')
        TBCD.append(min(x))

        a[0],a[1] = A[15,i],A[17,i]        
        x = spatial.distance.cdist(palate_trace, a.T, 'euclidean')
        TMCD.append(min(x))
        
        a[0],a[1] = A[18,i],A[20,i]        
        x = spatial.distance.cdist(palate_trace, a.T, 'euclidean')
        TTCD.append(min(x))

    return np.stack(jawaopening).flatten(), np.stack(lipaparature).flatten(), \
            np.stack(lipProtrusion).flatten(), np.stack(TBCD).flatten(), \
            np.stack(TMCD).flatten(), np.stack(TTCD).flatten()



In [41]:
# load data

a = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
raw_fname = a+ '\exp\exp_running_scirpt\olm_stimuli_normalRepeatTwice.mat'
data = scipy.io.loadmat(raw_fname)
raw_fname = a+ '/analysis/behaviour/data/palate_trace_only_stimuli_new.mat'
palate_trace = scipy.io.loadmat(raw_fname)
palate_trace = palate_trace['palate_trace']

In [42]:
# data parameters
bands = equal_xbm_bands(100, 10000, 6)
resample_freq = 100


# ema features 

jawaopening, lipaparature, lipProtrusion, TBCD, TMCD, TTCD

In [44]:
# extract features

filenames = data['data']['filename'][0][0][0]
speech = data['data']['speech'][0][0][0]
fs = data['data']['fs'][0][0][0][0]
lab = data['data']['lab'][0][0][0]
ema = data['data']['ema'][0][0][0]



frame=[]
for i in range(0,len(filenames)):
    # meat data
    filename = filenames[i][0].replace('.wav','')
    
    a = filename.split('_')
    if(a[0]=='n'):
        condition = 2
    elif(int(a[2])>5):
        condition = 3
    else:
        condition = 1
    
    # speech envelop
    envelop = []
    for j in range(0,len(bands)-1):
        x = butter_bandpass_filter(speech[i],bands[j], bands[j+1], fs, order=4)
        envelop.append(abs(hilbert(x)))
    envelop = np.hstack(envelop)
    envelop = np.sum(envelop,axis=1)
    
    # downsample
    a = len(envelop)/fs # Number of seconds in signal X
    a = a*resample_freq     # Number of samples to downsample
    envelop = scipy.signal.resample(envelop, int(np.ceil(a)))
    
    a = len(ema[i][0,:])/400 # Number of seconds in signal X
    a = a*resample_freq     # Number of samples to downsample
    
    A = []
    for j in range(0,ema[i].shape[0]):
        A.append(signal.resample(ema[i][j,:], int(np.ceil(a))))    
    A = np.stack(A)
    
    #time
    time = np.arange(0,A.shape[1])/resample_freq
    
    # ema features    
    jawaopening, lipaparature, lipProtrusion, TBCD, TMCD, TTCD = get_ema_feaures(A,palate_trace)
    x = np.stack((jawaopening, lipaparature, lipProtrusion, TBCD, TMCD, TTCD))

    if(len(envelop)>x.shape[1]):
        envelop = envelop[:x.shape[1]]            
    elif(x.shape[1]>len(envelop)):
        x = x[:,:len(envelop)]
    
    jawaopening = x[0,:]
    lipaparature = x[1,:]
    lipProtrusion = x[2,:]
    TBCD = x[3,:]
    TMCD = x[4,:]
    TTCD = x[5,:]
    
    # linguistic    
    phonemeRate = np.stack(lab[i]['phoneme'][0][0].flatten()).flatten()
    a = np.stack(np.where(phonemeRate=='sil')).flatten()
    phonemeRate = np.delete(phonemeRate,a)
    phonemeRate = len(phonemeRate)/time[-1]
    
    phonemeDuration = lab[i]['endTime'][0][0] - lab[i]['startTime'][0][0]
    phonemeDuration = np.delete(phonemeDuration,a)
    
    #
    df = pd.DataFrame({'filename':[filename],'condition':condition,'envelop':[envelop],
                      'phonemeRate':phonemeRate,'phonemeDuration':np.mean(phonemeDuration),
                      'time':[time],'trialno':i+1,'Trial_len':time[-1],
                      'jawaopening':[jawaopening],'lipaparature':[lipaparature],
                       'lipProtrusion':[lipProtrusion],'TBCD':[TBCD],'TMCD':[TMCD],'TTCD':[TTCD]})
    frame.append(df)
    print(i,end=' ')
    
df = pd.concat((frame),axis=0)
save_path = data_path + '/python/data/extracted_features/features.pkl'
df.to_pickle(save_path)

clear_output()


In [45]:
df.head()

Unnamed: 0,filename,condition,envelop,phonemeRate,phonemeDuration,time,trialno,Trial_len,jawaopening,lipaparature,lipProtrusion,TBCD,TMCD,TTCD
0,n_olm_10,2,"[0.001692200109040596, -0.0008084144425224105,...",10.847458,0.092265,"[0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07...",1,2.95,"[30.103634735905516, 30.032582742654824, 29.82...","[11.096096305275196, 13.238662502364491, 12.63...","[0.41183852230622175, 0.318784244534835, 0.358...","[5.461230003834577, 2.257390198437379, 3.25784...","[9.393921487414033, 3.697028843662893, 5.17781...","[10.713717561099667, 6.635367713264946, 7.6422..."
0,n_olm_11,2,"[9.174823478928865e-05, 6.050216653975809e-05,...",8.96861,0.111694,"[0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07...",2,2.23,"[30.929478292945234, 33.417389740513045, 32.51...","[10.91328544218321, 16.038433701970916, 14.219...","[1.2428282033558373, 1.738066840798263, 1.5389...","[4.962855048954533, 5.677779956563543, 4.82134...","[7.5644993810014345, 7.886240173562104, 6.8102...","[6.633780239443172, 8.089739127250022, 6.55762..."
0,n_olm_12,2,"[5.7238373804781084e-05, 0.0001869709821375507...",11.073826,0.090303,"[0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07...",3,5.96,"[31.857629008943988, 34.60441027486472, 33.904...","[10.760038424455047, 15.103861536541416, 13.91...","[0.6929427877854719, 1.1262522537662503, 1.044...","[7.628769282705471, 10.881697038963885, 10.211...","[8.146133907855065, 10.13189982874743, 9.59227...","[4.075663657871258, 3.9686479085518194, 3.7503..."
0,n_olm_136,2,"[7.923059199337057e-05, 4.1701120906607743e-05...",9.240246,0.108303,"[0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07...",4,4.87,"[31.08553903540566, 32.32608360208931, 31.4713...","[10.893062393109219, 15.149001997386549, 13.43...","[0.9272713365514891, 1.4825528881568508, 1.173...","[3.6501514281538006, 2.5273135070412205, 2.311...","[5.781948073258219, 4.623154231062672, 4.18928...","[6.291747230303505, 7.949778822541005, 6.64488..."
0,n_olm_151,2,"[2.5305367177598265e-05, 3.3395058576589286e-0...",11.157025,0.089712,"[0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07...",5,4.84,"[30.472208819268417, 29.63853505481626, 29.715...","[11.524835305530374, 11.837957971312036, 11.27...","[0.4811630301865657, 0.4792285395709919, 0.461...","[2.080607791403024, 1.0266867179550803, 1.1095...","[4.781305631490753, 2.8354140424079977, 3.1321...","[6.31820765924557, 4.138574892038586, 4.323645..."


In [5]:
# assign stimuli to the subjects
subject_name = ['Alice','Andrea','Daniel','Elena','Elenora','Elisa','Federica','Francesca'
                ,'Gianluca1','Giada','Giorgia','Jonluca','Laura','Leonardo','Linda','Lucrezia',
                'Manu','Marco','Martina','Pagani','Pasquale','Sara','Silvia','Silvia2','Tommaso']

for s in subject_name:
    a = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
    raw_fname = a +'/exp/data/matlab_exp_data/' + s + '.mat'
    mat = scipy.io.loadmat(raw_fname)
    trialno = mat['experiment']['media'][0,0]['permute'][0][0][0]
    
    response = np.stack(mat['experiment']['media'][0,0]['Cresponse'][0][0].flatten()) - \
                np.stack(mat['experiment']['media'][0,0]['Sresponse'][0][0].flatten())
    
    RT = np.stack(mat['experiment']['media'][0,0]['responseT'][0][0].flatten())
    
    df1 = pd.DataFrame({'trialno': range(200)})
    df1['trialno'] = trialno
    df1['response'] = response
    df1['RT'] = RT
    a = df.merge(df1,on='trialno')


Unnamed: 0,trialno,response,RT
0,119,0,1660.0
1,182,0,929.0
2,80,0,1487.0
3,30,0,1224.0
4,67,0,1054.0


In [47]:
df.iloc[181]

TBCD               [[4.282308388817891], [4.282308388817891], [4....
TMCD               [[5.643807511839657], [5.643807511839657], [5....
TTCD               [[6.490858629357428], [6.490858629357428], [6....
condition                                                          3
envelop            [-3.2506151529461245e-06, 2.2438887994116076e-...
filename                                                    olm_55_7
jawaopening        [26.863450732820823, 28.12489543631078, 27.357...
lipProtrusion      [2.640908861307007, 2.8851867402242624, 2.6805...
lipaparature       [9.629222866649116, 11.171958557784848, 10.162...
phonemeDuration                                            0.0714158
phonemeRate                                                  14.0845
time               [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07...
trialno                                                          181
Name: 0, dtype: object