In [196]:
import os
import numpy as np
import math

We have 3 sets of same data.
1. the original trahectory coordinates sequences
2. velocity, angles, angular velocity sequences
3. event labels sequence for pause and reversal

#### features for each event(pause, reversal, nonevent):
1. Speed Mean &rarr; SMPause, SMNonpause
2. Speed Variance &rarr; SVPasue, SVNonpause
3. Angular Speed Mean &rarr; ASMReversal, ASMNonreversal
4. Angular Speed Variance &rarr; ASVReversal, ASVNonreversal
5. Event Duration Mean &rarr; DMPause, DMNonpause, DMReversal, DMNonreversal
6. Event Duration Variance &rarr; DVPause, DVNonpause, DVReversal, DVNonreversal
7. Correlation between Speed and Angular Speed

#### features across events:
8. Pause Frequency
9. Reversal Frequency

#### other features:
10. Confinement Ratio &rarr; Sensitive to time lenght, not suitable for events since events are short

Let's start from these 10 features, we will end up with 3x8+2 features, they will be flatten into 1d vector then feed into MLP.

`Sep`: Seperate the velocity or angular velocity sequences into event(`Label=1`) or nonevent(`Label=0`) list(`PureSeq`). Each list will have different lenght of sequence. E.g. 60 steps of velocity(`DataSeq`) can be separated into `PureSeq` when `Label=1`: [[2, 3, 1], [2, 1], [5, 6]] for pause event, or `PureSeq` when `Label=0`:[[4, 5], [1, 2]] for non pause event. The two `PureSeq` merge together and reshape into 1d will be the original `DataSeq`.

In [238]:
def Sep(DataSeq, LabelSeq, Label):
    labelseq = (LabelSeq == Label).astype(int)
    PureSeq = []
    tmp = []
    for x in DataSeq*labelseq:
        if x!=0: 
            tmp.append(x)
        else:
            PureSeq.append(tmp)
            tmp = []
    if tmp!=[]:
        PureSeq.append(tmp)
    PureSeq = [x for x in PureSeq if x != []]
    return PureSeq

def SpeedMeanANDVariance(PureSeq): 
    flat=[]
    for sec in PureSeq:
        flat = flat + sec
    SM = np.mean(np.array(flat))
    SV = np.var(np.array(flat))
    return SM, SV

def DurMeanANDVariance(PureSeq): 
    dur=[]
    for sec in PureSeq:
        dur.append(len(sec))
    DM = sum(dur)/len(dur)
    DV = np.var(np.array(dur))
    return DM, DV

def ConfinementRatio(CoorSeq):
    d_total = 0
    for i in range(1, len(CoorSeq)):
        d_total += math.sqrt(math.pow((CoorSeq[i][0]-CoorSeq[i-1][0]), 2)+math.pow((CoorSeq[i][1]-CoorSeq[i-1][1]), 2))
    d_net = math.sqrt(math.pow((CoorSeq[-1][0]-CoorSeq[0][0]), 2)+math.pow((CoorSeq[-1][1]-CoorSeq[0][1]), 2))
    return d_net/d_total

In [247]:
coor_list = os.listdir("coor")
vaa_list = os.listdir("3seq")
label_list = os.listdir("2EventSeq")
for i in range(5):
    coor_class = np.load(os.path.join("coor", coor_list[i]))
    vaa_class = np.load(os.path.join("3seq", vaa_list[i]))
    label_class = np.load(os.path.join("2EventSeq", label_list[i]))
    
    ff_class = np.reshape(np.zeros(26), (1,26)) ##ff is for final features
    for n in range(coor_class.shape[0]):
        CoorSeq = coor_class[n]
        DataSeq_V = vaa_class[n,:,0]
        DataSeq_Av = vaa_class[n,:,1]
        LabelSeq_Pause = label_class[n,:,0]
        LabelSeq_Reversal = label_class[n,:,1]
        
        if not LabelSeq_Pause.any() or not LabelSeq_Reversal.any():
            ff_sample = np.reshape(np.zeros(26), (1,26))
            ff_class = np.append(ff_class,ff_sample,axis=0)
            #print("No event")
            continue
            
        PureSeq_Pause = Sep(DataSeq_V, LabelSeq_Pause, 1)
        PureSeq_Npause = Sep(DataSeq_V, LabelSeq_Pause, 0)
        PureSeq_Reversal = Sep(DataSeq_Av, LabelSeq_Reversal, 1)
        PureSeq_Nreversal = Sep(DataSeq_Av, LabelSeq_Reversal, 0)
        
        #6
        speed_mean_pause,speed_var_pause = SpeedMeanANDVariance(PureSeq_Pause)
        speed_mean_npause,speed_var_npause = SpeedMeanANDVariance(PureSeq_Npause)
        speed_mean_all = np.mean(DataSeq_V)
        speed_var_all = np.var(DataSeq_V)
        
        #6
        aspeed_mean_reversal,aspeed_var_reversal = SpeedMeanANDVariance(PureSeq_Reversal)
        aspeed_mean_nreversal,aspeed_var_nreversal = SpeedMeanANDVariance(PureSeq_Nreversal)
        aspeed_mean_all = np.mean(DataSeq_Av)
        aspeed_var_all = np.var(DataSeq_Av)
        
        #8
        dur_mean_pause, dur_var_pause = DurMeanANDVariance(PureSeq_Pause)
        dur_mean_npause, dur_var_npause = DurMeanANDVariance(PureSeq_Npause)
        dur_mean_reversal, dur_var_reversal = DurMeanANDVariance(PureSeq_Reversal)
        dur_mean_nreversal, dur_var_nreversal = DurMeanANDVariance(PureSeq_Nreversal)
        
        #6
        vaa_corr = np.correlate(DataSeq_V, DataSeq_Av)
        freq_pause = len(PureSeq_Pause)
        freq_reversal = len(PureSeq_Reversal)
        ratio_pause = sum(LabelSeq_Pause)/len(LabelSeq_Pause)
        ratio_reversal = sum(LabelSeq_Reversal)/len(LabelSeq_Reversal)
        ConfineRatio = ConfinementRatio(CoorSeq)   
    
        ff_sample = np.array([speed_mean_pause,speed_var_pause,speed_mean_npause,speed_var_npause,
                             speed_mean_all,speed_var_all,aspeed_mean_reversal,aspeed_var_reversal,
                             aspeed_mean_nreversal,aspeed_var_nreversal,aspeed_mean_all, aspeed_var_all,
                             dur_mean_pause, dur_var_pause, dur_mean_npause, dur_var_npause,
                              dur_mean_reversal, dur_var_reversal,dur_mean_nreversal, dur_var_nreversal,
                             vaa_corr, freq_pause, freq_reversal, ratio_pause, ratio_reversal, ConfineRatio])
        ff_sample = np.reshape(ff_sample, (1,26))
        ff_class = np.append(ff_class,ff_sample,axis=0)
    ff_class = np.delete(ff_class, 0, axis=0)
    class2EventSeq_path = os.path.join("26features",vaa_list[i][:-7]+"26features.npy")
    print(class2EventSeq_path, "is saved")
    np.save(class2EventSeq_path, ff_class)

26features\Class1_S7_1263_326features.npy is saved


FileNotFoundError: [Errno 2] No such file or directory: '26features\\Class1_S7_1263_326features.npy'

In [246]:
ff_class.shape

(981, 26)