In [1]:
import glob
import numpy as np
import pandas as pd

In [34]:
# Load every exported feature archive.
# - A real list (not `map`) is required: later cells call len(files) and
#   iterate over it, and on Python 3 `map` is a one-shot iterator.
# - glob returns paths in arbitrary order; sorting keeps the row order of the
#   generated dataset reproducible from one run/machine to the next.
files = [
    np.load(path)
    for path in sorted(glob.glob("../export/data/feature_data_*_with_pupil.npz"))
]

In [35]:
# Signals and per-signal statistics used as features; a feature column is
# looked up under the name "<signal>_<attribute>".
selected_signals = ["ECG", "EDA", "Resp", "SKT"]
selected_attribute = [
    "mean",
    "std",
    "min",
    "max",
    "mean_diff",
    "mean_abs_diff",
]

# Sample window taken from every session: the first `skip_sample` samples are
# dropped, then at most `number_of_sample` samples are kept.
skip_sample = 100
number_of_sample = 800

# Sizes derived from the configuration above and from the loaded files.
n_subject = len(files)
n_selected_signals = len(selected_signals)
n_selected_attribute = len(selected_attribute)

In [57]:
# Session ids to discard, keyed by the media file name without its extension.
# Recordings of a media file that is not listed here are ignored entirely.
sessions_to_skip = {
    "30"   : [800, 1178, 1712, 2628, 3646],
    "53"   : [426, 814, 3028, 3538],
    "69"   : [1726, 2866, 3134, 3670],
    "90"   : [664, 932, 1714, 3012],
    "111"  : [32, 1210, 2354, 3158]
}

# Sample window kept from every session.
sample_window = slice(skip_sample, skip_sample + number_of_sample)


def _session_frame(features, label, pupil):
    """Build the per-timestep table of one session.

    features -- mapping from "<signal>_<attribute>" to that feature's samples
    label    -- value written to the "label" column of every row
    pupil    -- one-column DataFrame appended to the right of the features

    Returns a DataFrame with one column per selected feature, then "label",
    then the pupil column.
    """
    frame = pd.DataFrame()
    for signal in selected_signals:
        for attribute in selected_attribute:
            # one column per feature
            column = signal + "_" + attribute
            frame[column] = features[column][sample_window]
    # Assigned once, after the feature columns exist, so the scalar is
    # broadcast over every row.
    frame["label"] = label
    return pd.concat([frame, pupil], axis=1, sort=False)


valid_files = []      # files whose media file is listed in sessions_to_skip
valence_frames = []   # one table per kept session
arousal_frames = []

for f in files:
    # An .npz archive loads an entry lazily on every f[...] access, so each
    # entry is fetched exactly once per file.
    session = f["session_info"].tolist()

    # The same key is used for the filter and for the lookup below, so a file
    # that passes the filter can never miss in sessions_to_skip.
    media = session["mediaFile"].split(".")[0]
    if media not in sessions_to_skip:
        continue
    valid_files.append(f)

    if int(session["sessionId"]) in sessions_to_skip[media]:
        continue

    # Pupil feature: element-wise mean of entries 0 and 1 of the "pupil" array
    # (presumably left and right eye -- TODO confirm against the exporter).
    pupil = f["pupil"].tolist()
    mean_pupil = np.array(
        [pupil[0][sample_window], pupil[1][sample_window]]
    ).mean(axis=0)
    data_pupil = pd.DataFrame(mean_pupil, columns=["Pupil"])

    valence_frames.append(
        _session_frame(f["valence"].tolist(), int(session["feltVlnc"]), data_pupil)
    )
    arousal_frames.append(
        _session_frame(f["arousal"].tolist(), int(session["feltArsl"]), data_pupil)
    )

if not valence_frames:
    raise ValueError("no session left after filtering: nothing to build the dataset from")

# Concatenate all sessions in one call instead of growing a DataFrame inside
# the loop (which re-copies the accumulated data on every iteration).
x_valence = pd.concat(valence_frames, ignore_index=True)
x_arousal = pd.concat(arousal_frames, ignore_index=True)

# split labels from data
y_valence = x_valence["label"]
y_arousal = x_arousal["label"]

x_valence = x_valence.drop(["label"], axis=1)
x_arousal = x_arousal.drop(["label"], axis=1)

# Replace missing values with the per-column median
x_valence = x_valence.fillna(x_valence.median())
x_arousal = x_arousal.fillna(x_arousal.median())

In [58]:
# Preview only the first rows instead of rendering the whole feature table.
x_valence.head(10)

Unnamed: 0,ECG_mean,ECG_std,ECG_min,ECG_max,ECG_mean_diff,ECG_mean_abs_diff,EDA_mean,EDA_std,EDA_min,EDA_max,...,Resp_max,Resp_mean_diff,Resp_mean_abs_diff,SKT_mean,SKT_std,SKT_min,SKT_max,SKT_mean_diff,SKT_mean_abs_diff,Pupil
0,0.591976,0.346332,0.659456,0.455666,0.518376,0.354627,0.186122,0.269702,0.156360,0.221663,...,0.431373,0.405084,0.197809,0.091188,0.089824,0.149954,0.088744,0.850650,0.029230,0.542305
1,0.585329,0.352403,0.650345,0.462035,0.514925,0.363294,0.183203,0.262954,0.156360,0.218291,...,0.431373,0.420240,0.197191,0.092286,0.091275,0.149954,0.091082,0.857464,0.031062,0.521163
2,0.578658,0.356050,0.641987,0.468832,0.512161,0.369152,0.180437,0.254290,0.156360,0.214920,...,0.431373,0.435383,0.197275,0.093404,0.089934,0.151981,0.091082,0.851115,0.028568,0.504552
3,0.571970,0.357705,0.632870,0.475203,0.508722,0.378052,0.177827,0.243965,0.156360,0.211553,...,0.431373,0.451082,0.197844,0.094512,0.091444,0.151981,0.093486,0.857820,0.030469,0.508509
4,0.565260,0.356569,0.624521,0.482036,0.505943,0.382399,0.175379,0.232172,0.156360,0.208194,...,0.431373,0.466658,0.199057,0.095640,0.090234,0.154055,0.093486,0.851596,0.027926,0.536886
5,0.558532,0.353371,0.615388,0.489122,0.502524,0.380812,0.173096,0.219109,0.156360,0.204846,...,0.431373,0.482670,0.200694,0.096759,0.091883,0.154055,0.095967,0.858221,0.029899,0.514061
6,0.551783,0.349340,0.607065,0.496854,0.499719,0.375670,0.170979,0.204987,0.156360,0.201512,...,0.431373,0.498436,0.202898,0.097897,0.090889,0.156158,0.095967,0.852159,0.027308,0.484947
7,0.545014,0.345282,0.597890,0.504116,0.496337,0.373173,0.169029,0.190011,0.156360,0.198196,...,0.431373,0.514506,0.205448,0.099028,0.092744,0.156158,0.098531,0.858726,0.029343,0.497665
8,0.538228,0.340703,0.589647,0.511824,0.493468,0.368166,0.167248,0.174384,0.156360,0.194899,...,0.431373,0.530193,0.208484,0.100179,0.092020,0.158265,0.098531,0.852827,0.026721,0.490926
9,0.531415,0.336193,0.580282,0.519096,0.490226,0.365596,0.165636,0.158314,0.156360,0.191624,...,0.431373,0.546037,0.211804,0.101324,0.094104,0.158265,0.101160,0.859349,0.028791,0.452867


In [59]:
# Sanity check: each feature table and its label vector must have the same
# number of rows. The single-argument print(...) form produces the same output
# on Python 2 and Python 3.
print(x_valence.shape)
print(y_valence.shape)

print(x_arousal.shape)
print(y_arousal.shape)

print(x_valence.columns)

(82400, 25)
(82400,)
(82400, 25)
(82400,)
Index([u'ECG_mean', u'ECG_std', u'ECG_min', u'ECG_max', u'ECG_mean_diff',
       u'ECG_mean_abs_diff', u'EDA_mean', u'EDA_std', u'EDA_min', u'EDA_max',
       u'EDA_mean_diff', u'EDA_mean_abs_diff', u'Resp_mean', u'Resp_std',
       u'Resp_min', u'Resp_max', u'Resp_mean_diff', u'Resp_mean_abs_diff',
       u'SKT_mean', u'SKT_std', u'SKT_min', u'SKT_max', u'SKT_mean_diff',
       u'SKT_mean_abs_diff', u'Pupil'],
      dtype='object')


In [60]:
# Write the feature tables, the label vectors and the feature column names to
# .npy files (paths are relative to the current working directory).
outputs = [
    ("data_valence_physio_with_pupil_for_svm_only_valid_session.npy", x_valence),
    ("data_valence_label_with_pupil_for_svm_only_valid_session.npy", y_valence),
    ("data_arousal_physio_with_pupil_for_svm_only_valid_session.npy", x_arousal),
    ("data_arousal_label_with_pupil_for_svm_only_valid_session.npy", y_arousal),
    ("data_columns.npy", x_valence.columns),
]
for path, payload in outputs:
    np.save(path, payload)