In [1]:
import glob
import numpy as np
import pandas as pd

In [2]:
# Load every exported feature archive matching the pattern below.
# - sorted(): glob.glob returns paths in arbitrary, filesystem-dependent order;
#   sorting makes the row order of the assembled dataset reproducible.
# - list comprehension: later cells call len(files) and slice files, which
#   needs a real list (map() is only a list on Python 2).
# NOTE(review): later cells call .tolist() on archive members, so these are
# presumably pickled object arrays; np.load unpickles them, so only load
# trusted files. Newer NumPy may require allow_pickle=True here -- confirm.
files = [
    np.load(path)
    for path in sorted(glob.glob("../export/data/feature_data_*.npz"))
]

In [3]:
# Signals and per-signal statistics to keep. Feature columns are looked up
# later under the name "<signal>_<attribute>".
selected_signals = ['ECG', 'EDA', 'Resp', 'SKT']
selected_attribute = [
    'mean', 'std', 'min', 'max',
    'mean_diff', 'mean_abs_diff',
]

# Slice applied to every feature column: start index and number of samples.
skip_sample, number_of_sample = 100, 800

# Sizes derived from the selections above and from the loaded archives.
n_selected_signals, n_selected_attribute = len(selected_signals), len(selected_attribute)
n_subject = len(files)

In [4]:
def _subject_frame(features, columns, label, start, stop):
    """Build the labelled feature table for one loaded archive.

    features -- object indexed by column name whose values can be sliced
                (the result of ``f["valence"].tolist()`` / ``f["arousal"].tolist()``)
    columns  -- feature column names to extract, in output order
    label    -- scalar label broadcast to every row
    start, stop -- slice bounds applied to every column

    Returns a DataFrame holding ``columns`` followed by a "label" column.
    """
    frame = pd.DataFrame(
        {column: features[column][start:stop] for column in columns},
        columns=columns,  # pin the column order explicitly
    )
    frame["label"] = label
    return frame


# One column per (signal, attribute) pair, e.g. "ECG_mean".
feature_columns = [
    signal + "_" + attribute
    for signal in selected_signals
    for attribute in selected_attribute
]
window_stop = skip_sample + number_of_sample

valence_frames = []
arousal_frames = []

for f in files[:n_subject]:
    session = f["session_info"].tolist()

    # Read each archive member once per file: every f[...] lookup on an .npz
    # archive loads that member again, so doing it per column is wasteful.
    valence_frames.append(_subject_frame(
        f["valence"].tolist(), feature_columns,
        int(session["feltVlnc"]), skip_sample, window_stop))
    arousal_frames.append(_subject_frame(
        f["arousal"].tolist(), feature_columns,
        int(session["feltArsl"]), skip_sample, window_stop))

if not valence_frames:
    raise ValueError("no feature files were loaded; nothing to assemble")

# Concatenate all archives' data once, instead of growing a frame in the loop.
valence_all = pd.concat(valence_frames, ignore_index=True)
arousal_all = pd.concat(arousal_frames, ignore_index=True)

# Split labels from data.
y_valence = valence_all["label"]
y_arousal = arousal_all["label"]

x_valence = valence_all.drop(["label"], axis=1)
x_arousal = arousal_all.drop(["label"], axis=1)

# Replace NaN values with the per-column median taken over all rows.
x_valence = x_valence.fillna(x_valence.median())
x_arousal = x_arousal.fillna(x_arousal.median())

In [7]:
# Sanity check: feature matrices and label vectors must have matching row
# counts. Single-argument print(...) emits the same text on Python 2 and
# also runs on Python 3, unlike the bare print statement.
print(x_valence.shape)
print(y_valence.shape)

print(x_arousal.shape)
print(y_arousal.shape)

print(x_valence.columns)

(321600, 24)
(321600,)
(321600, 24)
(321600,)
Index([u'ECG_mean', u'ECG_std', u'ECG_min', u'ECG_max', u'ECG_mean_diff',
       u'ECG_mean_abs_diff', u'EDA_mean', u'EDA_std', u'EDA_min', u'EDA_max',
       u'EDA_mean_diff', u'EDA_mean_abs_diff', u'Resp_mean', u'Resp_std',
       u'Resp_min', u'Resp_max', u'Resp_mean_diff', u'Resp_mean_abs_diff',
       u'SKT_mean', u'SKT_std', u'SKT_min', u'SKT_max', u'SKT_mean_diff',
       u'SKT_mean_abs_diff'],
      dtype='object')


In [8]:
# Persist the assembled features, the labels and the column names as .npy
# files in the current working directory, in this order.
outputs = [
    ("data_valence_physio_for_svm.npy", x_valence),
    ("data_valence_label_for_svm.npy", y_valence),
    ("data_arousal_physio_for_svm.npy", x_arousal),
    ("data_arousal_label_for_svm.npy", y_arousal),
    ("data_columns.npy", x_valence.columns),
]
for filename, payload in outputs:
    np.save(filename, payload)