In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Conv1D, MaxPooling1D
from keras.layers import GlobalAveragePooling1D, LSTM, Flatten, Conv2D, Reshape
from sklearn.model_selection import train_test_split
# Seed NumPy's global random generator so NumPy-driven randomness repeats across runs.
# NOTE(review): only NumPy is seeded here; the backend's own generator is not touched.
SEED = 7
np.random.seed(SEED)

Using TensorFlow backend.


In [2]:
import glob

# Load every exported feature archive into a list.
# - sorted(): glob returns paths in arbitrary order, but later cells rely on file
#   order (labels are aligned by position and the split is not shuffled), so a
#   stable order is required for reproducible results.
# - list (not map): later cells use files[0] and len(files), which need a real
#   sequence on Python 3 as well as Python 2.
# - allow_pickle=True: the archives store Python objects (session_info is read back
#   as a mapping), which recent NumPy versions refuse to unpickle by default.
#   NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
feature_paths = sorted(glob.glob("../export/data/feature_data_*.npz"))
files = [np.load(path, allow_pickle=True) for path in feature_paths]

In [3]:
# Inspect the signal and attribute names stored in the first loaded archive.
first_archive = files[0]
signals = first_archive["signals"]
attributes = first_archive["attributes"]
print(signals)
print(attributes)

['ECG' 'EDA' 'Resp' 'SKT']
['mean' 'std' 'min' 'max' 'mean_diff' 'mean_abs_diff']


In [4]:
# Signals and per-signal attributes that make up the feature columns
# (each column is named "<signal>_<attribute>" when the dataset is assembled).
selected_signals = ['ECG', 'EDA', 'Resp', 'SKT']
selected_attribute = ['mean', 'std', 'min', 'max', 'mean_diff', 'mean_abs_diff']

# One entry in `files` per loaded archive.
n_subject = len(files)
n_selected_signals, n_selected_attribute = len(selected_signals), len(selected_attribute)

In [5]:
# Build the raw dataset: one fixed-length block of feature rows per archive in
# `files`, plus one label per archive.
number_of_sample = 1500
labels = []
subject_frames = []

for f in files:

    session = f["session_info"].tolist()
    mediafile = session["mediaFile"]
    sid = int(session["sessionId"])
    label = int(session["feltVlnc"])

    # Look the feature container up once per file instead of once per column;
    # every f[...] access on an .npz archive loads the member again.
    valence = f["valence"].tolist()

    data = pd.DataFrame()

    for signal in selected_signals:
        for attribute in selected_attribute:

            # add a column for each feature, truncated to the first number_of_sample rows
            column = signal + "_" + attribute
            data[column] = valence[column][:number_of_sample]

    # The later reshape to (n_subject, number_of_sample, n_features) needs exactly
    # number_of_sample rows per file, so fail here with a readable message.
    if len(data) != number_of_sample:
        raise ValueError(
            "session {}: expected {} samples per feature, got {}".format(
                sid, number_of_sample, len(data)))

    # replace NaN values with the per-feature median of this recording
    data = data.fillna(data.median())

    # add the session id to each timestep
    data["sid"] = sid

    # add the media file to each timestep
    data["media"] = mediafile

    # one label per recording (not per timestep)
    labels.append(label)

    subject_frames.append(data)

# concatenate all subjects' data once (concatenating inside the loop re-copies
# the accumulated frame on every iteration)
if subject_frames:
    inputs_raw = pd.concat(subject_frames, ignore_index=True)
else:
    inputs_raw = pd.DataFrame()

labels = pd.DataFrame(labels, columns=['label'])

In [6]:
# Features: every column except the per-row bookkeeping columns.
metadata_columns = ['sid', 'media']
X = inputs_raw.drop(metadata_columns, axis=1)

# Targets: one-hot encode the 'label' column (one indicator column per label value present).
Y = pd.get_dummies(labels.copy(), columns=['label'])

In [7]:
# Fold the flat (rows, features) table into (n_subject, number_of_sample, features)
# so that each recording becomes one sample for the network.
cnn_input_shape = (n_subject, number_of_sample, X.shape[1])
input_X = np.reshape(X.copy().values, cnn_input_shape)

In [65]:
# Split into training and validation data.
# A trailing axis of size 1 is appended so the samples match the 4-D input that Conv2D expects;
# shuffle=False keeps the recordings in their loaded order.
input_X_4d = input_X[..., np.newaxis]
x_train, x_valid, y_train, y_valid = train_test_split(input_X_4d, Y, test_size=0.2, shuffle=False)

for split_part in (x_train, y_train, x_valid, y_valid):
    print(split_part.shape)

(89, 1500, 24, 1)
(89, 9)
(23, 1500, 24, 1)
(23, 9)


In [67]:
# Sizes read off the training tensor: (samples, timesteps, features, ...).
# batch_size is the number of training samples, i.e. one batch covers all of x_train.
batch_size, timesteps, data_dim = x_train.shape[:3]

# One output unit per one-hot label column.
num_classes = Y.shape[1]

# Filter count and kernel size for the two convolution stages.
n_filter_layer_1, kernel_size_layer_1 = 100, 10
n_filter_layer_2, kernel_size_layer_2 = 160, 10

# Model name encodes the convolution hyper-parameters.
name = "keras_cnn_{}-{}_{}-{}.h5".format(
    n_filter_layer_1,
    kernel_size_layer_1,
    n_filter_layer_2,
    kernel_size_layer_2,
)

In [78]:
# CNN: a Conv2D over adjacent feature pairs at each timestep, followed by a
# temporal Conv1D stack, global average pooling and a softmax classifier.
model_m = Sequential(name=name)

# Kernel of the first convolution: 1 timestep x conv_kernel_width features.
conv_kernel_width = 2
# With the default 'valid' padding and stride 1 the feature axis shrinks to
# data_dim - conv_kernel_width + 1. Derive it instead of hard-coding 23, so the
# model still builds when the selected signals/attributes change.
conv_out_width = data_dim - conv_kernel_width + 1

model_m.add(Conv2D(n_filter_layer_1, (1, conv_kernel_width), input_shape=(timesteps, data_dim, 1), activation='relu'))

# Merge the (feature position, filter) axes so each timestep is one flat vector for the 1-D layers.
model_m.add(Reshape(target_shape=(timesteps, conv_out_width * n_filter_layer_1)))

model_m.add(MaxPooling1D(3))
model_m.add(Conv1D(n_filter_layer_2, kernel_size_layer_2, activation='relu', padding='same'))
model_m.add(Conv1D(n_filter_layer_2, kernel_size_layer_2, activation='relu'))
model_m.add(GlobalAveragePooling1D())

model_m.add(Dropout(rate=0.5))

model_m.add(Dense(num_classes, activation='softmax'))
model_m.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 1500, 23, 100)     300       
_________________________________________________________________
reshape_8 (Reshape)          (None, 1500, 2300)        0         
_________________________________________________________________
max_pooling1d_23 (MaxPooling (None, 500, 2300)         0         
_________________________________________________________________
conv1d_65 (Conv1D)           (None, 500, 160)          3680160   
_________________________________________________________________
conv1d_66 (Conv1D)           (None, 491, 160)          256160    
_________________________________________________________________
global_average_pooling1d_13  (None, 160)               0         
_________________________________________________________________
dropout_25 (Dropout)         (None, 160)               0         
__________

In [79]:
# Multi-class setup: categorical cross-entropy on the one-hot targets, Adam optimizer,
# accuracy reported alongside the loss.
model_m.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [84]:
# Train for 20 epochs without shuffling; 20% of x_train is held out for per-epoch validation.
# The call is left as the cell's final expression so the returned History object is displayed.
model_m.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=False,
    validation_split=0.2,
    verbose=2,
)

Train on 71 samples, validate on 18 samples
Epoch 1/20
 - 23s - loss: 1.9272 - acc: 0.3944 - val_loss: 2.1906 - val_acc: 0.1111
Epoch 2/20
 - 23s - loss: 1.9619 - acc: 0.2817 - val_loss: 2.2119 - val_acc: 0.1111
Epoch 3/20
 - 23s - loss: 1.8063 - acc: 0.3380 - val_loss: 2.2486 - val_acc: 0.1111
Epoch 4/20
 - 23s - loss: 1.8823 - acc: 0.3239 - val_loss: 2.2863 - val_acc: 0.1111
Epoch 5/20
 - 23s - loss: 1.8562 - acc: 0.3239 - val_loss: 2.3202 - val_acc: 0.0556
Epoch 6/20
 - 22s - loss: 1.8430 - acc: 0.3099 - val_loss: 2.3401 - val_acc: 0.0556
Epoch 7/20
 - 22s - loss: 1.8207 - acc: 0.2958 - val_loss: 2.3574 - val_acc: 0.0556
Epoch 8/20
 - 23s - loss: 1.8587 - acc: 0.2817 - val_loss: 2.3634 - val_acc: 0.0556
Epoch 9/20
 - 22s - loss: 1.8697 - acc: 0.3099 - val_loss: 2.3640 - val_acc: 0.0556
Epoch 10/20
 - 23s - loss: 1.7740 - acc: 0.3944 - val_loss: 2.3712 - val_acc: 0.0556
Epoch 11/20
 - 23s - loss: 1.7648 - acc: 0.3521 - val_loss: 2.3884 - val_acc: 0.0556
Epoch 12/20
 - 28s - loss: 1.7

<keras.callbacks.History at 0x7f141561f310>

In [85]:
# Score the held-out validation split one sample at a time; the result lists the loss
# followed by the metrics passed to compile().
model_m.evaluate(
    x_valid,
    y_valid,
    batch_size=1,
)



[2.0882601737976074, 0.2608695652173913]