### Get arrays and annotations

In [2]:
from get_data import get_arrays, get_static_annotations

arrays, ids = get_arrays()
id_to_annotation = get_static_annotations()

print("N_arrays = {}, N_ids = {}".format(len(arrays), len(ids)))
print("id_to_annotation: Length = {}".format(len(id_to_annotation)))

print("Shape of first 5 arrays: {} {} {} {} {}".format(arrays[0].shape,arrays[1].shape,arrays[2].shape,arrays[3].shape,arrays[4].shape))
print()
print("First array")
print(arrays[0])

N_arrays = 1802, N_ids = 1802
id_to_annotation: Length = 1802
Shape of first 5 arrays: (224, 260) (595, 260) (423, 260) (88, 260) (88, 260)

First array
[[ 3.220214e+01  4.732088e+01  3.005607e-01 ... -1.217681e-01
   1.500598e+00 -1.187273e-01]
 [ 2.448522e+01  6.520026e+01  2.075381e-02 ...  4.512318e-02
   1.284054e+00  7.781623e-02]
 [ 7.830985e+01  9.389572e+01  2.214237e-02 ... -1.268222e-02
   1.521996e+00 -8.627687e-03]
 ...
 [ 1.247303e-01  5.512440e+01  3.664538e-03 ...  2.325155e-02
   9.236774e-01  3.972320e-02]
 [ 1.059528e+01  5.830891e+01  2.850553e-02 ...  5.302914e-02
   1.256330e+00  1.650663e-02]
 [ 3.453281e+01  3.737027e+01  3.949210e-01 ... -8.443007e-02
   1.369008e+00  3.063905e-02]]


### Split into Development and Evaluation Arrays

In [7]:
import numpy as np

dev_arrays = []; dev_valence = []; dev_arousal = [];
eval_arrays = []; eval_valence = []; eval_arousal = [];

for array, id_ in zip(arrays, ids):
    if id_ <= 2000:
        dev_arrays.append(array)
        dev_valence.append(id_to_annotation[id_]["valence"])
        dev_arousal.append(id_to_annotation[id_]["arousal"])
    else:
        eval_arrays.append(array)
        eval_valence.append(id_to_annotation[id_]["valence"])
        eval_arousal.append(id_to_annotation[id_]["arousal"])

dev_valence = np.array(dev_valence)
dev_arousal = np.array(dev_arousal)
eval_valence = np.array(eval_valence)
eval_arousal = np.array(eval_arousal)
        
print("Dev Arrays: N_arrays = {}".format(len(dev_arrays)))
print("Eval Arrays: N_arrays = {}".format(len(eval_arrays)))

Dev Arrays: N_arrays = 1744
Eval Arrays: N_arrays = 58


### Find statistics of sequence lengths

In [9]:
dev_seq_lengths = np.array([a.shape[0] for a in dev_arrays])
eval_seq_lengths = np.array([a.shape[0] for a in eval_arrays])

s = dev_seq_lengths
print("Dev Arrays Seq Lengths: Max = {}, Min = {}, Mean = {:.2f}, Median = {}, 75%tile = {}".format(np.max(s), np.min(s), np.mean(s), np.median(s), np.percentile(s, q=0.75)))
s = eval_seq_lengths
print("Eval Arrays Seq Lengths: Max = {}, Min = {}, Mean = {:.2f}, Median = {}, 75%tile = {}".format(np.max(s), np.min(s), np.mean(s), np.median(s), np.percentile(s, q=0.75)))

Dev Arrays Seq Lengths: Max = 1170, Min = 88, Mean = 246.89, Median = 88.0, 75%tile = 88.0
Eval Arrays Seq Lengths: Max = 1253, Min = 97, Mean = 466.45, Median = 455.0, 75%tile = 103.4125


### Keras Import

In [10]:
from keras import backend as K
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation, Dropout
from keras.callbacks import EarlyStopping

### Pad Sequences

In [12]:
dev_set = pad_sequences(dev_arrays, maxlen=1300, padding="pre", truncating="pre", dtype="float32")
eval_set = pad_sequences(eval_arrays, maxlen=1300, padding="pre", truncating="pre", dtype="float32")

print("Dev Set: Shape = {}, Data = {}".format(dev_set.shape, dev_set.dtype))
print("Dev Set: Shape = {}, Data = {}".format(eval_set.shape, eval_set.dtype))

Dev Set: Shape = (1744, 1300, 260), Data = float32
Dev Set: Shape = (58, 1300, 260), Data = float32


### LSTM Model

In [22]:
model = Sequential()
model.add(LSTM(input_shape = (1300, 260), units = 260, return_sequences=False))
model.add(Dense(units=1))
model.compile(optimizer="sgd", loss="mean_squared_error")
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 260)               541840    
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 261       
Total params: 542,101
Trainable params: 542,101
Non-trainable params: 0
_________________________________________________________________


## Valence
### Fit Model

In [25]:
early_stopping = EarlyStopping(monitor='val_loss', patience=2)
model.fit(dev_set, dev_valence, batch_size=32, validation_split=0.2, verbose=1, callbacks=[early_stopping])

Train on 1395 samples, validate on 349 samples
Epoch 1/1


<keras.callbacks.History at 0x12b4ab2b0>

### Evaluate Model

In [26]:
mse = model.evaluate(eval_set, eval_valence)
print("Eval Set: RMSE = {:.2f}".format(np.sqrt(mse)))

Eval Set: RMSE = 1.43
