In [74]:
import os
import pandas as pd
import numpy as np
import logging
import math
from keras.models import Sequential
from keras.layers import Dense
try:
    from keras.layers import CuDNNLSTM as LSTM
except Exception as e:
    print(e)
    from keras.layers import LSTM
from keras import regularizers
from keras.layers import Bidirectional
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.python.client import device_lib
from keras.layers import TimeDistributed
from sklearn.metrics import confusion_matrix
import h5py 
from keras.utils import np_utils
from sklearn.model_selection import KFold, cross_val_score
from keras.wrappers.scikit_learn import KerasClassifier
import pickle
from keras.models import load_model
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer
from keras.layers.core import Dense, Activation, Dropout

logger = logging.getLogger('data.composer')

COMPOSED_TABLES_DIR = 'transformed_data_1'
model = Sequential()


In [75]:
LOOK_BACK = 100

scaler = MinMaxScaler(feature_range=(0, 1))
all_files = set(os.listdir(COMPOSED_TABLES_DIR))
with open('test_subset.pkl', 'rb') as f:
    test_files = set(pickle.load(f))
with open('train_subset.pkl', 'rb') as f:
    train_files = set(pickle.load(f))
# train_files = all_files.difference(test_files)

def get_data(files):
    for file_name in files:
        # logger.info(file_name)
        print(os.path.join(COMPOSED_TABLES_DIR, file_name))
        df = pd.read_csv(os.path.join(COMPOSED_TABLES_DIR, file_name), delimiter=',')
        X = df.iloc[:, 10:].as_matrix()
        y = df[['Anger','Sad','Disgust','Happy','Scared','Neutral']].as_matrix()
        agreement = df['Agreement score']
        X = scaler.fit_transform(X)
        Xd, yd = create_dataset(X, y, agreement, LOOK_BACK)
        yield Xd, yd

def get_test_data():
    iterator = get_data(test_files)
    for X, y in iterator:
        # y = y.reshape((y.shape[0] * y.shape[1], 6))
        for i in range(0, len(X)-101, 100):
            yield X[i:i+100], y[i:i+100]
        yield X[i:], y[i:]
        # yield X, y
        
def create_dataset(X, y, agreement, look_back=100):
    dataX, dataY = [], []
    for i in range(0, len(X)-look_back-1, 1):
        dataX.append(X[i:i+look_back])
        y_mul_agr = y[i:i+look_back]
        dataY.append(np.average(y_mul_agr, axis=0))
        # dataY.append(y_mul_agr)
    return np.array(dataX), np.array(dataY)
    
def create_model():
#     model.add(TimeDistributed(Dense(100, kernel_regularizer=regularizers.l2(0.01),
#                 activity_regularizer=regularizers.l1(0.01), kernel_initializer='he_uniform'), input_shape=(LOOK_BACK, 177)))
#     model.add(Dropout(0.5))
#     model.add(TimeDistributed(Dense(100, kernel_regularizer=regularizers.l2(0.01),
#                 activity_regularizer=regularizers.l1(0.01), kernel_initializer='he_uniform')))
#     model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=(LOOK_BACK, 177)))
    model.add(Dropout(0.3))
    model.add(Bidirectional(LSTM(128, return_sequences=False)))
    model.add(Dropout(0.3))
#     model.add(TimeDistributed(Dense(100, kernel_regularizer=regularizers.l2(0.01),
#                 activity_regularizer=regularizers.l1(0.01), kernel_initializer='he_uniform')))
#     model.add(TimeDistributed(Dense(100, kernel_regularizer=regularizers.l2(0.01),
#                 activity_regularizer=regularizers.l1(0.01), kernel_initializer='he_uniform')))
#     model.add(TimeDistributed(Dense(50, kernel_regularizer=regularizers.l2(0.01),
#                 activity_regularizer=regularizers.l1(0.01), kernel_initializer='he_uniform')))
    model.add(Dense(200, activation='relu', kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l2(0.01), kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(100, activation='relu', kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l2(0.01), kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l2(0.01), kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l2(0.01), kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l2(0.01), kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l2(0.01), kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l2(0.01), kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l2(0.01), kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(6, activation='sigmoid', kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l2(0.01), kernel_initializer='he_uniform'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

def train():
    for file_num, (trainX, trainY) in enumerate(get_data(train_files)):
        print(file_num, '/', len(train_files))
        print('Y: ', np.average(trainY, axis=0))

        model.fit(trainX, trainY, epochs=1, batch_size=64, verbose=1)

    model.save('lstm_keras.h5')
    return model

def invert_categorical(arr):
    labels = []
    for row in arr:
        labels.append(np.argmax(row))
    return labels

In [76]:
create_model()
model = train()

transformed_data_1\idfc9eb423.csv
0 / 30
Y:  [ 0.00344788  0.          0.          0.          0.99655212  0.        ]
Epoch 1/1
transformed_data_1\id7d0837f1.csv
1 / 30
Y:  [ 0.          0.          0.88518691  0.          0.          0.11481309]
Epoch 1/1
transformed_data_1\id460cb4e1.csv
2 / 30
Y:  [ 1.  0.  0.  0.  0.  0.]
Epoch 1/1
transformed_data_1\idc057e450.csv
3 / 30
Y:  [ 1.  0.  0.  0.  0.  0.]
Epoch 1/1
transformed_data_1\id77720abd.csv
4 / 30
Y:  [ 0.          0.69333488  0.          0.          0.          0.30666512]
Epoch 1/1
transformed_data_1\id19a15835.csv
5 / 30
Y:  [ 0.  1.  0.  0.  0.  0.]
Epoch 1/1
transformed_data_1\id1d656472.csv
6 / 30
Y:  [ 0.22398978  0.35891779  0.          0.          0.41709243  0.        ]
Epoch 1/1
transformed_data_1\id8036ccb4.csv
7 / 30
Y:  [ 0.  0.  1.  0.  0.  0.]
Epoch 1/1
transformed_data_1\idd721711a.csv
8 / 30
Y:  [ 1.  0.  0.  0.  0.  0.]
Epoch 1/1
transformed_data_1\ide7ff1648.csv
9 / 30
Y:  [ 0.  0.  0.  0.  0.  1.]
Epoch 1/

In [77]:
model = load_model('lstm_keras.h5')
print('model loaded')

generator = get_test_data()
testX, testY = next(generator)
predicted =  model.predict_on_batch(testX)
for testX1, testY1 in generator:
    testY = np.concatenate((testY, testY1), axis=0)
    predicted1 = model.predict_on_batch(testX1)
    predicted = np.concatenate((predicted, predicted1), axis=0)
    
print(testY.shape, predicted.shape)
# testY = testY.reshape((testY.shape[0] * testY.shape[1], 6))
# predicted = predicted.reshape((predicted.shape[0] * predicted.shape[1], 6))
testY_labels = invert_categorical(testY)

predicted_labels = invert_categorical(predicted)
print(len(predicted), len(testY_labels))
print('Accuracy: ', accuracy_score(testY_labels, predicted_labels))
print('Confusion matrix: ')
print(confusion_matrix(testY_labels, predicted_labels))

model loaded
transformed_data_1\idac6a0586.csv
transformed_data_1\id37146566.csv
transformed_data_1\idc8354906.csv
transformed_data_1\id6d07cec0.csv
transformed_data_1\idf948893b.csv
transformed_data_1\idf71db6d3.csv
(26440, 6) (26440, 6)
26440 26440
Accuracy:  0.166679273828
Confusion matrix: 
[[   0    0    0    0    0 4406]
 [   0    0    0    0    0 4407]
 [   0    0    0    0    0 4407]
 [   0    0    0    0    0 4407]
 [   0    0    0    0    0 4406]
 [   0    0    0    0    0 4407]]


In [51]:
a = pd.DataFrame([1.0, 2.0, 3.0, 0.0])

In [52]:
(a == 0).all(axis=1).astype(int)


0    0
1    0
2    0
3    1
dtype: int32

In [54]:
a

Unnamed: 0,0
0,1.0
1,2.0
2,3.0
3,0.0
