In [1]:
import os
import pandas as pd
import numpy as np
import logging
import math
from keras.models import Sequential
from keras.layers import Dense
try:
    from keras.layers import CuDNNLSTM as LSTM
except Exception as e:
    print(e)
    from keras.layers import LSTM
from keras.layers import Bidirectional
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.python.client import device_lib
from keras.layers import TimeDistributed
from sklearn.metrics import confusion_matrix
import h5py 
from keras.utils import np_utils
from sklearn.model_selection import KFold, cross_val_score
from keras.wrappers.scikit_learn import KerasClassifier
import pickle
from keras.models import load_model
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer
from keras.layers.core import Dense, Activation, Dropout
import tensorflow as tf
# Print the TensorFlow version and the devices TensorFlow can see, so the
# environment of the run is recorded in the cell output.
print(tf.__version__)
print(device_lib.list_local_devices())
# Module logger (the only reference to it in get_data is commented out).
logger = logging.getLogger('data.composer')

# Directory containing the CSV files that get_data reads.
COMPOSED_TABLES_DIR = 'transformed_data'


Using TensorFlow backend.


1.4.0
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 16720547626738572637
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1551427174
locality {
  bus_id: 1
}
incarnation: 16187059223975293034
physical_device_desc: "device: 0, name: GeForce 920MX, pci bus id: 0000:01:00.0, compute capability: 5.0"
]


In [2]:
# Scaler mapping every feature column to [0, 1]; get_data calls fit_transform
# on it for each file, so it is re-fitted per file.
scaler = MinMaxScaler(feature_range=(0, 1))
# Every file name in the data directory (only used by the commented-out split below).
all_files = set(os.listdir(COMPOSED_TABLES_DIR))
# The train/test split is read from pickled collections of file names.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('test_subset.pkl', 'rb') as f:
    test_files = set(pickle.load(f))
with open('train_subset.pkl', 'rb') as f:
    train_files = set(pickle.load(f))
# Alternative split: everything that is not a test file.
# train_files = all_files.difference(test_files)

def get_data(files):
    """Yield windowed (features, targets) arrays, one pair per CSV file.

    Each file is read from COMPOSED_TABLES_DIR. The columns from position 10
    onward are the features; they are rescaled to [0, 1] with the module-level
    ``scaler``, which is re-fitted on every file. The six emotion columns are
    the targets and the 'Agreement score' column is handed to
    ``create_dataset`` as a per-row weight.

    Args:
        files: iterable of CSV file names relative to COMPOSED_TABLES_DIR.

    Yields:
        (Xd, yd): the arrays returned by ``create_dataset`` for that file.
    """
    emotion_columns = ['Anger', 'Sad', 'Disgust', 'Happy', 'Scared', 'Neutral']
    for file_name in files:
        csv_path = os.path.join(COMPOSED_TABLES_DIR, file_name)
        print(csv_path)
        df = pd.read_csv(csv_path, delimiter=',')
        # `.values` replaces `.as_matrix()`, which was deprecated and then
        # removed from pandas; `.values` works on old and new versions alike.
        X = df.iloc[:, 10:].values
        y = df[emotion_columns].values
        # Pass a plain ndarray so the slicing/reshape done downstream does not
        # depend on the pandas Series API.
        agreement = df['Agreement score'].values
        X = scaler.fit_transform(X)
        Xd, yd = create_dataset(X, y, agreement)
        yield Xd, yd

def get_test_data():
    """Load every test file and stack all windows into a single (X, y) pair.

    Returns:
        (X, y): the per-file arrays produced by ``get_data(test_files)``,
        concatenated along axis 0 in iteration order.

    Raises:
        ValueError: if ``get_data(test_files)`` yields nothing.
    """
    chunks_X, chunks_y = [], []
    for Xd, yd in get_data(test_files):
        chunks_X.append(Xd)
        chunks_y.append(yd)
    if not chunks_X:
        raise ValueError('no test data: test_files is empty')
    # Concatenate once at the end; growing the array inside the loop would
    # re-copy everything accumulated so far for every file.
    X = np.concatenate(chunks_X, axis=0)
    y = np.concatenate(chunks_y, axis=0)
    return X, y
        
def create_dataset(X, y, agreement, look_back=100):
    """Cut aligned sliding windows out of a feature matrix and its targets.

    Every target row is first multiplied by the agreement value of the same
    row. Window ``i`` then covers rows ``i .. i + look_back - 1`` of both the
    features and the weighted targets.

    Args:
        X: array-like of shape (n_rows, n_features).
        y: array-like of shape (n_rows, n_targets).
        agreement: one weight per row; ndarray, list or pandas Series.
        look_back: number of consecutive rows per window.

    Returns:
        (dataX, dataY) with shapes (n_windows, look_back, n_features) and
        (n_windows, look_back, n_targets), where
        n_windows = max(len(X) - look_back - 1, 0).
    """
    X = np.asarray(X)
    # np.asarray makes this work for a pandas Series too (Series has no
    # usable .reshape in current pandas).
    weights = np.asarray(agreement).reshape(-1, 1)
    # The weighting does not depend on the window, so do it once up front.
    weighted_y = np.multiply(np.asarray(y), weights)

    # NOTE(review): the "- 1" leaves the last two possible windows unused; it
    # is kept so the number of samples stays what callers already get.
    n_windows = max(len(X) - look_back - 1, 0)
    if n_windows == 0:
        # Keep the trailing dimensions so callers can still concatenate the
        # result with the windows of other files.
        empty_X = np.empty((0, look_back) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0, look_back) + weighted_y.shape[1:], dtype=weighted_y.dtype)
        return empty_X, empty_y

    dataX = np.stack([X[i:i + look_back] for i in range(n_windows)])
    dataY = np.stack([weighted_y[i:i + look_back] for i in range(n_windows)])
    return dataX, dataY
    
def train():
    """Build the bidirectional LSTM, fit it one file at a time, and save it.

    The network is Bidirectional(LSTM) -> Dropout -> LSTM -> Dropout ->
    TimeDistributed(Dense), so it emits one 6-value prediction per time step.
    Each file yielded by ``get_data(train_files)`` is fitted for a single
    epoch. The model is written to 'lstm_keras_simple.h5' once all files
    have been processed.

    Returns:
        The fitted Keras model.
    """
    look_back = 100    # time steps per window; create_dataset's default
    n_features = 173   # feature columns per time step expected by the input layer
    n_classes = 6      # one output per emotion column selected in get_data

    model = Sequential()
    model.add(Bidirectional(LSTM(512, return_sequences=True), input_shape=(look_back, n_features)))
    model.add(Dropout(0.5))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.5))

    # NOTE(review): sigmoid outputs paired with categorical_crossentropy is
    # unusual (softmax is the common pairing) -- confirm this is intended.
    model.add(TimeDistributed(Dense(n_classes, activation='sigmoid')))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    for file_num, (trainX, trainY) in enumerate(get_data(train_files)):
        print(file_num, '/', len(train_files))
        if len(trainX) == 0:
            # The file produced no window. Fitting on it would raise and, as
            # the model is only saved after the loop, discard all progress.
            continue
        model.fit(trainX, trainY, epochs=1, batch_size=128, verbose=1)

    model.save('lstm_keras_simple.h5')
    return model

def invert_categorical(arr):
    """Return, for every row of ``arr``, the index of its largest value.

    Args:
        arr: iterable of rows (e.g. a 2-D array of per-class scores).

    Returns:
        A list with one ``np.argmax`` result per row, in row order.
    """
    return [np.argmax(scores) for scores in arr]

In [11]:
# Run the whole training pass; train() also saves the model to disk and the
# returned instance is kept in the kernel namespace as `model`.
model = train()

transformed_data\idc8354906.csv




0 / 30
Epoch 1/1
transformed_data\idc73b88b6.csv
1 / 30
Epoch 1/1
transformed_data\id79b43c27.csv
2 / 30
Epoch 1/1
transformed_data\id8ed4825e.csv
3 / 30
Epoch 1/1
transformed_data\id6608bab6.csv
4 / 30
Epoch 1/1
transformed_data\id911ae0ab.csv
5 / 30
Epoch 1/1
transformed_data\id646b218d.csv
6 / 30
Epoch 1/1
transformed_data\id2464e914.csv
7 / 30
Epoch 1/1
transformed_data\id70d80b64.csv
8 / 30
Epoch 1/1
transformed_data\id8ecc3ed4.csv
9 / 30
Epoch 1/1
transformed_data\ide3e0992e.csv
10 / 30
Epoch 1/1
transformed_data\ide8f7cf6f.csv
11 / 30
Epoch 1/1
transformed_data\id9d11e3a2.csv
12 / 30
Epoch 1/1
transformed_data\idce019e48.csv
13 / 30
Epoch 1/1
transformed_data\id350a4e4d.csv
14 / 30
Epoch 1/1
transformed_data\id4d28179d.csv
15 / 30
Epoch 1/1
transformed_data\idd7aeecb6.csv
16 / 30
Epoch 1/1
transformed_data\idc057e450.csv
17 / 30
Epoch 1/1
transformed_data\id61fb0c0d.csv
18 / 30
Epoch 1/1
transformed_data\id7d0837f1.csv
19 / 30
Epoch 1/1
transformed_data\id81c6e925.csv
20 / 30
Ep

In [None]:
# NOTE(review): the load below is commented out, so this cell loads nothing
# even though it prints 'model loaded'; the prediction/scoring code further
# down is disabled as well.
# model = load_model('lstm_keras_simple.h5')
print('model loaded')
# Read every test file and concatenate their windows.
testX, testY = get_test_data()
# Merge the first two axes so that each row is a single 6-value target vector.
testY = testY.reshape((testY.shape[0] * testY.shape[1], 6))

# Disabled evaluation: predict, flatten the same way, take the arg-max label
# per row and report accuracy plus the confusion matrix.
# predicted = model.predict(testX)
# predicted = predicted.reshape((predicted.shape[0] * predicted.shape[1], 6))
# testY_labels = invert_categorical(testY)

# predicted_labels = invert_categorical(predicted)
# print('Accuracy: ', accuracy_score(testY_labels, predicted_labels))
# print('Confusion matrix: ')
# print(confusion_matrix(testY_labels, predicted_labels))

model loaded
transformed_data\id2d43cba9.csv




transformed_data\id9ee0e61f.csv
transformed_data\idfb45d082.csv
transformed_data\id9cfbf990.csv
transformed_data\id9a916a92.csv
transformed_data\ida5f9add0.csv
