In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import os

# GPU selection: number CUDA devices by PCI bus id and expose only device "0"
# to this process. Kept ahead of the deep-learning imports so the settings are
# already in the environment when those libraries load.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

import pandas as pd
import numpy as np
from ktrain import text

# Load the labelled training sentences. The frame is deliberately not named
# `input`: that would shadow Python's built-in input() for the rest of the
# kernel session.
train_df = pd.read_csv('../data/complete_ktrain.csv')

# Let ktrain hold out 20% of the rows for validation and preprocess the
# 'sentence' column; the nine emotion columns are the classification targets.
# max_features / maxlen are presumably the vocabulary cap and the sequence
# length produced by the preprocessor -- they have to agree with the input
# size of whatever model consumes x_train.
(x_train, y_train), (x_val, y_val), preprocessing = text.texts_from_df(
    train_df=train_df,
    text_column='sentence',
    label_columns=['joy', 'trust', 'fear', 'surprise', 'sadness', 'disgust', 'anger', 'anticipation', 'neutral'],
    val_pct=0.2,
    max_features=1000,
    maxlen=75,
)

# Sanity-check the result: array shapes plus a short preview rather than the
# whole training matrix.
print(x_train.shape)
print(y_train.shape)
print(x_train[:5])

# generate balanced weights for training
from sklearn.utils import class_weight
def generate_balanced_weights(y_train):
    """Build a ``class_weight`` dict that counteracts class imbalance.

    Parameters
    ----------
    y_train : array-like of shape (n_samples, n_classes)
        One row per training sample; the sample's class is taken to be the
        column index of the largest entry in its row (i.e. one-hot labels).

    Returns
    -------
    dict
        Maps every class index ``0 .. n_classes - 1`` to a float weight.
        Classes that occur in ``y_train`` get scikit-learn's ``'balanced'``
        weight; classes with no samples get the neutral weight ``1.0``.
    """
    y_onehot = np.asarray(y_train)
    y_labels = y_onehot.argmax(axis=1)

    # Only classes that actually occur can be passed to scikit-learn.
    present_classes = np.unique(y_labels)
    # Keyword arguments: recent scikit-learn releases make `classes` and `y`
    # keyword-only, and older releases accept the same names.
    present_weights = class_weight.compute_class_weight(
        class_weight='balanced', classes=present_classes, y=y_labels
    )

    # Start with a neutral weight for every column so the keys are always the
    # contiguous range 0..n_classes-1 (Keras is understood to expect that),
    # then overwrite the entries of the classes that are present. Keying by
    # the real class index -- not by position in `present_classes` -- keeps
    # the mapping correct when a class is missing from the training split.
    weight_dict = {index: 1.0 for index in range(y_onehot.shape[1])}
    for label, weight in zip(present_classes, present_weights):
        weight_dict[int(label)] = float(weight)
    return weight_dict

# Derive the per-class weights from the training labels and show them.
class_weight_dict = generate_balanced_weights(y_train=y_train)
print(class_weight_dict)

# model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.models import Model

# Model dimensions.
max_length = 75    # length of each input sequence (Input shape / Embedding input_length)
max_words = 1000   # Embedding input_dim
features = 200     # Embedding output_dim
classes = 9        # number of units in the softmax output layer

# Sequence input -> embedding -> three stacked bidirectional LSTMs -> softmax.
input_1 = Input(shape=(max_length,))
embed_1 = Embedding(input_dim=max_words, output_dim=features, input_length=max_length)(input_1)
# The first two recurrent layers return the full sequence so the next
# recurrent layer has a sequence to consume.
bi_lstm_1 = Bidirectional(LSTM(units=32, activation='tanh', dropout=0.2, return_sequences=True))(embed_1)
bi_lstm_2 = Bidirectional(LSTM(units=32, activation='tanh', dropout=0.2, return_sequences=True))(bi_lstm_1)
# The last recurrent layer returns only its final output, which feeds the
# dense classifier.
bi_lstm_3 = Bidirectional(LSTM(units=16, activation='tanh', dropout=0.2, return_sequences=False))(bi_lstm_2)
softmax_1 = Dense(units=classes, activation='softmax')(bi_lstm_3)

model = Model(inputs=input_1, outputs=softmax_1)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()

['joy', 'trust', 'fear', 'surprise', 'sadness', 'disgust', 'anger', 'anticipation', 'neutral']
     joy  trust  fear  surprise  sadness  disgust  anger  anticipation  \
364    0      0     0         0        0        0      0             1   
648    1      0     0         0        0        0      0             0   
882    0      0     0         0        0        0      0             0   
95     0      0     1         0        0        0      0             0   
240    1      0     0         0        0        0      0             0   

     neutral  
364        0  
648        0  
882        1  
95         0  
240        0  
['joy', 'trust', 'fear', 'surprise', 'sadness', 'disgust', 'anger', 'anticipation', 'neutral']
     joy  trust  fear  surprise  sadness  disgust  anger  anticipation  \
873    0      1     0         0        0        0      0             0   
686    0      1     0         0        0        0      0             0   
274    0      0     0         0        0        0    

In [2]:
# Fit the model on the training split, evaluating on the validation split
# after each epoch and weighting the loss per class; keep the returned
# history object.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    batch_size=16,
    epochs=20,
    class_weight=class_weight_dict,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [3]:
import ktrain

In [4]:
# Wrap the Keras model together with its train/validation data in a ktrain
# learner.
learner = ktrain.get_learner(
    model,
    train_data=(x_train, y_train),
    val_data=(x_val, y_val),
)

# Pair the learner's model with the preprocessor returned at data-loading time
# to obtain a predictor object.
predictor = ktrain.get_predictor(
    learner.model,
    preproc=preprocessing,
)

In [5]:
# Read the held-out test set and pull its 'sentence' and 'label' columns out
# as arrays.
test = pd.read_csv('../data/test_set.csv')
sentences, labels = (test[column].values for column in ('sentence', 'label'))

In [22]:
# Emotion names; a test label is used as an index into this list.
emotions = [
    'joy', 'trust', 'fear', 'surprise', 'sadness',
    'disgust', 'anger', 'anticipation', 'neutral',
]

# Pick one test example, print it with its label, then ask the predictor to
# explain its prediction for that sentence.
index = 1
sample_sentence = sentences[index]
sample_label = emotions[labels[index]]
print('Sentence: ', sample_sentence)
print('Label: ', sample_label)
predictor.explain(sample_sentence)

Sentence:    My husband has end stage liver disease  and I am so glad I found this place - the info seems really helpful  and the folks seen so understanding
Label:  joy


Contribution?,Feature
0.641,Highlighted in text (sum)
-1.149,<BIAS>
