In [None]:
#Model architecture inspired by Y. Kim, "Convolutional Neural Networks for Sentence Classification", 2014
!pip install tensorflow==1.15
!pip install tensorflow_hub>=0.6.0
!pip3 install tensorflow_text==1.15

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import re
import nltk
from nltk.corpus import stopwords

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Input, Conv1D, GlobalMaxPool1D, Flatten
from tensorflow.keras.layers import Lambda

def process_text(document):
    """Normalize a raw statement and split it into lowercase word tokens.

    Steps: coerce to ``str``, replace every non-word character (anything not
    matched by ``\\w``, which includes all whitespace) with a space, lowercase,
    split on whitespace, and drop tokens of two characters or fewer.

    Arguments:
        document -- the text to process; non-string values (e.g. numbers or
            missing values read from a CSV) are converted with ``str()``.

    Returns:
        list of str -- the remaining tokens, in their original order.
    """
    # Coerce before any regex call: re.sub raises TypeError on non-string input,
    # so the conversion has to happen first rather than midway through.
    text = str(document)

    # Replace special characters with spaces. Whitespace is itself non-word, so
    # this also covers whitespace normalization; split() below collapses runs.
    text = re.sub(r'\W', ' ', text)

    # Lowercase, then tokenize on whitespace.
    tokens = text.lower().split()

    # Discard very short tokens (length <= 2).
    return [word for word in tokens if len(word) > 2]

In [None]:
# Load the dataset and keep only the label ('Emotion') and text ('Statement') columns.
wanted_columns = ['Emotion', 'Statement']
df = pd.read_csv('LOCAL_PATH_TO_DATASET')[wanted_columns]
display(df.head())

In [None]:
from tqdm import tqdm

# Download the NLTK corpora this notebook fetches. ('punkt' would only be
# needed for sentence tokenization, which is not performed here.)
for corpus_name in ('stopwords', 'wordnet'):
    nltk.download(corpus_name)

# English stop-word set (built here; process_text itself does not filter by it).
en_stop = set(stopwords.words('english'))

# Tokenize every statement into a list of cleaned word tokens.
df['preprocessedStatement'] = df['Statement'].apply(process_text)
display(df.head())

In [None]:
# Token count of the longest preprocessed statement.
token_counts = df['preprocessedStatement'].apply(len)
max_length = token_counts.max()

# Fit a Keras tokenizer on the token lists.
# The +1 presumably accounts for an index reserved by the tokenizer - TODO confirm.
t = Tokenizer()
t.fit_on_texts(df['preprocessedStatement'])
vocab_size = 1 + len(t.word_index)

In [None]:
# Bring every token list to exactly max_length entries: keep the first
# max_length tokens and fill the remainder with the literal "PADword".
# Explicit slicing/padding replaces the previous bare `except:` around
# indexing, which would have hidden any unrelated error as padding.
new_X = [
    list(seq[:max_length]) + ["PADword"] * (max_length - len(seq))
    for seq in df['preprocessedStatement']
]

In [None]:
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
# Encode the labels in column 'Emotion' as integers.
# The result is stored in its own Series instead of overwriting and popping
# df['Emotion'], so this cell can be re-run without a KeyError and the
# original labels stay available in df.
y = pd.Series(le.fit_transform(df['Emotion']), index=df.index, name='Emotion')
# One-hot encode; num_classes=7 matches the 7-unit softmax output of the model below.
y_new = tf.keras.utils.to_categorical(y, num_classes=7)
print(y_new)

In [None]:
# Tabular form of the padded sequences: one row per statement, one column per token position.
new_X_df = pd.DataFrame(data=new_X)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 4% of the rows as the test set, stratified on the integer labels.
# random_state is fixed so the split (and every metric computed from it) is
# reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    new_X_df, y_new, test_size=0.04, stratify=y, random_state=42
)

In [None]:
# Row counts for MELD-DD (batch size 64); 128 and 1216 are multiples of 64.
# For ISEAR (batch size 32) use holdout_rows=143, val_rows=128, test_rows=288.
# NOTE(review): the sizes look chosen so validation/test sets divide into full
# batches for the fixed-batch-size model - confirm against the dataset sizes.
holdout_rows, val_rows, test_rows = 174, 128, 1216

# The first `holdout_rows` training rows are taken out of the training set;
# only the first `val_rows` of them are kept as the validation set.
held_out_x, held_out_y = x_train[:holdout_rows], y_train[:holdout_rows]
x_train, y_train = x_train[holdout_rows:], y_train[holdout_rows:]
x_val, y_val = held_out_x[:val_rows], held_out_y[:val_rows]

# Truncate the test set to `test_rows` rows.
x_test, y_test = x_test[:test_rows], y_test[:test_rows]

In [None]:
# Mini-batch size. It is used both as the fixed batch dimension of the model's
# Input layer and to build the constant sequence_len tensor in ElmoEmbedding.
batch_size = 64 #32 for Isear, 64 for Meld-dd

import tensorflow_hub as hub
from tensorflow.keras import backend as K

# Create a TF1-style session and register it as the session Keras uses.
sess = tf.Session()
K.set_session(sess)

# Load the ELMo v2 module from TF Hub, then run the global-variable and
# lookup-table initializers in that session.
# NOTE(review): the module is only invoked through a Lambda layer later on;
# confirm whether trainable=True actually leads to ELMo weights being updated.
elmo_model = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)
sess.run(tf.global_variables_initializer())
sess.run(tf.tables_initializer())

In [None]:
def ElmoEmbedding(x):
    """Embed a batch of pre-tokenized, padded sentences with the ELMo module.

    Arguments:
        x -- tensor of tokens; it is cast to tf.string and squeezed before
            being passed to the module's "tokens" signature.

    Returns:
        The "elmo" entry of the module's output dictionary.

    Note: sequence_len is a constant holding max_length for each of the
    batch_size rows, so padding tokens are counted as part of every sequence
    and the function only works for batches of exactly batch_size rows.
    """
    token_batch = tf.squeeze(tf.cast(x, tf.string))
    lengths = tf.constant([max_length] * batch_size)
    elmo_outputs = elmo_model(
        inputs={"tokens": token_batch, "sequence_len": lengths},
        signature="tokens",
        as_dict=True,
    )
    return elmo_outputs["elmo"]

In [None]:
from tensorflow.keras import activations

def focal_loss(gamma=2., alpha=4.):
    """Create a multi-class focal loss function.

    Implements FL(p_t) = -alpha * (1 - p_t)^gamma * ln(p_t) from
    "Focal Loss for Dense Object Detection", https://arxiv.org/abs/1708.02002

    Keyword Arguments:
        gamma -- exponent applied to (1 - p_t); converted with float() (default: {2.0})
        alpha -- constant multiplier on the loss; converted with float() (default: {4.0})

    Returns:
        [function] -- focal_loss_fixed(y_true, y_pred) returning a scalar loss tensor.
    """
    gamma, alpha = float(gamma), float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Compute the focal loss for one batch.

        y_pred is expected to already be a probability distribution (after
        softmax); a small epsilon is added to it before taking the logarithm.

        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

        Returns:
            [tensor] -- per-sample maximum over classes, averaged over the batch.
        """
        epsilon = 1.e-9
        targets = tf.convert_to_tensor(y_true, tf.float32)
        probs = tf.convert_to_tensor(y_pred, tf.float32) + epsilon

        # Cross-entropy term, masked by the targets.
        cross_entropy = targets * -tf.math.log(probs)
        # Modulating factor (1 - p)^gamma, also masked by the targets.
        modulating = targets * tf.pow(1. - probs, gamma)

        per_class = alpha * (modulating * cross_entropy)
        per_sample = tf.reduce_max(per_class, axis=1)
        return tf.reduce_mean(per_sample)

    return focal_loss_fixed

In [None]:
# Stop training when val_loss has not improved for 3 epochs.
callback = EarlyStopping(monitor='val_loss', patience=3)

# String-token input with a fixed batch size (ElmoEmbedding builds a constant
# sequence_len tensor of exactly batch_size entries).
input_layer = Input(shape=(max_length,), batch_size=batch_size, dtype=tf.string)
embedding = Lambda(ElmoEmbedding, output_shape=(max_length, 1024))(input_layer)

# CNN classification head applied on top of the ELMo embeddings:
# convolution -> global max pooling -> dense -> 7-way softmax.
x = embedding
for layer in (
    Conv1D(128, 5, activation='relu'),
    GlobalMaxPool1D(),
    Flatten(),
    Dense(64, activation="relu"),
    Dense(7, activation='softmax'),
):
    x = layer(x)

model_elmo = Model(inputs=input_layer, outputs=x)
# Alternative metric: tf.keras.metrics.Recall()
model_elmo.compile(
    optimizer='adam',
    loss=focal_loss(alpha=1),
    metrics=['accuracy'],
)
model_elmo.summary()

In [None]:
from tensorflow.keras.utils import plot_model
# Draw the model graph, including the shapes flowing between layers.
plot_model(model_elmo, show_shapes=True)

In [None]:
# Train for at most 10 epochs; the early-stopping callback may end training sooner.
model_elmo.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=10,
    callbacks=[callback],
)

In [None]:
# Model outputs (softmax probabilities) for the trimmed test set.
y_pred = model_elmo.predict(x=x_test)

In [None]:
# Reduce each row (predicted probabilities / one-hot targets) to its class index.
y_pred_clean = y_pred.argmax(axis=1)
y_test_clean = y_test.argmax(axis=1)

In [None]:
# Per-class precision / recall / F1 on the test set.
report = classification_report(y_true=y_test_clean, y_pred=y_pred_clean)
print(report)