In [1]:
import numpy as np
import tensorflow_text
import tensorflow_hub as hub
import tensorflow as tf

from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

### Dataset Loading

In [2]:
dialogues = open('dailydialog\dialogues_text.txt', 'r', encoding='utf-8').readlines()
emotions = open('dailydialog\dialogues_emotion.txt', 'r', encoding='utf-8').readlines()
acts = open('dailydialog\dialogues_act.txt', 'r', encoding='utf-8').readlines()
topics = open('dailydialog\dialogues_topic.txt', 'r', encoding='utf-8').readlines()

In [3]:
dialogues_list = []
emotions_list = []
intents_list = []
topics_list = []

In [4]:
for index in range(0,len(dialogues)):
    dialogues[index] = ''.join(dialogues[index].rsplit('__eou__', 1))
    dialo_substr = dialogues[index].strip().split('__eou__')
    conv_substr = dialo_substr
    emo_substr = emotions[index].strip().split(' ')
    act_substr = acts[index].strip().split(' ')
    topics[index] = topics[index].strip()
    
    for x in range(0, len(dialo_substr)):
        if x % 2 == 0:
            conv_substr[x] = 'A: '+conv_substr[x].strip()
        else:
            conv_substr[x] = 'B: '+conv_substr[x].strip()
        dialogues_list.append(dialo_substr[x].strip())
        emotions_list.append(emo_substr[x].strip())
        intents_list.append(act_substr[x].strip())
    dialogues[index] = ' '.join(conv_substr)

In [5]:
dialogues_list = np.array(dialogues_list)
emotion_target = np.array(to_categorical(emotions_list))
intent_target = np.array([to_categorical(int(val)-1, num_classes=4) for val in intents_list])
topic_target = np.array([to_categorical(int(val)-1, num_classes=10) for val in topics])

### Model Building

In [6]:
tfhub_handle_encoder = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/2'
tfhub_handle_preprocess = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'

bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)

In [7]:
def build_model(num_classes):
    text_input = Input(shape=(), dtype=tf.string, name='Text')
    preprocessing = hub.KerasLayer(tfhub_handle_preprocess, name='Preprocessing')
    encoder_inputs = preprocessing(text_input)
    encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='SmallBERT')
    outputs = encoder(encoder_inputs)
    dropout = Dropout(0.1)(outputs['pooled_output'])
    classifier = Dense(num_classes, activation='softmax', name='Classifier')(dropout)
    model = Model(text_input, classifier, name='Sentiment-Classifier-Model')
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5), 
        loss=tf.keras.losses.CategoricalCrossentropy(), 
        metrics=['accuracy']
    )
    return model

### Emotion Classifier

In [8]:
emotion_classifier = build_model(7)

In [9]:
train_data, test_data, train_label, test_label = train_test_split(dialogues_list, emotion_target, test_size=0.2)

In [10]:
history = emotion_classifier.fit(x=train_data, y=train_label, validation_split=0.2, epochs=1, batch_size=16)



In [11]:
emotion_classifier.save('classifiers/emotion')



INFO:tensorflow:Assets written to: classifiers/emotion\assets


INFO:tensorflow:Assets written to: classifiers/emotion\assets


In [12]:
emotion_classifier.evaluate(x=test_data, y=test_label, batch_size=16)



[0.39416149258613586, 0.8621577024459839]

### Intent Classifier

In [9]:
train_data, test_data, train_label, test_label = train_test_split(dialogues_list, intent_target, test_size=0.2)

In [10]:
intent_classifier = build_model(4)

In [11]:
intent_classifier.fit(x=train_data, y=train_label, validation_split=0.2, epochs=1, batch_size=16)



<keras.callbacks.History at 0x1535282da60>

In [12]:
intent_classifier.save('classifiers/intent')



INFO:tensorflow:Assets written to: classifiers/intent\assets


INFO:tensorflow:Assets written to: classifiers/intent\assets


In [13]:
intent_classifier.evaluate(x=test_data, y=test_label, batch_size=16)



[0.48393499851226807, 0.8138958811759949]

### Topic Classifier

In [9]:
train_data, test_data, train_label, test_label = train_test_split(np.array(dialogues), topic_target, test_size=0.2)

In [10]:
topic_classifier = build_model(10)

In [11]:
topic_classifier.fit(x=train_data, y=train_label, validation_split=0.2, epochs=1, batch_size=16)



<keras.callbacks.History at 0x27b3c9cb970>

In [12]:
topic_classifier.save('classifiers/topic')



INFO:tensorflow:Assets written to: classifiers/topic\assets


INFO:tensorflow:Assets written to: classifiers/topic\assets


In [13]:
topic_classifier.evaluate(x=test_data, y=test_label, batch_size=16)



[0.8948160409927368, 0.6855944991111755]