In [1]:
import torch
import json
from keras.preprocessing.sequence import pad_sequences
import pickle
import pandas as pd
from keras.preprocessing.text import Tokenizer
import torch.nn as nn
import numpy as np, pickle, time, argparse
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score, classification_report, precision_recall_fscore_support

import tensorflow as tf
import contractions
from sklearn.preprocessing import LabelBinarizer
import time
import tensorflow.keras as k
import os
import matplotlib.pyplot as plt
import tensorflow_addons as tfa

In [2]:
"deal with contracted texts"
def expand_text(text):
    expanded_words = []
    text = text.encode('utf-8').decode('cp1252').replace("Â’", "'")
    #text = text.replace("’", "'")
    for word in text.split():
      # using contractions.fix to expand the shotened words
      expanded_words.append(contractions.fix(word))   

    expanded_text = ' '.join(expanded_words)
    return expanded_text



"clean dataset"
def preprocess_text(x):
    for punct in '"!&?.,}-/<>#$%\()*+:;=?@[\\]^_`|\~':
        x = x.replace(punct, ' ')
    x = ' '.join(x.split())
    x = x.lower()
    
    return x


def create_utterances(filename, split):
    sentences, emotion_labels, speakers, conv_id, = [], [], [], []
    
    lengths = []
    with open(filename, 'r', encoding='latin1') as f:
        a = json.load(f)
        for c_id, line in enumerate(a):
            for item in line:
                sentences.append(item['utterance'])
                emotion_labels.append(item['emotion'])
                conv_id.append(split[:2] + '_c' + str(c_id))
                speakers.append(item['speaker'])
            
            # u_id += 1
                
    data = pd.DataFrame(sentences, columns=['sentence'])
    data['sentence'] = data['sentence'].apply(lambda x: expand_text(x))
    data['sentence'] = data['sentence'].apply(lambda x: preprocess_text(x))

    data['emotion_label'] = emotion_labels
    data['speaker'] = speakers
    data['conv_id'] = conv_id

    
    return data


In [4]:
"create embedding"
def load_pretrained_glove():
    print("Loading GloVe model, this can take some time...")
    glv_vector = {}
    f = open('glove.840B.300d.txt', encoding='utf-8')

    for line in f:
        values = line.split()
        word = values[0]
        try:
            coefs = np.asarray(values[1:], dtype='float')
            glv_vector[word] = coefs
        except ValueError:
            continue
    f.close()
    print("Completed loading pretrained GloVe model.")
    return glv_vector

def encode_labels(encoder, l):
    return encoder[l]


if __name__ == '__main__':

    train_data = create_utterances('Friends/friends_train.json', 'train')
    valid_data = create_utterances('Friends/friends_dev.json', 'valid')
    test_data = create_utterances('Friends/friends_test.json', 'test')
    
    ## encode the emotion and dialog act labels ##
    all_emotion_labels =  set(train_data['emotion_label'])
    emotion_label_encoder, emotion_label_decoder = {}, {}


    for i, label in enumerate(all_emotion_labels):
        emotion_label_encoder[label] = i
        emotion_label_decoder[i] = label


    pickle.dump(emotion_label_encoder, open('emotion_label_encoder.pkl', 'wb'))
    pickle.dump(emotion_label_decoder, open('emotion_label_decoder.pkl', 'wb'))

    train_data['encoded_emotion_label'] = train_data['emotion_label'].map(lambda x: encode_labels(emotion_label_encoder, x))
    test_data['encoded_emotion_label'] = test_data['emotion_label'].map(lambda x: encode_labels(emotion_label_encoder, x))
    valid_data['encoded_emotion_label'] = valid_data['emotion_label'].map(lambda x: encode_labels(emotion_label_encoder, x))
    
    
    ## tokenize all sentences ##
    all_text = list(train_data['sentence'])
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(all_text)
    pickle.dump(tokenizer, open('tokenizer.pkl', 'wb'))

    ## convert the sentences into sequences ##
    train_sequence = tokenizer.texts_to_sequences(list(train_data['sentence']))
    valid_sequence = tokenizer.texts_to_sequences(list(valid_data['sentence']))
    test_sequence = tokenizer.texts_to_sequences(list(test_data['sentence']))
    
    train_data['sentence_length'] = [len(item) for item in train_sequence]
    valid_data['sentence_length'] = [len(item) for item in valid_sequence]
    test_data['sentence_length'] = [len(item) for item in test_sequence]
    
    max_num_tokens = 250

    train_sequence = pad_sequences(train_sequence, maxlen=max_num_tokens, padding='post')
    valid_sequence = pad_sequences(valid_sequence, maxlen=max_num_tokens, padding='post')
    test_sequence = pad_sequences(test_sequence, maxlen=max_num_tokens, padding='post')

    train_data['sequence'] = list(train_sequence)
    valid_data['sequence'] = list(valid_sequence)
    test_data['sequence'] = list(test_sequence)
    
   
    
    ## save pretrained embedding matrix ##
    glv_vector = load_pretrained_glove()
    word_vector_length = len(glv_vector['the'])
    word_index = tokenizer.word_index
    inv_word_index = {v: k for k, v in word_index.items()}
    num_unique_words = len(word_index)
    glv_embedding_matrix = np.zeros((num_unique_words+1, word_vector_length))

    for j in range(1, num_unique_words+1):
        try:
            glv_embedding_matrix[j] = glv_vector[inv_word_index[j]]
        except KeyError:
            glv_embedding_matrix[j] = np.random.randn(word_vector_length)/200

    np.ndarray.dump(glv_embedding_matrix, open('glv_embedding_matrix', 'wb'))
    print ('Done. Completed preprocessing.')

Loading GloVe model, this can take some time...
Completed loading pretrained GloVe model.
Done. Completed preprocessing.


In [5]:
train_data['emotion_true'] = pd.get_dummies(train_data['encoded_emotion_label']).values.tolist()


In [6]:
train_data

Unnamed: 0,sentence,emotion_label,speaker,conv_id,encoded_emotion_label,sentence_length,sequence,emotion_true
0,also i was the point person on my company's tr...,neutral,Chandler,tr_c0,3,18,"[371, 1, 31, 5, 695, 401, 33, 26, 2758, 2759, ...","[0, 0, 0, 1, 0, 0, 0, 0]"
1,you must have had your hands full,neutral,The Interviewer,tr_c0,3,7,"[2, 311, 17, 98, 44, 643, 760, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0]"
2,that i did that i did,neutral,Chandler,tr_c0,3,6,"[9, 1, 48, 9, 1, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[0, 0, 0, 1, 0, 0, 0, 0]"
3,so let us talk a little bit about your duties,neutral,The Interviewer,tr_c0,3,10,"[23, 84, 79, 175, 7, 100, 402, 54, 44, 1470, 0...","[0, 0, 0, 1, 0, 0, 0, 0]"
4,my duties all right,surprise,Chandler,tr_c0,6,4,"[26, 1470, 34, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0]"
...,...,...,...,...,...,...,...,...
10556,you or me,neutral,Chandler,tr_c719,3,3,"[2, 112, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0]"
10557,i got it uh joey women do not have adam's apples,non-neutral,Ross,tr_c719,5,11,"[1, 57, 6, 51, 81, 316, 12, 8, 17, 2757, 5888,...","[0, 0, 0, 0, 0, 1, 0, 0]"
10558,you guys are messing with me right,surprise,Joey,tr_c719,6,7,"[2, 87, 13, 2747, 37, 20, 36, 0, 0, 0, 0, 0, 0...","[0, 0, 0, 0, 0, 0, 1, 0]"
10559,yeah,neutral,All,tr_c719,3,1,"[24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0, 0, 0, 1, 0, 0, 0, 0]"


In [7]:
train_data['sequence'] = np.array(train_data['sequence'])

In [8]:
dialogue_train_data = train_data.groupby("conv_id").agg(list)


In [9]:
dialogue_train_data

Unnamed: 0_level_0,sentence,emotion_label,speaker,encoded_emotion_label,sentence_length,sequence,emotion_true
conv_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
tr_c0,[also i was the point person on my company's t...,"[neutral, neutral, neutral, neutral, surprise,...","[Chandler, The Interviewer, Chandler, The Inte...","[3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 7, 3, 6, 3, 6, ...","[18, 7, 6, 10, 4, 16, 2, 18, 3, 5, 7, 28, 1, 7...","[[371, 1, 31, 5, 695, 401, 33, 26, 2758, 2759,...","[[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, ..."
tr_c1,"[hey mon, hey hey hey you want to hear somethi...","[neutral, neutral, joy, sadness, surprise, neu...","[Chandler, Monica, Chandler, Monica, Chandler,...","[3, 3, 2, 0, 6, 3, 5, 0, 3, 3, 2, 3, 5, 6, 6, ...","[2, 10, 3, 8, 2, 12, 10, 2, 5, 6, 2, 5, 4, 8, ...","[[28, 509, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, ..."
tr_c10,"[go go go, oh yeah now everybody wants to be u...","[joy, joy, non-neutral, surprise, neutral, neu...","[Ross, Rachel, Phoebe, Monica, Phoebe, Ross]","[2, 2, 5, 6, 3, 3]","[3, 10, 1, 6, 9, 10]","[[43, 43, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, ..."
tr_c100,[ooh oh no i have to go i have a massage appoi...,"[non-neutral, neutral, neutral, joy, non-neutr...","[Phoebe, Eric, Phoebe, Eric, Mona, Ross, Dr. G...","[5, 3, 3, 2, 5, 3, 1, 0]","[12, 13, 13, 7, 10, 6, 19, 10]","[[219, 11, 16, 1, 17, 4, 43, 1, 17, 7, 1328, 1...","[[0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 0, ..."
tr_c101,"[okay so we will just stay married, yes exactl...","[joy, joy, joy, non-neutral, fear, non-neutral...","[Rachel, Ross, Rachel, Ross, Rachel, Ross, Rac...","[2, 2, 2, 5, 7, 5, 6, 3, 5, 2, 3, 0, 3, 3, 5, ...","[7, 2, 9, 19, 15, 12, 10, 9, 1, 27, 2, 5, 3, 2...","[[22, 23, 18, 41, 25, 254, 186, 0, 0, 0, 0, 0,...","[[0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, ..."
...,...,...,...,...,...,...,...
tr_c95,"[hey joey you wanted to talk to me, i do not k...","[neutral, neutral, neutral, neutral, surprise,...","[Tag, Joey, Tag, Joey, Tag, Joey]","[3, 3, 3, 3, 6, 2]","[8, 11, 6, 13, 1, 3]","[[28, 81, 2, 198, 4, 175, 4, 20, 0, 0, 0, 0, 0...","[[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, ..."
tr_c96,[oh danielle i was not expecting the machine g...,"[surprise, neutral, non-neutral, non-neutral, ...","[Chandler, Monica, Chandler, Ross, Chandler, C...","[6, 3, 5, 5, 7, 7, 5, 6, 1, 4, 4, 4, 5, 1, 5, ...","[21, 2, 5, 5, 2, 12, 21, 12, 9, 6, 9, 9, 4, 7,...","[[11, 3176, 1, 31, 8, 1150, 5, 503, 136, 20, 7...","[[0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 1, 0, 0, ..."
tr_c97,"[y'know they say a watched pot never beeps, it...","[neutral, non-neutral, neutral, anger, neutral...","[Monica, Phoebe, Monica, Phoebe, Monica, Phoeb...","[3, 5, 3, 1, 3, 3, 3, 3, 3, 0, 0, 0, 0]","[8, 20, 12, 7, 6, 1, 6, 5, 2, 2, 2, 19, 26]","[[53, 63, 101, 7, 3180, 3181, 128, 2103, 0, 0,...","[[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, ..."
tr_c98,[ok bye well monica's not coming it is just go...,"[neutral, neutral, neutral, joy, non-neutral, ...","[Ross, Chandler, Ross, Chandler, Ross, Chandle...","[3, 3, 3, 2, 5, 5, 6, 4, 6, 3, 5, 5, 3]","[15, 14, 8, 21, 2, 2, 15, 9, 18, 1, 6, 8, 3]","[[113, 179, 30, 488, 8, 251, 6, 3, 25, 29, 4, ...","[[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, ..."


In [10]:
class MyLabelBinarizer:

    def __init__(self):
        self.lb = LabelBinarizer()

    def fit(self, X):
        # Convert X to array
        X = np.array(X)
        # Fit X using the LabelBinarizer object
        self.lb.fit(X)
        # Save the classes
        self.classes_ = self.lb.classes_

    def fit_transform(self, X):
        # Convert X to array
        X = np.array(X)
        # Fit + transform X using the LabelBinarizer object
        Xlb = self.lb.fit_transform(X)
        # Save the classes
        self.classes_ = self.lb.classes_
        if len(self.classes_) == 2:
            Xlb = np.hstack((Xlb, 1 - Xlb))
        return Xlb

    def transform(self, X):
        # Convert X to array
        X = np.array(X)
        # Transform X using the LabelBinarizer object
        Xlb = self.lb.transform(X)
        if len(self.classes_) == 2:
            Xlb = np.hstack((Xlb, 1 - Xlb))
        return Xlb

    def inverse_transform(self, Xlb):
        # Convert Xlb to array
        Xlb = np.array(Xlb)
        if len(self.classes_) == 2:
            X = self.lb.inverse_transform(Xlb[:, 0])
        else:
            X = self.lb.inverse_transform(Xlb)
        return X

In [11]:
dialogue_train_data['encoded_speaker'] = dialogue_train_data['speaker'].apply(lambda s: MyLabelBinarizer().fit_transform(s))


In [12]:
dialogue_train_data['sequence'] = dialogue_train_data['sequence'].apply(lambda s: np.array(np.array(s)))
dialogue_train_data['encoded_emotion_label'] = dialogue_train_data['encoded_emotion_label'].apply(lambda s: np.array(np.array(s)))
dialogue_train_data['encoded_speaker'] = dialogue_train_data['encoded_speaker'].apply(lambda s: np.array(np.array(s)))

In [13]:
dialogue_train_data.reset_index(inplace=True)

In [14]:
dialogue_train_data.head()


Unnamed: 0,conv_id,sentence,emotion_label,speaker,encoded_emotion_label,sentence_length,sequence,emotion_true,encoded_speaker
0,tr_c0,[also i was the point person on my company's t...,"[neutral, neutral, neutral, neutral, surprise,...","[Chandler, The Interviewer, Chandler, The Inte...","[3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 7, 3, 6, 3, 6, ...","[18, 7, 6, 10, 4, 16, 2, 18, 3, 5, 7, 28, 1, 7...","[[371, 1, 31, 5, 695, 401, 33, 26, 2758, 2759,...","[[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, ...","[[1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [0,..."
1,tr_c1,"[hey mon, hey hey hey you want to hear somethi...","[neutral, neutral, joy, sadness, surprise, neu...","[Chandler, Monica, Chandler, Monica, Chandler,...","[3, 3, 2, 0, 6, 3, 5, 0, 3, 3, 2, 3, 5, 6, 6, ...","[2, 10, 3, 8, 2, 12, 10, 2, 5, 6, 2, 5, 4, 8, ...","[[28, 509, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, ...","[[1, 0, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0], [0,..."
2,tr_c10,"[go go go, oh yeah now everybody wants to be u...","[joy, joy, non-neutral, surprise, neutral, neu...","[Ross, Rachel, Phoebe, Monica, Phoebe, Ross]","[2, 2, 5, 6, 3, 3]","[3, 10, 1, 6, 9, 10]","[[43, 43, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, ...","[[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1,..."
3,tr_c100,[ooh oh no i have to go i have a massage appoi...,"[non-neutral, neutral, neutral, joy, non-neutr...","[Phoebe, Eric, Phoebe, Eric, Mona, Ross, Dr. G...","[5, 3, 3, 2, 5, 3, 1, 0]","[12, 13, 13, 7, 10, 6, 19, 10]","[[219, 11, 16, 1, 17, 4, 43, 1, 17, 7, 1328, 1...","[[0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 0, ...","[[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 1..."
4,tr_c101,"[okay so we will just stay married, yes exactl...","[joy, joy, joy, non-neutral, fear, non-neutral...","[Rachel, Ross, Rachel, Ross, Rachel, Ross, Rac...","[2, 2, 2, 5, 7, 5, 6, 3, 5, 2, 3, 0, 3, 3, 5, ...","[7, 2, 9, 19, 15, 12, 10, 9, 1, 27, 2, 5, 3, 2...","[[22, 23, 18, 41, 25, 254, 186, 0, 0, 0, 0, 0,...","[[0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, ...","[[0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0..."


In [15]:
dialogue_train_data.shape

(720, 9)

In [16]:
class CNNExtractor(k.Model):
    def __init__(self, vocab_size, embedding_dim, output_size, max_num_tokens,glv_embedding_matrix, filters, kernel_sizes, dropout):
        super(CNNExtractor, self).__init__()
                
        self.embedding = k.layers.Embedding(input_dim=vocab_size, output_dim= embedding_dim, 
                                            input_length=max_num_tokens, weights = [glv_embedding_matrix])
        self.convs1 = k.layers.Conv1D(filters, 
                                           kernel_sizes[0], 
                                           activation='relu')
        self.convs2 = k.layers.Conv1D(filters, 
                                           kernel_sizes[1], 
                                           activation='relu')
        self.convs3 = k.layers.Conv1D(filters, 
                                           kernel_sizes[2], 
                                           activation='relu')
        
        self.pooling = k.layers.GlobalMaxPooling1D()
        self.concatanate = k.layers.Concatenate()
        self.dropout = k.layers.Dropout(dropout)
        self.dense = k.layers.Dense(output_size, input_shape=(len(kernel_sizes) * filters,), activation='relu')

    def call(self, inputs):
         # input size = (num_words = 250)
         #in the original code the input is equals to (num_utt, batch, num_words)
            
        x = self.embedding(inputs) # x size = (num_words = 250, embedding = 300)
        x = tf.expand_dims(x, axis=-1) # x size =  (num_words = 250, embedding = 300, num_utt * batch = 1)
        x = tf.transpose(x, [2, 1, 0]) # x size =  (num_utt * batch = 1, embedding = 300,num_words = 250 )
        
    
        conv1_x = self.pooling(self.convs1(x)) # conv1_x size =  (num_utt * batch = 1, 50 )
        conv2_x = self.pooling(self.convs2(x)) # conv2_x size =  (num_utt * batch = 1, 50 )
        conv3_x = self.pooling(self.convs3(x)) # conv3_x size =  (num_utt * batch = 1, 50 )
        x = self.concatanate([conv1_x, conv2_x, conv3_x]) # x size =  (num_utt * batch = 1, 150 )
        x = self.dropout(x)
        x = self.dense(x)
        # x size =  (num_utt * batch = 1, output_size = 100 )
        return x

In [17]:
class GlobalGRU(k.layers.Layer):
    def __init__(self, D_g):
        
        super(GlobalGRU, self).__init__()
        self.global_gru = k.layers.GRU(D_g,
                                bias_initializer="ones",
                                dropout=0.1,
                                recurrent_initializer='glorot_uniform')
        self.dropout = k.layers.Dropout(0.5)

# h_P_previous -> previous party state
#t_r _> textual representation
#h_G_previous _> previous global state
    def call(self, t_r, h_P_previous, h_G_previous):
        #print("t_r")
        #print(t_r)
        #print("h_P_previous")
        #print(h_P_previous)
        #print("h_G_previous")
        #print(h_G_previous)
        t_r_h_P = tf.concat([h_P_previous, t_r], axis=-1)
        #print("t_r_h_P")
        #print(t_r_h_P)
        #Global state        
        t_r_h_P = tf.expand_dims(t_r_h_P, axis=-1)
        #print(t_r_h_P.shape)

        output = self.global_gru(t_r_h_P, initial_state=h_G_previous)
        #print(output)
        return self.dropout(output)


In [18]:
class PartyGRU(k.layers.Layer):
    def __init__(self, D_p):
        super(PartyGRU, self).__init__()
        
        self.party_gru = k.layers.GRU(D_p,
                                bias_initializer="ones",
                                dropout=0.1,
                                recurrent_initializer='glorot_uniform')
        self.dropout = k.layers.Dropout(0.5)

#c_t -> current context 
#t_r -> textual representation
#h_P_previous -> previous party state
    def call(self, c_t, t_r, h_P_previous):

        t_r_c_t = tf.concat([c_t, t_r], axis=-1)
        t_r_c_t = tf.expand_dims(t_r_c_t, axis=-1)

        return self.dropout(self.party_gru(t_r_c_t, initial_state=h_P_previous))


In [19]:
class EmotionGRU(k.layers.Layer):
    def __init__(self, D_e):
        super(EmotionGRU, self).__init__()

        self.emotion_gru = k.layers.GRU(D_e,
                                bias_initializer="ones",
                                dropout=0.1,
                                recurrent_initializer='glorot_uniform')
        self.dropout = k.layers.Dropout(0.5)

#h_E_previous -> previous emotion state
#h_P -> current party state
    def call(self, h_P, h_E_previous):
        
        h_P = tf.expand_dims(h_P, axis=-1)
        return self.dropout(self.emotion_gru(h_P, initial_state=h_E_previous))


In [20]:
class EmotionClassificationDense(k.layers.Layer):
    def __init__(self, D_c, n_classes):
        super(EmotionClassificationDense, self).__init__()

        self.classification = k.layers.Dense(2*D_c, activation="relu")

        self.y = k.layers.Dense(n_classes, activation="softmax", kernel_regularizer='l2')

        
    def call(self, h_E):
        output = self.classification(h_E)
        return self.y(output)


In [21]:
class AttentionBlock(k.layers.Layer):
    def __init__(self, D_g):
        super(AttentionBlock, self).__init__()
        self.dense = k.layers.Dense(D_g)

    def call(self, h_G_all, t_r):
        H_g = np.array(h_G_all) #Hg = (1, 150, n_iterations)
        
        t_r =  self.dense(t_r)  #  (1, 1, 150)
        t_r = tf.expand_dims(t_r, 1)
        score = tf.matmul( t_r, H_g, transpose_b=True)
        a_t = tf.nn.softmax(score, axis = 0) # 1, 1, 2
        
        a_t = tf.transpose(a_t, [1, 2, 0]) 
        aux = tf.transpose(H_g, [1,0,2]) # 1 , 2, 150
        c_t = tf.matmul(a_t, aux) 
        return c_t[-1,:,:]

In [53]:
class DialogueRNN(k.Model):
    def __init__(self, D_g, D_p, D_e, D_c, n_classes, vocab_size, embedding_dim, cnn_output_size ,
                 max_num_tokens, glv_embedding_matrix, filters, kernel_sizes, dropout):
        
        super(DialogueRNN, self).__init__()
        
        self.D_g = D_g
        self.D_p = D_p
        self.D_e = D_e
        self.D_c = D_c
        
        self.cnnTextualRepresentation = CNNExtractor(vocab_size, embedding_dim, cnn_output_size, 
                                            max_num_tokens,glv_embedding_matrix, filters, kernel_sizes, dropout)
        
        self.attention = AttentionBlock(D_g)
        self.partyGRU = PartyGRU(D_p)
        self.globalGRU = GlobalGRU(D_g)
        self.emotionGRU = EmotionGRU(D_e)
        
        self.classificationDense = EmotionClassificationDense(D_c, n_classes)

    #for each conversation 
    def call(self, data):  
        
        speakers = data['encoded_speaker']
        messages= data['sequence']
        
        number_of_speakers =  speakers.shape[1]
        print(number_of_speakers)
        speakers_states = []
        for i in range(number_of_speakers):
        #For each speaker initialize HP
            speakers_states.append(tf.zeros((1, self.D_p)))
            
        h_G = tf.zeros((1, self.D_g))
        
        h_G_all = []
        h_G_all.append(h_G)
        
        y_pred_prob_all = []
        y_pred_all = []
        #initialize emotion states
        h_E = tf.zeros((1, self.D_e))
        
#for each coversation it restart the training?

        c = 0
        #for each message 
        for message,speaker in zip(messages, speakers):
            #print(f"message {c}")
            #get the speaker id
            t_r = self.cnnTextualRepresentation(message)

            speakers_id = np.argmax(speaker)
            
            if len(h_G_all)==1:
                c_t = tf.zeros((1, self.D_g))
            else:
                c_t = self.attention(h_G_all, t_r)
                c_t = tf.squeeze(c_t)
                c_t = tf.expand_dims(c_t, 1)
                c_t = tf.transpose(c_t)
            
            h_P_previous = speakers_states[speakers_id] 
  
            h_G = self.globalGRU(t_r, h_P_previous ,h_G)
            h_G_all.append(h_G)

            h_P = self.partyGRU(c_t, t_r, h_P_previous)
            speakers_states[speakers_id] = h_P
            print("Message")
            print(c)
            print("Who talks")
            print(speakers_id)
            h_E =  self.emotionGRU(h_P, h_E)
            y_pred_prob = self.classificationDense(h_E)
            y_pred_prob =  tf.squeeze(y_pred_prob)
            y_pred = np.argmax(y_pred_prob, axis=-1)

            y_pred_prob_all.append(y_pred_prob)
            y_pred_all.append(y_pred)
            c = c+1
         

        return y_pred_prob_all
        

In [54]:
batch_size = 1
n_epochs = 12

D_g = 150
D_p = 150
D_e = 100
D_c = 100

n_classes=8
glv_embedding_matrix = np.load(open('glv_embedding_matrix', 'rb') ,allow_pickle=True)
vocab_size, embedding_dim = glv_embedding_matrix.shape
cnn_output_size=100
max_num_tokens = 250
filters = 50
kernel_sizes = [3,4,5]
dropout = 0.5 

data = dialogue_train_data[['encoded_speaker','sequence']]
y = dialogue_train_data['emotion_true']


model = DialogueRNN(D_g, D_p, D_e, D_c, n_classes, vocab_size, embedding_dim, cnn_output_size , 
        max_num_tokens, glv_embedding_matrix, filters, kernel_sizes, dropout)

model.compile(
          loss      = tf.keras.losses.CategoricalCrossentropy(),
          metrics   = tf.keras.metrics.CategoricalAccuracy(),
          optimizer = tf.keras.optimizers.Adam())
# fit 


In [69]:
np.ravel(data.values)

array([[1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0]])

In [66]:
data.values

array([[array([[1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0]]),
        array([[371,   1,  31, ...,   0,   0,   0],
       [  2, 311,  17, ...,   0,   0,   0],
       [  9,   1,  48, ...,   0,   0,   0],
       ...,
       [  1,  15,  29, ...,   0,   0,   0],
       [  1,  41,  70, ...,   0,   0,   0],
       [ 24, 130,   0, ...,   0,   0,   0]])],
       [array([[1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
  

In [77]:
data.values


array([[array([[1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [1, 0, 0, 0],
       [0, 0, 0, 1],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0]]),
        array([[371,   1,  31, ...,   0,   0,   0],
       [  2, 311,  17, ...,   0,   0,   0],
       [  9,   1,  48, ...,   0,   0,   0],
       ...,
       [  1,  15,  29, ...,   0,   0,   0],
       [  1,  41,  70, ...,   0,   0,   0],
       [ 24, 130,   0, ...,   0,   0,   0]])],
       [array([[1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 1, 0],
  

In [78]:
tf.ragged.constant(data.values)

<tf.RaggedTensor [[[[1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]], [[371, 1, 31, 5, 695, 401, 33, 26, 2758, 2759, 121, 5, 2760, 1078, 4, 2761, 836, 837, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [117]:
#model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              #loss=k.losses.CategoricalCrossentropy(from_logits=False, reduction='none'),
              #metrics=k.metrics.Accuracy())

2.7.0


In [73]:
model.fit(tf.ragged.constant(data.values), y)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

In [None]:
def loss(model, messages, speakers, y):
    lambda_ = 0.1
    loss_object = k.losses.CategoricalCrossentropy(from_logits=False)
    
    y_pred_prob_all, y_pred_all = model(messages, speakers)
    print(y_pred_all)
    result = loss_object(y_true=y, y_pred=y_pred_prob_all)
    
    ys = tf.reduce_mean(result)
    l2_norms = [tf.nn.l2_loss(v) for v in model.trainable_variables]
    l2_norm = tf.reduce_sum(l2_norms)
    cost = ys + lambda_*l2_norm

    print(cost)
    return cost

def grad(model, messages, speakers, y):
    with tf.GradientTape() as tape:
        loss_value = loss(model, messages, speakers, y)
        #print([var.name for var in tape.watched_variables()])

    return loss_value, tape.gradient(loss_value, model.trainable_variables,unconnected_gradients=tf.UnconnectedGradients.ZERO
)


train_loss_results = []
train_accuracy_results = []

num_epochs = 5
#optimizer = tf.keras.optimizers.Adam(learning_rate=1e-06)
optimizer = tfa.optimizers.AdamW(learning_rate=1e-06, weight_decay=0.00001)
for epoch in range(num_epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.Accuracy()

    # Training loop - using batches of 32
    for r in range(dialogue_train_data.shape[0]):
        print("Conversation")
        print(r)

        data = dialogue_train_data.iloc[r]

        messages = data["sequence"]
        speakers = data["encoded_speaker"]
        y = data["emotion_true"]
        y_all = data["encoded_emotion_label"]

        loss_value, grads = grad(model, messages, speakers,  y)
        #print(grads)
        #print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
                                          #loss_value.numpy()))
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        
        #print("Step: {},         Loss: {}".format(optimizer.iterations.numpy(),
                                          #loss(model, messages, speakers, y).numpy()))
        
        # Track progress
        epoch_loss_avg.update_state(loss_value)  # Add current batch loss
        # Compare predicted label to actual label
        # training=True is needed only if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        y_pred_prob_all, y_pred_all =  model( messages, speakers)
        epoch_accuracy.update_state(y_all, y_pred_all)

    # End epoch
    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())

    if epoch % 5 == 0:
        print("Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch,
                                                                epoch_loss_avg.result(),
                                                                epoch_accuracy.result()))

Conversation
0
Message
0
Who talks
0
Message
1
Who talks
3
Message
2
Who talks
0
Message
3
Who talks
3
Message
4
Who talks
0
Message
5
Who talks
3
Message
6
Who talks
0
Message
7
Who talks
3
Message
8
Who talks
0
Message
9
Who talks
3
Message
10
Who talks
0
Message
11
Who talks
3
Message
12
Who talks
0
Message
13
Who talks
3
Message
14
Who talks
1
Message
15
Who talks
2
Message
16
Who talks
1
Message
17
Who talks
2
Message
18
Who talks
1
Message
19
Who talks
1
Message
20
Who talks
1
Message
21
Who talks
1
Message
22
Who talks
2
[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
tf.Tensor(11215.799, shape=(), dtype=float32)
Message
0
Who talks
0
Message
1
Who talks
3
Message
2
Who talks
0
Message
3
Who talks
3
Message
4
Who talks
0
Message
5
Who talks
3
Message
6
Who talks
0
Message
7
Who talks
3
Message
8
Who talks
0
Message
9
Who talks
3
Message
10
Who talks
0
Message
11
Who talks
3
Message
12
Who talks
0
Message
13
Who talks
3
Message
14
Who talks
1
Message
15
Who 

In [None]:
loss = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=out_layer, labels=tf_train_labels)) +
    0.01*tf.nn.l2_loss(hidden_weights) +
    0.01*tf.nn.l2_loss(out_weights))

In [None]:
fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
fig.suptitle('Training Metrics')

axes[0].set_ylabel("Loss", fontsize=14)
axes[0].plot(train_loss_results)

axes[1].set_ylabel("Accuracy", fontsize=14)
axes[1].set_xlabel("Epoch", fontsize=14)
axes[1].plot(train_accuracy_results)
plt.show()

In [None]:
test_accuracy = tf.keras.metrics.Accuracy()
ds_test_batch = ds_test.batch(10)

for (x, y) in ds_test_batch:
  # training=False is needed only if there are layers with different
  # behavior during training versus inference (e.g. Dropout).
  logits = model(x, training=False)
  prediction = tf.argmax(logits, axis=1, output_type=tf.int64)
  test_accuracy(prediction, y)

print("Test set accuracy: {:.3%}".format(test_accuracy.result()))

In [None]:
model.fit(messages, speakers)

NameError: name 'messages' is not defined

In [None]:
optimizer = k.optimizers.Adam()
loss_object = k.losses.CategoricalCrossentropy(from_logits=False, reduction='none')
accuracy_object = k.metrics.Accuracy()

def loss_function(real, y):
    loss_ = loss_object(real, y)
    loss_ = tf.reduce_mean(loss_)
    return loss_ #

def accuracy_score(real, y):
    accuracy_object.reset_states()
    accuracy_object.update_state(real, y)
    return accuracy_object.result()


def train_step(model, dialogue_train_data):
    loss, acc = 0, 0    
    
    with tf.GradientTape() as tape:
        for r in range(dialogue_train_data.shape[0]):
            print("Conversation")
            print(r)

            data = dialogue_train_data.iloc[r]

            messages = data["sequence"]
            speakers = data["encoded_speaker"]
            y = data["emotion_true"]
            y_all = data["encoded_emotion_label"]

            loss = []
            acc = []
             
            y_pred_prob_all, y_pred_all = model(messages, speakers)

            y_true = np.argmax(y, axis=-1)        
            loss += loss_function(y, y_pred_prob_all)
            acc += accuracy_score(y, y_pred_prob_all)
            
            #print(acc)
    batch_loss = loss / (i-1)
    batch_acc = acc / (i-1)
    variables = model.trainable_variables
    
    
    #print([var.name for var in tape.watched_variables()])
    gradients = tape.gradient(loss, variables,
                              unconnected_gradients=tf.UnconnectedGradients.ZERO
                             )
    
    #print(gradients)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss, batch_acc

In [None]:
batch_size = 1
n_epochs = 12

D_g = 150
D_p = 150
D_e = 100
D_c = 100

n_classes=8
glv_embedding_matrix = np.load(open('glv_embedding_matrix', 'rb') ,allow_pickle=True)
vocab_size, embedding_dim = glv_embedding_matrix.shape
cnn_output_size=100
max_num_tokens = 250
filters = 50
kernel_sizes = [3,4,5]
dropout = 0.5 

X = np.column_stack((dialogue_train_data['encoded_speaker'],dialogue_train_data['sequence']))
y = dialogue_train_data['emotion_true'].values


model = DialogueRNN(D_g, D_p, D_e, D_c, n_classes, vocab_size, embedding_dim, cnn_output_size , 
        max_num_tokens, glv_embedding_matrix, filters, kernel_sizes, dropout)
    

for epoch in range(n_epochs):
    start = time.time()

    print("Number of epoch")
    print(epoch)

    batch_loss, batch_acc = train_step(model, dialogue_train_data)
    loss.append(batch_loss)
    acc.append(batch_acc)

    plt.plot(loss, color="blue")
    plt.title("Loss")
    plt.show()
    plt.plot(acc, color="red")
    plt.title("Accuracy")
    plt.show()
    print('Epoch %d Loss %.3f Accuracy %.2f' % (epoch + 1, batch_loss.numpy(), batch_acc.numpy()))
    training_time = (time.time() - start) / 60
    print("Training done in %d min (%d epochs with batches of %d)" % (training_time, n_epochs, batch_size))
    break
#encoder.save_weights("encoder.weights")
#decoder.save_weights("decoder.weights")