In [214]:
import torch
import json
from keras.preprocessing.sequence import pad_sequences
import pickle
import pandas as pd
from keras.preprocessing.text import Tokenizer
import torch.nn as nn
import numpy as np, pickle, time, argparse
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score, classification_report, precision_recall_fscore_support

import tensorflow as tf
import contractions
from sklearn.preprocessing import LabelBinarizer
import time
import tensorflow.keras as k
import os

In [215]:
"deal with contracted texts"
def expand_text(text):
    expanded_words = []
    text = text.encode('utf-8').decode('cp1252').replace("Â’", "'")
    #text = text.replace("’", "'")
    for word in text.split():
      # using contractions.fix to expand the shotened words
      expanded_words.append(contractions.fix(word))   

    expanded_text = ' '.join(expanded_words)
    return expanded_text



"clean dataset"
def preprocess_text(x):
    for punct in '"!&?.,}-/<>#$%\()*+:;=?@[\\]^_`|\~':
        x = x.replace(punct, ' ')
    x = ' '.join(x.split())
    x = x.lower()
    
    return x


def create_utterances(filename, split):
    sentences, emotion_labels, speakers, conv_id, = [], [], [], []
    
    lengths = []
    with open(filename, 'r', encoding='latin1') as f:
        a = json.load(f)
        for c_id, line in enumerate(a):
            for item in line:
                sentences.append(item['utterance'])
                emotion_labels.append(item['emotion'])
                conv_id.append(split[:2] + '_c' + str(c_id))
                speakers.append(item['speaker'])
            
            # u_id += 1
                
    data = pd.DataFrame(sentences, columns=['sentence'])
    data['sentence'] = data['sentence'].apply(lambda x: expand_text(x))
    data['sentence'] = data['sentence'].apply(lambda x: preprocess_text(x))

    data['emotion_label'] = emotion_labels
    data['speaker'] = speakers
    data['conv_id'] = conv_id

    
    return data


In [216]:
"create embedding"
def load_pretrained_glove():
    print("Loading GloVe model, this can take some time...")
    glv_vector = {}
    f = open('glove.840B.300d.txt', encoding='utf-8')

    for line in f:
        values = line.split()
        word = values[0]
        try:
            coefs = np.asarray(values[1:], dtype='float')
            glv_vector[word] = coefs
        except ValueError:
            continue
    f.close()
    print("Completed loading pretrained GloVe model.")
    return glv_vector

def encode_labels(encoder, l):
    return encoder[l]


if __name__ == '__main__':

    train_data = create_utterances('Friends/friends_train.json', 'train')
    valid_data = create_utterances('Friends/friends_dev.json', 'valid')
    test_data = create_utterances('Friends/friends_test.json', 'test')
    
    ## encode the emotion and dialog act labels ##
    all_emotion_labels =  set(train_data['emotion_label'])
    emotion_label_encoder, emotion_label_decoder = {}, {}


    for i, label in enumerate(all_emotion_labels):
        emotion_label_encoder[label] = i
        emotion_label_decoder[i] = label


    pickle.dump(emotion_label_encoder, open('emotion_label_encoder.pkl', 'wb'))
    pickle.dump(emotion_label_decoder, open('emotion_label_decoder.pkl', 'wb'))

    train_data['encoded_emotion_label'] = train_data['emotion_label'].map(lambda x: encode_labels(emotion_label_encoder, x))
    test_data['encoded_emotion_label'] = test_data['emotion_label'].map(lambda x: encode_labels(emotion_label_encoder, x))
    valid_data['encoded_emotion_label'] = valid_data['emotion_label'].map(lambda x: encode_labels(emotion_label_encoder, x))
    
    
    ## tokenize all sentences ##
    all_text = list(train_data['sentence'])
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(all_text)
    pickle.dump(tokenizer, open('tokenizer.pkl', 'wb'))

    ## convert the sentences into sequences ##
    train_sequence = tokenizer.texts_to_sequences(list(train_data['sentence']))
    valid_sequence = tokenizer.texts_to_sequences(list(valid_data['sentence']))
    test_sequence = tokenizer.texts_to_sequences(list(test_data['sentence']))
    
    train_data['sentence_length'] = [len(item) for item in train_sequence]
    valid_data['sentence_length'] = [len(item) for item in valid_sequence]
    test_data['sentence_length'] = [len(item) for item in test_sequence]
    
    max_num_tokens = 250

    train_sequence = pad_sequences(train_sequence, maxlen=max_num_tokens, padding='post')
    valid_sequence = pad_sequences(valid_sequence, maxlen=max_num_tokens, padding='post')
    test_sequence = pad_sequences(test_sequence, maxlen=max_num_tokens, padding='post')

    train_data['sequence'] = list(train_sequence)
    valid_data['sequence'] = list(valid_sequence)
    test_data['sequence'] = list(test_sequence)
    
   
    
    ## save pretrained embedding matrix ##
    glv_vector = load_pretrained_glove()
    word_vector_length = len(glv_vector['the'])
    word_index = tokenizer.word_index
    inv_word_index = {v: k for k, v in word_index.items()}
    num_unique_words = len(word_index)
    glv_embedding_matrix = np.zeros((num_unique_words+1, word_vector_length))

    for j in range(1, num_unique_words+1):
        try:
            glv_embedding_matrix[j] = glv_vector[inv_word_index[j]]
        except KeyError:
            glv_embedding_matrix[j] = np.random.randn(word_vector_length)/200

    np.ndarray.dump(glv_embedding_matrix, open('glv_embedding_matrix', 'wb'))
    print ('Done. Completed preprocessing.')

Loading GloVe model, this can take some time...
Completed loading pretrained GloVe model.
Done. Completed preprocessing.


In [217]:
train_data['emotion_true'] = pd.get_dummies(train_data['encoded_emotion_label']).values.tolist()


In [218]:
train_data

Unnamed: 0,sentence,emotion_label,speaker,conv_id,encoded_emotion_label,sentence_length,sequence,emotion_true
0,also i was the point person on my company's tr...,neutral,Chandler,tr_c0,3,18,"[371, 1, 31, 5, 695, 401, 33, 26, 2758, 2759, ...","[0, 0, 0, 1, 0, 0, 0, 0]"
1,you must have had your hands full,neutral,The Interviewer,tr_c0,3,7,"[2, 311, 17, 98, 44, 643, 760, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0]"
2,that i did that i did,neutral,Chandler,tr_c0,3,6,"[9, 1, 48, 9, 1, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[0, 0, 0, 1, 0, 0, 0, 0]"
3,so let us talk a little bit about your duties,neutral,The Interviewer,tr_c0,3,10,"[23, 84, 79, 175, 7, 100, 402, 54, 44, 1470, 0...","[0, 0, 0, 1, 0, 0, 0, 0]"
4,my duties all right,surprise,Chandler,tr_c0,0,4,"[26, 1470, 34, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 0, 0, 0, 0, 0, 0, 0]"
...,...,...,...,...,...,...,...,...
10556,you or me,neutral,Chandler,tr_c719,3,3,"[2, 112, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0]"
10557,i got it uh joey women do not have adam's apples,non-neutral,Ross,tr_c719,4,11,"[1, 57, 6, 51, 81, 316, 12, 8, 17, 2757, 5888,...","[0, 0, 0, 0, 1, 0, 0, 0]"
10558,you guys are messing with me right,surprise,Joey,tr_c719,0,7,"[2, 87, 13, 2747, 37, 20, 36, 0, 0, 0, 0, 0, 0...","[1, 0, 0, 0, 0, 0, 0, 0]"
10559,yeah,neutral,All,tr_c719,3,1,"[24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[0, 0, 0, 1, 0, 0, 0, 0]"


In [219]:
train_data['sequence'] = np.array(train_data['sequence'])

In [220]:
dialogue_train_data = train_data.groupby("conv_id").agg(list)


In [252]:
maximum = 0
for i in range(dialogue_train_data.shape[0]):
    if maximum < len(dialogue_train_data.iloc[i]['encoded_emotion_label']):
        ref_max = i
        maximum = len(dialogue_train_data.iloc[i]['encoded_emotion_label'])
print(ref_max)

43


In [253]:
len(dialogue_train_data.iloc[43]['encoded_emotion_label'])

24

In [222]:
class MyLabelBinarizer:

    def __init__(self):
        self.lb = LabelBinarizer()

    def fit(self, X):
        # Convert X to array
        X = np.array(X)
        # Fit X using the LabelBinarizer object
        self.lb.fit(X)
        # Save the classes
        self.classes_ = self.lb.classes_

    def fit_transform(self, X):
        # Convert X to array
        X = np.array(X)
        # Fit + transform X using the LabelBinarizer object
        Xlb = self.lb.fit_transform(X)
        # Save the classes
        self.classes_ = self.lb.classes_
        if len(self.classes_) == 2:
            Xlb = np.hstack((Xlb, 1 - Xlb))
        return Xlb

    def transform(self, X):
        # Convert X to array
        X = np.array(X)
        # Transform X using the LabelBinarizer object
        Xlb = self.lb.transform(X)
        if len(self.classes_) == 2:
            Xlb = np.hstack((Xlb, 1 - Xlb))
        return Xlb

    def inverse_transform(self, Xlb):
        # Convert Xlb to array
        Xlb = np.array(Xlb)
        if len(self.classes_) == 2:
            X = self.lb.inverse_transform(Xlb[:, 0])
        else:
            X = self.lb.inverse_transform(Xlb)
        return X

In [223]:
dialogue_train_data['encoded_speaker'] = dialogue_train_data['speaker'].apply(lambda s: MyLabelBinarizer().fit_transform(s))


In [224]:
dialogue_train_data['sequence'] = dialogue_train_data['sequence'].apply(lambda s: np.array(np.array(s)))
dialogue_train_data['encoded_emotion_label'] = dialogue_train_data['encoded_emotion_label'].apply(lambda s: np.array(np.array(s)))
dialogue_train_data['encoded_speaker'] = dialogue_train_data['encoded_speaker'].apply(lambda s: np.array(np.array(s)))

In [225]:
dialogue_train_data.reset_index(inplace=True)

In [226]:
dialogue_train_data.head()


Unnamed: 0,conv_id,sentence,emotion_label,speaker,encoded_emotion_label,sentence_length,sequence,emotion_true,encoded_speaker
0,tr_c0,[also i was the point person on my company's t...,"[neutral, neutral, neutral, neutral, surprise,...","[Chandler, The Interviewer, Chandler, The Inte...","[3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 5, 3, 0, 3, 0, ...","[18, 7, 6, 10, 4, 16, 2, 18, 3, 5, 7, 28, 1, 7...","[[371, 1, 31, 5, 695, 401, 33, 26, 2758, 2759,...","[[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, ...","[[1, 0, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [0,..."
1,tr_c1,"[hey mon, hey hey hey you want to hear somethi...","[neutral, neutral, joy, sadness, surprise, neu...","[Chandler, Monica, Chandler, Monica, Chandler,...","[3, 3, 7, 2, 0, 3, 4, 2, 3, 3, 7, 3, 4, 0, 0, ...","[2, 10, 3, 8, 2, 12, 10, 2, 5, 6, 2, 5, 4, 8, ...","[[28, 509, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, ...","[[1, 0, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0], [0,..."
2,tr_c10,"[go go go, oh yeah now everybody wants to be u...","[joy, joy, non-neutral, surprise, neutral, neu...","[Ross, Rachel, Phoebe, Monica, Phoebe, Ross]","[7, 7, 4, 0, 3, 3]","[3, 10, 1, 6, 9, 10]","[[43, 43, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1,..."
3,tr_c100,[ooh oh no i have to go i have a massage appoi...,"[non-neutral, neutral, neutral, joy, non-neutr...","[Phoebe, Eric, Phoebe, Eric, Mona, Ross, Dr. G...","[4, 3, 3, 7, 4, 3, 6, 2]","[12, 13, 13, 7, 10, 6, 19, 10]","[[219, 11, 16, 1, 17, 4, 43, 1, 17, 7, 1328, 1...","[[0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, ...","[[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 1..."
4,tr_c101,"[okay so we will just stay married, yes exactl...","[joy, joy, joy, non-neutral, fear, non-neutral...","[Rachel, Ross, Rachel, Ross, Rachel, Ross, Rac...","[7, 7, 7, 4, 5, 4, 0, 3, 4, 7, 3, 2, 3, 3, 4, ...","[7, 2, 9, 19, 15, 12, 10, 9, 1, 27, 2, 5, 3, 2...","[[22, 23, 18, 41, 25, 254, 186, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, ...","[[0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0..."


In [227]:
dialogue_train_data.iloc[165]

conv_id                                                            tr_c247
sentence                 [yes fran, i know what time it is but i am loo...
emotion_label            [neutral, anger, non-neutral, non-neutral, neu...
speaker                  [Chandler, Chandler, Chandler, Chandler, Chand...
encoded_emotion_label                                [3, 6, 4, 4, 3, 7, 0]
sentence_length                                    [2, 18, 4, 16, 3, 1, 1]
sequence                 [[90, 3812, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
emotion_true             [[0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, ...
encoded_speaker                        [[0], [0], [0], [0], [0], [0], [0]]
Name: 165, dtype: object

In [228]:
X = np.column_stack((dialogue_train_data['encoded_speaker'],dialogue_train_data['sequence']))

In [229]:
y = dialogue_train_data['emotion_true'].values

In [230]:
class CNNExtractor(k.Model):
    def __init__(self, vocab_size, embedding_dim, output_size,max_num_tokens,glv_embedding_matrix, filters, kernel_sizes, dropout):
        super(CNNExtractor, self).__init__()
                
        self.embedding = k.layers.Embedding(input_dim=vocab_size, output_dim= embedding_dim, 
                                            input_length=max_num_tokens, weights = [glv_embedding_matrix])
        self.convs1 = k.layers.Conv1D(filters, 
                                           kernel_sizes[0], 
                                           activation='relu')
        self.convs2 = k.layers.Conv1D(filters, 
                                           kernel_sizes[1], 
                                           activation='relu')
        self.convs3 = k.layers.Conv1D(filters, 
                                           kernel_sizes[2], 
                                           activation='relu')
        
        self.pooling = k.layers.GlobalMaxPooling1D()
        self.concatanate = k.layers.Concatenate()
        self.dropout = k.layers.Dropout(dropout)
        self.dense = k.layers.Dense(output_size, input_shape=(len(kernel_sizes) * filters,), activation='relu')

    def call(self, inputs):
         # input size = (num_words = 250)
         #in the original code the input is equals to (num_utt, batch, num_words)
            
        x = self.embedding(inputs) # x size = (num_words = 250, embedding = 300)
        x = tf.expand_dims(x, axis=-1) # x size =  (num_words = 250, embedding = 300, num_utt * batch = 1)
        x = tf.transpose(x, [2, 1, 0]) # x size =  (num_utt * batch = 1, embedding = 300,num_words = 250 )
        
    
        conv1_x = self.pooling(self.convs1(x)) # conv1_x size =  (num_utt * batch = 1, 50 )
        conv2_x = self.pooling(self.convs2(x)) # conv2_x size =  (num_utt * batch = 1, 50 )
        conv3_x = self.pooling(self.convs3(x)) # conv3_x size =  (num_utt * batch = 1, 50 )
        x = self.concatanate([conv1_x, conv2_x, conv3_x]) # x size =  (num_utt * batch = 1, 150 )
        x = self.dropout(x)
        x = self.dense(x)
        # x size =  (num_utt * batch = 1, output_size = 100 )
        return x

In [231]:
class GlobalGRU(k.layers.Layer):
    def __init__(self, D_g):
        super(GlobalGRU, self).__init__()
        #not sure about this
        self.global_gru = k.layers.GRU(D_g,
                                #return_sequences=True,
                                #return_state=True,
                                bias_initializer="ones",
                                dropout=0.1,
                                recurrent_initializer='glorot_uniform')

# h_P_previous -> previous party state
#t_r _> textual representation
#h_G_previous _> previous global state
    def call(self, t_r, h_P_previous, h_G_previous):
        t_r_h_P = tf.concat([h_P_previous, t_r], axis=-1)
        #Global state        
        t_r_h_P = tf.expand_dims(t_r_h_P, axis=-1)
        output = self.global_gru(t_r_h_P, initial_state=h_G_previous)
        return output


In [232]:
class PartyGRU(k.layers.Layer):
    def __init__(self, D_p):
        super(PartyGRU, self).__init__()
        
        self.party_gru = k.layers.GRU(D_p,
                                #return_sequences=True,
                                #return_state=True,
                                bias_initializer="ones",
                                dropout=0.1,
                                recurrent_initializer='glorot_uniform')


#c_t -> current context 
#t_r -> textual representation
#h_P_previous -> previous party state
    def call(self, c_t, t_r, h_P_previous):

        t_r_c_t = tf.concat([c_t, t_r], axis=-1)
        
        t_r_c_t = tf.expand_dims(t_r_c_t, axis=-1)

        return self.party_gru(t_r_c_t, initial_state=h_P_previous)


In [233]:
class EmotionGRU(k.layers.Layer):
    def __init__(self, D_e):
        super(EmotionGRU, self).__init__()

        self.emotion_gru = k.layers.GRU(D_e,
                                bias_initializer="ones",
                                dropout=0.1,
                                recurrent_initializer='glorot_uniform')

#h_E_previous -> previous emotion state
#h_P -> current party state
    def call(self, h_P, h_E_previous):
        
        h_P = tf.expand_dims(h_P, axis=-1)
        return self.emotion_gru(h_P, initial_state=h_E_previous)


In [234]:
class EmotionClassificationDense(k.layers.Layer):
    def __init__(self, D_c, n_classes):
        super(EmotionClassificationDense, self).__init__()

        self.classification = k.layers.Dense(2*D_c, activation="relu")

        self.y = k.layers.Dense(n_classes, activation="softmax")

        
    def call(self, h_E):
        output = self.classification(h_E)
        return self.y(output)


In [235]:
class AttentionBlock(k.layers.Layer):
    def __init__(self, D_g):
        super(AttentionBlock, self).__init__()
        self.dense = k.layers.Dense(D_g)

    def call(self, h_G_all, t_r):
        H_g = np.array(h_G_all) #Hg = (1, 150, n_iterations)
        t_r =  self.dense(t_r)  # x = (1,1, 150)
        
        
        t_r = tf.expand_dims(t_r, 1) 
        score = tf.matmul(t_r, H_g, transpose_b=True)
        a_t = tf.nn.softmax(score, axis=2)
        c_t = tf.matmul(a_t, H_g)
        return c_t


In [236]:
optimizer = k.optimizers.Adam()
loss_object = k.losses.CategoricalCrossentropy(from_logits=False, reduction='none')
accuracy_object = k.metrics.Accuracy()

def loss_function(real, y):
    loss_ = loss_object(real, y)
    return tf.reduce_mean(loss_)

def accuracy_score(real, y):
    accuracy_object.reset_states()
    accuracy_object.update_state(real, y)
    return accuracy_object.result()

In [240]:
def train_step(X_train, y_train, D_g, D_p, D_e, max_num_tokens):
    loss, acc = 0, 0
    
    
    with tf.GradientTape() as tape:

        # Create an empty list
        speakers_states = []
        #number of speakers in the dialogue
        number_of_speakers = max(np.argmax(X_train['speakers'][0], axis=1)) + 1
        # Iterate over a sequence of numbers from 0 to 4
        for i in range(number_of_speakers):
        #For each speaker initialize HP
            speakers_states.append( tf.zeros((1, D_p)))
              
        #initialize global states
        #------------try to change for np.array after----------
        h_G_all = []
        h_G = tf.ones((1, D_g))
        h_G_all.append(h_G)
        
        #initialize emotion states
        h_E = tf.zeros((1, D_e))

        
        j = 0
        #iterate into every message sent by each party in a dialogue
        for k in range(len(X_train['speakers'][0])):
         
            #get the speaker id
            speaker = X_train['speakers'][0][k]
            speakers_id = np.argmax(speaker)
                        
            #get message said by speaker
            message = X_train['message'][0][k]
            
            #turn message (id representations) into textual vector representation
            t_r = cnnTextualRepresentation(message)
            
            c_t = tf.zeros((1, D_g))
#             if len(h_G_all) == 1:
#                 c_t = tf.zeros((1, D_g))
#             else:
#                 c_t = attention(h_G_all, t_r)
#                 c_t = tf.squeeze(c_t)
            

            #If there is a track of the speaker
        
        #NOT THIS - NOT SPEAKER STATE FOR GLOBAL GRU
            h_P_previous = speakers_states[speakers_id] 
            
            h_G = globalGRU(t_r, h_P_previous ,h_G)
            h_G_all.append(h_G)
            
            h_P = partyGRU(c_t, t_r, h_P_previous)
            speakers_states[speakers_id] = h_P
            
            h_E =  emotionGRU(h_P, h_E)
            y_pred_prob = classificationDense(h_E)
            y_pred_prob =  tf.squeeze(y_pred_prob)
            
            y_pred = np.argmax(y_pred_prob, axis=-1)
            y_true_prob = y_train[0][k] 
            
            y_true = np.argmax(y_true_prob, axis=-1)            
    
            loss += loss_function(y_true_prob, y_pred_prob)
            
            print(loss)
            
            acc += accuracy_score(y_true, y_pred)
            
            print(acc)
            j = j + 1

    batch_loss = loss / (i-1)
    batch_acc = acc / (i-1)
    variables = partyGRU.trainable_variables + globalGRU.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss, batch_acc


In [241]:
glv_embedding_matrix = np.load(open('glv_embedding_matrix', 'rb') ,allow_pickle=True)
vocab_size, embedding_dim = glv_embedding_matrix.shape

In [242]:
batch_size = 1
n_epochs = 12

D_g = 150
D_p = 150
D_e = 100
D_c = 100

n_classes=8
max_num_tokens = 250
kernel_sizes = [3,4,5]
glv_embedding_matrix = np.load(open('glv_embedding_matrix', 'rb') ,allow_pickle=True)
vocab_size, embedding_dim = glv_embedding_matrix.shape
cnn_output_size=100

cnnTextualRepresentation = CNNExtractor(vocab_size, embedding_dim, cnn_output_size,max_num_tokens,glv_embedding_matrix, 50, [3,4,5], 0.5)
attention = AttentionBlock(D_g)
partyGRU = PartyGRU(D_p)
globalGRU = GlobalGRU(D_g)
emotionGRU = EmotionGRU(D_e)
classificationDense = EmotionClassificationDense(D_c, n_classes)

def generator():
    for s1, s2, l in zip(X[:, 0], X[:, 1], y):
        yield {"speakers": s1, "message": s2}, l
    
dataset = tf.data.Dataset.from_generator(generator, output_types=({"speakers": tf.int64, "message": tf.int64}, tf.int64))
dataset = dataset.batch(batch_size)


loss = []
acc = []
start = time.time()
for epoch in range(n_epochs):
    for (batch, (X_train, y_train)) in enumerate(dataset.take(20)):
        print("Dialogue Number: " + str(batch))
    
        batch_loss, batch_acc = train_step(X_train, y_train, D_g, D_p,D_e, max_num_tokens)
        loss.append(batch_loss)
        acc.append(batch_acc)
        if batch % 16 == 0 or batch == 0:
            clear_output()
            plt.plot(loss, color="blue")
            plt.title("Loss")
            plt.show()
            plt.plot(acc, color="red")
            plt.title("Accuracy")
            plt.show()
            print('Epoch %d Batch %d Loss %.3f Accuracy %.2f' % (epoch + 1, batch, batch_loss.numpy(), batch_acc.numpy()))
training_time = (time.time() - start) / 60
print("Training done in %d min (%d epochs with batches of %d)" % (training_time, n_epochs, batch_size))
#encoder.save_weights("encoder.weights")
#decoder.save_weights("decoder.weights")

Dialogue Number: 0
tf.Tensor(1.7591205, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(3.5153334, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(5.271248, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(7.027135, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(10.1504345, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(11.906318, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(13.66221, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(15.418099, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(17.173994, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(18.929886, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(21.652588, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(23.408468, shape=(), dtype=

NameError: name 'clear_output' is not defined