In [1]:
import tensorflow as tf
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow_datasets as tfds
import warnings
warnings.filterwarnings('ignore')

In [2]:
path = os.getenv('HOME') + '/aiffel/korean_chatbot/ChatbotData .csv'
data = pd.read_csv(path)
data.head()

Unnamed: 0,Q,A,label
0,12시 땡!,하루가 또 가네요.,0
1,1지망 학교 떨어졌어,위로해 드립니다.,0
2,3박4일 놀러가고 싶다,여행은 언제나 좋죠.,0
3,3박4일 정도 놀러가고 싶다,여행은 언제나 좋죠.,0
4,PPL 심하네,눈살이 찌푸려지죠.,0


In [3]:
raw_questions = data['Q']
questions = []
for question in raw_questions:
    questions.append(question)
    
raw_answers = data['A']
answers = []
for answer in raw_answers:
    answers.append(answer)

In [4]:
from konlpy.tag import Mecab

mecab = Mecab()

def preprocess_sentence(sentence):
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    sentence = re.sub(r'[" "]+', " ", sentence)
    sentence = re.sub("[^가-힣ㄱ-ㅎㅏ-ㅣa-zA-Z?.!,1-9\\s]", "", sentence)
    setence = sentence.strip()
    corpus = mecab.morphs(sentence)
    return corpus

In [5]:
cleaned_corpus = list(set(zip(questions, answers)))

In [6]:
que_corpus = []
ans_corpus = []
for a in range(len(cleaned_corpus)):
    que_sentence = preprocess_sentence(cleaned_corpus[a][1])
    ans_sentence = preprocess_sentence(cleaned_corpus[a][0])
    if len(que_sentence) <=40 and len(ans_sentence) <=40:
        que_corpus.append(que_sentence)
        ans_corpus.append(ans_sentence)

In [7]:
print(type(que_corpus))

<class 'list'>


In [8]:
from gensim.models import Word2Vec
wv = Word2Vec.load(os.getenv('HOME') + '/aiffel/korean_chatbot/ko/ko.bin')
print(wv.most_similar('바나나'))
wv.most_similar('바나나')[0][0]
wv.most_similar('바나나')[0][1]

[('코코넛', 0.8097119331359863), ('시금치', 0.7701147794723511), ('레몬', 0.7688493728637695), ('땅콩', 0.7684734463691711), ('파인애플', 0.7639915943145752), ('녹차', 0.7631460428237915), ('딸기', 0.7617197036743164), ('바닐라', 0.7497864365577698), ('파슬리', 0.7447543144226074), ('코코아', 0.7408243417739868)]


0.8097119331359863

In [9]:
def lexical_sub(sentence, word2vec):
    import random

    res = ""
    toks = sentence

    try:
        _from = random.choice(toks)
        _to = word2vec.most_similar(_from)[0][0]

    except:   # 단어장에 없는 단어
        return sentence

    for tok in toks:
        if tok is _from: res += _to + " "
        else: res += tok + " "
    
    res = mecab.morphs(res)

    return res

In [10]:
def lexical_sub_2(sentence, word2vec):
    import random

    res = ""
    toks = sentence

    try:
        _from = random.choice(toks)
        _to = word2vec.most_similar(_from)[1][0]

    except:   # 단어장에 없는 단어
        return sentence

    for tok in toks:
        if tok is _from: res += _to + " "
        else: res += tok + " "
            
    res = mecab.morphs(res)

    return res

In [11]:
def old_sub(sentence, word2vec):
    import random

    res = ""
    toks = sentence

    try:
        _from = random.choice(toks)
        _to = word2vec.most_similar(_from)[0][1]

    except:   # 단어장에 없는 단어
        return sentence

    for tok in toks:
        if tok is _from: res += _to + " "
        else: res += tok + " "

    return res

In [12]:
from tqdm import tqdm_notebook

new_que_corpus = []

for idx in tqdm_notebook(range(len(que_corpus))):
    old_src = que_corpus[idx]
    new_src = lexical_sub(old_src, wv)
    new_src_2 = lexical_sub_2(old_src, wv)

    if new_src is not None:
        new_que_corpus.append(new_src)
        new_que_corpus.append(new_src_2)
        
    new_que_corpus.append(old_src)


HBox(children=(FloatProgress(value=0.0, max=11750.0), HTML(value='')))




In [13]:
que_corpus[0]

['연락', '하', '면', '지긋지긋', '한', '사람', '으로', '기억', '될', '거', '예요', '.']

In [14]:
print(new_que_corpus[:20])

[['연락', '하', '면', '지긋지긋', '한', '젊은이', '으로', '기억', '될', '거', '예요', '.'], ['연락', '하', '면', '지긋지긋', '한', '사람', '으로', '기억', '될', '거', '예요', '으', '니'], ['연락', '하', '면', '지긋지긋', '한', '사람', '으로', '기억', '될', '거', '예요', '.'], ['많이', '힘든가', '봐요', '는데'], ['많이', '힘든가', '봐요', '으', '니'], ['많이', '힘든가', '봐요', '.'], ['그', '어디', '와', '도', '비교', '하', '지', '마세요', '.'], ['그', '누구', '와', '풀', '어', '쓰', '비교', '하', '지', '마세요', '.'], ['그', '누구', '와', '도', '비교', '하', '지', '마세요', '.'], ['안', '갈', '것', '예요', '.'], ['안', '갈', '듯', '예요', '.'], ['안', '갈', '거', '예요', '.'], ['호감', '이', '없', '으면', '통화', '를', '안', '할', '거', '예요', '.'], ['호감', '이', '없', '으면', '통화', '를', '안', '할', '듯', '예요', '.'], ['호감', '이', '없', '으면', '통화', '를', '안', '할', '거', '예요', '.'], ['당신', '은', '당신', '이', '란', '이유', '로', '벌써', '그럴', '자격', '이', '있', '어요', '.'], ['당신', '은', '당신', '으로', '란', '이유', '로', '이미', '그럴', '자격', '이', '있', '어요', '.'], ['당신', '은', '당신', '이', '란', '이유', '로', '이미', '그럴', '자격', '이', '있', '어요', '.'], ['잠깐', '시원', '한', '바람', '조금', '

In [15]:
from tqdm import tqdm_notebook

new_ans_corpus = []

for idx in tqdm_notebook(range(len(ans_corpus))):
    old_src = ans_corpus[idx]

    new_src = lexical_sub(old_src, wv)
    new_2_src = lexical_sub_2(old_src, wv)

    if new_src is not None:
        new_ans_corpus.append(new_src)
        new_ans_corpus.append(new_2_src)
        
    new_ans_corpus.append(old_src)

HBox(children=(FloatProgress(value=0.0, max=11750.0), HTML(value='')))




In [16]:
print(new_ans_corpus[:20])

[['헤어졌', '는데', '연락', '하', '고', '싶', '어서'], ['헤어졌', '는데', '연락', '하', '는데', '싶', '어'], ['헤어졌', '는데', '연락', '하', '고', '싶', '어'], ['휩쓸리', '네'], ['지치', '테'], ['지치', '네'], ['이', '사람', '만나', '면서', '내', '자존', '감', '이', '떨어지', '는', '것', '같', '아', '안', '만나', '는', '도록', '답', '이', '야', '?'], ['이', '사람', '만나', '면서', '내', '감과', '감', '이', '떨어지', '는', '것', '같', '아', '안', '만나', '는', '게', '답', '이', '야', '?'], ['이', '사람', '만나', '면서', '내', '자존', '감', '이', '떨어지', '는', '것', '같', '아', '안', '만나', '는', '게', '답', '이', '야', '?'], ['너', '는', '못', '놀', '드', '잖아'], ['사이먼', '는', '못', '가', '잖아'], ['너', '는', '못', '가', '잖아'], ['자신', '전', '에', '통화', '하', '면', '썸', '이', '야', '?'], ['자기', '전', '에', '통화', '하', '면', '썸', '으로', '야', '?'], ['자기', '전', '에', '통화', '하', '면', '썸', '이', '야', '?'], ['나', '왜', '좋', '아서', '할까', '?'], ['나', '왜', '좋', '아도', '할까', '?'], ['나', '왜', '좋', '아', '할까', '?'], ['마음속', '이', '정리', '안', '돼'], ['머릿속', '이', '정리', '안', '돼']]


In [17]:
sample_data = ["12", "시", "땡", "!"]

print(["<start>"] + sample_data + ["<end>"])

['<start>', '12', '시', '땡', '!', '<end>']


In [18]:
complete_ans_corpus = []
for corpus in new_ans_corpus:
    clean = ['<start>'] + corpus + ['<end>']
    complete_ans_corpus.append(clean)

In [19]:
def tokenize(corpus):
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', num_words = 2**13, oov_token = '<unk>')
    tokenizer.fit_on_texts(corpus)

    tensor = tokenizer.texts_to_sequences(corpus)

    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post')

    return tensor, tokenizer

In [20]:
from sklearn.model_selection import train_test_split

dec_tensor, dec_tokenizer = tokenize(complete_ans_corpus)
enc_tensor, enc_tokenizer = tokenize(new_que_corpus)

enc_train, enc_val, dec_train, dec_val = \
train_test_split(enc_tensor, dec_tensor, test_size=0.1)

print(len(enc_train), len(enc_val), len(dec_train), len(dec_val))
print(enc_train.shape)
print(dec_train.shape)

31725 3525 31725 3525
(31725, 40)
(31725, 35)


In [22]:
def positional_encoding(pos, d_model):
    def cal_angle(position, i):
        return position / np.power(10000, int(i) / d_model)

    def get_posi_angle_vec(position):
        return [cal_angle(position, i) for i in range(d_model)]

    sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(pos)])

    sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])
    sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])

    return sinusoid_table

In [23]:
def create_padding_mask(seq):
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)

def create_look_ahead_mask(size):
    mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
    return mask  # (seq_len, seq_len)

In [24]:
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model

        self.depth = d_model // self.num_heads

        self.W_q = tf.keras.layers.Dense(d_model)
        self.W_k = tf.keras.layers.Dense(d_model)
        self.W_v = tf.keras.layers.Dense(d_model)

        self.linear = tf.keras.layers.Dense(d_model)

    def scaled_dot_product_attention(self, Q, K, V, mask):
        d_k = tf.cast(K.shape[-1], tf.float32)
        QK = tf.matmul(Q, K, transpose_b=True)

        scaled_qk = QK / tf.math.sqrt(d_k)

        if mask is not None: scaled_qk += (mask * -1e9)  

        attentions = tf.nn.softmax(scaled_qk, axis=-1)
        out = tf.matmul(attentions, V)

        return out, attentions


    def split_heads(self, x):
        bsz = x.shape[0]
        split_x = tf.reshape(x, (bsz, -1, self.num_heads, self.depth))
        split_x = tf.transpose(split_x, perm=[0, 2, 1, 3])

        return split_x

    def combine_heads(self, x):
        bsz = x.shape[0]
        combined_x = tf.transpose(x, perm=[0, 2, 1, 3])
        combined_x = tf.reshape(combined_x, (bsz, -1, self.d_model))

        return combined_x


    def call(self, Q, K, V, mask):
        WQ = self.W_q(Q)
        WK = self.W_k(K)
        WV = self.W_v(V)

        WQ_splits = self.split_heads(WQ)
        WK_splits = self.split_heads(WK)
        WV_splits = self.split_heads(WV)

        out, attention_weights = self.scaled_dot_product_attention(
            WQ_splits, WK_splits, WV_splits, mask)

        out = self.combine_heads(out)
        out = self.linear(out)

        return out, attention_weights

In [25]:
class PoswiseFeedForwardNet(tf.keras.layers.Layer):
    def __init__(self, d_model, d_ff):
        super(PoswiseFeedForwardNet, self).__init__()
        self.d_model = d_model
        self.d_ff = d_ff

        self.fc1 = tf.keras.layers.Dense(d_ff, activation='relu')
        self.fc2 = tf.keras.layers.Dense(d_model)

    def call(self, x):
        out = self.fc1(x)
        out = self.fc2(out)

        return out

In [26]:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, n_heads, d_ff, dropout):
        super(EncoderLayer, self).__init__()

        self.enc_self_attn = MultiHeadAttention(d_model, n_heads)
        self.ffn = PoswiseFeedForwardNet(d_model, d_ff)

        self.norm_1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.norm_2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.do = tf.keras.layers.Dropout(dropout)

    def call(self, x, mask):

        """
        Multi-Head Attention
        """
        residual = x
        out = self.norm_1(x)
        out, enc_attn = self.enc_self_attn(out, out, out, mask)
        out = self.do(out)
        out += residual

        """
        Position-Wise Feed Forward Network
        """
        residual = out
        out = self.norm_2(out)
        out = self.ffn(out)
        out = self.do(out)
        out += residual

        return out, enc_attn

In [27]:
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, d_ff, dropout):
        super(DecoderLayer, self).__init__()

        self.dec_self_attn = MultiHeadAttention(d_model, num_heads)
        self.enc_dec_attn = MultiHeadAttention(d_model, num_heads)

        self.ffn = PoswiseFeedForwardNet(d_model, d_ff)

        self.norm_1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.norm_2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.norm_3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.do = tf.keras.layers.Dropout(dropout)

    def call(self, x, enc_out, look_ahead_mask, padding_mask):

        """
        Masked Multi-Head Attention
        """
        residual = x
        out = self.norm_1(x)
        out, dec_attn = self.dec_self_attn(out, out, out, look_ahead_mask)
        out = self.do(out)
        out += residual

        """
        Multi-Head Attention
        """
        residual = out
        out = self.norm_2(out)
        out, dec_enc_attn = self.dec_self_attn(out, enc_out, enc_out, padding_mask)
        out = self.do(out)
        out += residual

        """
        Position-Wise Feed Forward Network
        """
        residual = out
        out = self.norm_3(out)
        out = self.ffn(out)
        out = self.do(out)
        out += residual

        return out, dec_attn, dec_enc_attn

In [28]:
class Encoder(tf.keras.Model):
    def __init__(self,
                    n_layers,
                    d_model,
                    n_heads,
                    d_ff,
                    dropout):
        super(Encoder, self).__init__()
        self.n_layers = n_layers
        self.enc_layers = [EncoderLayer(d_model, n_heads, d_ff, dropout) 
                        for _ in range(n_layers)]

        self.do = tf.keras.layers.Dropout(dropout)

    def call(self, x, mask):
        out = x

        enc_attns = list()
        for i in range(self.n_layers):
            out, enc_attn = self.enc_layers[i](out, mask)
            enc_attns.append(enc_attn)

        return out, enc_attns

In [29]:
class Decoder(tf.keras.Model):
    def __init__(self,
                    n_layers,
                    d_model,
                    n_heads,
                    d_ff,
                    dropout):
        super(Decoder, self).__init__()
        self.n_layers = n_layers
        self.dec_layers = [DecoderLayer(d_model, n_heads, d_ff, dropout) 
                            for _ in range(n_layers)]


    def call(self, x, enc_out, causality_mask, padding_mask):
        out = x

        dec_attns = list()
        dec_enc_attns = list()
        for i in range(self.n_layers):
            out, dec_attn, dec_enc_attn = \
            self.dec_layers[i](out, enc_out, causality_mask, padding_mask)

            dec_attns.append(dec_attn)
            dec_enc_attns.append(dec_enc_attn)

        return out, dec_attns, dec_enc_attns

In [30]:
class Transformer(tf.keras.Model):
    def __init__(self,
                    n_layers,
                    d_model,
                    n_heads,
                    d_ff,
                    src_vocab_size,
                    tgt_vocab_size,
                    pos_len,
                    dropout=0.2,
                    shared_fc=True,
                    shared_emb=False):
        super(Transformer, self).__init__()

        self.d_model = tf.cast(d_model, tf.float32)

        if shared_emb:
            self.enc_emb = self.dec_emb = \
            tf.keras.layers.Embedding(src_vocab_size, d_model)
        else:
            self.enc_emb = tf.keras.layers.Embedding(src_vocab_size, d_model)
            self.dec_emb = tf.keras.layers.Embedding(tgt_vocab_size, d_model)

        self.pos_encoding = positional_encoding(pos_len, d_model)
        self.do = tf.keras.layers.Dropout(dropout)

        self.encoder = Encoder(n_layers, d_model, n_heads, d_ff, dropout)
        self.decoder = Decoder(n_layers, d_model, n_heads, d_ff, dropout)

        self.fc = tf.keras.layers.Dense(tgt_vocab_size)

        self.shared_fc = shared_fc

        if shared_fc:
            self.fc.set_weights(tf.transpose(self.dec_emb.weights))

    def embedding(self, emb, x):
        seq_len = x.shape[1]

        out = emb(x)

        if self.shared_fc: out *= tf.math.sqrt(self.d_model)

        out += self.pos_encoding[np.newaxis, ...][:, :seq_len, :]
        out = self.do(out)

        return out


    def call(self, enc_in, dec_in, enc_mask, causality_mask, dec_mask):
        enc_in = self.embedding(self.enc_emb, enc_in)
        dec_in = self.embedding(self.dec_emb, dec_in)

        enc_out, enc_attns = self.encoder(enc_in, enc_mask)

        dec_out, dec_attns, dec_enc_attns = \
        self.decoder(dec_in, enc_out, causality_mask, dec_mask)

        logits = self.fc(dec_out)

        return logits, enc_attns, dec_attns, dec_enc_attns

In [31]:
N_LAYERS = 2
D_MODEL = 128
N_HEADS = 8
D_FF = 512
DROPOUT = 0.3
SRC_VOCAB_SIZE = 2**13
TGT_VOCAB_SIZE = 2**13

transformer = Transformer(
    n_layers=N_LAYERS,
    d_model=D_MODEL,
    n_heads=N_HEADS,
    d_ff=D_FF,
    src_vocab_size=SRC_VOCAB_SIZE,
    tgt_vocab_size=TGT_VOCAB_SIZE,
    pos_len=41,
    dropout=DROPOUT,
    shared_fc=True,
    shared_emb=True)

d_model = 512

In [32]:
print(SRC_VOCAB_SIZE)

8192


In [33]:
class LearningRateScheduler(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super(LearningRateScheduler, self).__init__()

        self.d_model = d_model
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        arg1 = step ** -0.5
        arg2 = step * (self.warmup_steps ** -1.5)

        return (self.d_model ** -0.5) * tf.math.minimum(arg1, arg2)

In [34]:
learning_rate = LearningRateScheduler(d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate,
                                        beta_1=0.9,
                                        beta_2=0.98, 
                                        epsilon=1e-9)

In [35]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_sum(loss_)/tf.reduce_sum(mask)

In [36]:
def generate_masks(inp, tar):
  # Encoder padding mask
    enc_padding_mask = create_padding_mask(inp)

  # Used in the 2nd attention block in the decoder.
  # This padding mask is used to mask the encoder outputs.
    dec_padding_mask = create_padding_mask(inp)

  # Used in the 1st attention block in the decoder.
  # It is used to pad and mask future tokens in the input received by 
  # the decoder.
    look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
    dec_target_padding_mask = create_padding_mask(tar)
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)

    return enc_padding_mask, combined_mask, dec_padding_mask

In [37]:
@tf.function()
def train_step(src, tgt, model, optimizer):
    tgt_in = tgt[:, :-1]
    gold = tgt[:, 1:]

    enc_mask, dec_enc_mask, dec_mask = generate_masks(src, tgt_in)

    with tf.GradientTape() as tape:
        predictions, enc_attns, dec_attns, dec_enc_attns = \
        model(src, tgt_in, enc_mask, dec_enc_mask, dec_mask)
        loss = loss_function(gold, predictions)

    gradients = tape.gradient(loss, model.trainable_variables)    
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return loss, enc_attns, dec_attns, dec_enc_attns

In [42]:
def evaluate(sentence, model, src_tokenizer, tgt_tokenizer):
    sentence = preprocess_sentence(sentence)

    #pieces = enc_tokenizer.encode_as_pieces(sentence)
    tokens = enc_tokenizer.texts_to_sequences(sentence)

    _input = tf.keras.preprocessing.sequence.pad_sequences(tokens,
                                                           padding='post')

    ids = []
    output = tf.expand_dims(['<start>'], 0)
    for i in range(dec_train.shape[-1]):
        enc_padding_mask, combined_mask, dec_padding_mask = \
        generate_masks(_input, output)

        predictions, enc_attns, dec_attns, dec_enc_attns =\
        model(_input, 
              output,
              enc_padding_mask,
              combined_mask,
              dec_padding_mask)

        predicted_id = \
        tf.argmax(tf.math.softmax(predictions, axis=-1)[0, -1]).numpy().item()

        if tgt_tokenizer.eos_id() == predicted_id:
            result = tgt_tokenizer.decode_ids(ids)
            return pieces, result, enc_attns, dec_attns, dec_enc_attns

        ids.append(predicted_id)
        output = tf.concat([output, tf.expand_dims([predicted_id], 0)], axis=-1)

    result = tgt_tokenizer.decode_ids(ids)

    return pieces, result, enc_attns, dec_attns, dec_enc_attns

In [43]:
def translate(sentence, model, src_tokenizer, tgt_tokenizer, plot_attention=False):
    pieces, result, enc_attns, dec_attns, dec_enc_attns = \
    evaluate(sentence, model, src_tokenizer, tgt_tokenizer)

    print('Input: %s' % (sentence))
    print('Predicted translation: {}'.format(result))

    if plot_attention:
        visualize_attention(pieces, result.split(), enc_attns, dec_attns, dec_enc_attns)

In [44]:
sentences = ['지루하다, 놀러가고 싶어.', '오늘 일찍 일어났더니 피곤하다.', '간만에 여자친구랑 데이트 하기로 했어.', '집에 있는다는 소리야.']

In [45]:
import random
from tqdm import tqdm_notebook 

BATCH_SIZE = 64
EPOCHS = 15

for epoch in range(EPOCHS):
    total_loss = 0

    idx_list = list(range(0, enc_train.shape[0], BATCH_SIZE))
    random.shuffle(idx_list)
    t = tqdm_notebook(idx_list)

    for (batch, idx) in enumerate(t):
        batch_loss, enc_attns, dec_attns, dec_enc_attns = \
        train_step(enc_train[idx:idx+BATCH_SIZE],
                    dec_train[idx:idx+BATCH_SIZE],
                    transformer,
                    optimizer)

        total_loss += batch_loss

        t.set_description_str('Epoch %2d' % (epoch + 1))
        t.set_postfix_str('Loss %.4f' % (total_loss.numpy() / (batch + 1)))
    print('Translations' )
    for sentence in sentences:
        translate(sentence, transformer, enc_tokenizer, dec_tokenizer)
    print()
    print('Hyperparameters' )
    print('n_layers :', N_LAYERS)
    print('d_model :', D_MODEL)
    print('n_heads : ', N_HEADS)
    print('d_ff :', D_FF)
    print('dropout :', DROPOUT)
    print()
    print('TRAINING')
    print('Warmup Steps: 4000')
    print('Batch size : 64')
    print('Epoch At : ', epoch+1)

HBox(children=(FloatProgress(value=0.0, max=496.0), HTML(value='')))


Translations


TypeError: Cannot convert 0 to EagerTensor of dtype string