In [None]:
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split

import unicodedata
import re
import numpy as np
import os
import io
import time
import jieba

In [None]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
print(os.listdir("/content/drive/MyDrive"))

['news-commentary-v13.zh-en.zh', 'news-commentary-v13.zh-en.en', 'Colab Notebooks', 'sentimental-analysis', 'cmn.txt', 'cmn-eng.zip', 'ckpt-4.data-00000-of-00001']


In [None]:
en_path_to_file = "/content/drive/MyDrive/news-commentary-v13.zh-en.en"
zh_path_to_file = "/content/drive/MyDrive/news-commentary-v13.zh-en.zh"

In [None]:
# Converts the unicode file to ascii
def unicode_to_ascii(s):
  return ''.join(c for c in unicodedata.normalize('NFD', s)
                 if unicodedata.category(c) != 'Mn')


def preprocess_sentence(w):
#   w = unicode_to_ascii(w.lower().strip())
  w = re.sub(r"([?.!,¿])", r" \1 ", w)
  w = re.sub(r'[" "]+', " ", w)

  # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
  w = re.sub(r"[^a-zA-Z\u4e00-\u9fa5?.!,¿]+", " ", w)
  w = w.strip()

  w = '<start> ' + w + ' <end>'
  return w
def zh_preprocess_sentence(w):
    w = w[:-1]
    w = re.sub(r"([?.!,¿])", r" \1 ", w)
    w = re.sub(r'[" "]+', " ", w)

    # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
    w = re.sub(r"[^0-9a-zA-Z\u4e00-\u9fa5?.!,¿，。？！；]+", " ", w)
    w = w.strip()
    w = " ".join(jieba.cut(w))
    w = '<start> ' + w + " <end>"
    return w

In [None]:
def create_dataset(en_path, zh_path, num_examples):
    lines = io.open(en_path, encoding='UTF8').readlines()
    en = [preprocess_sentence(sentence) for sentence in lines[:num_examples]]
    lines = io.open(zh_path, encoding='UTF8').readlines()
    
    zh = [zh_preprocess_sentence(sentence) for sentence in lines[:num_examples]]

    return  en, zh

In [None]:
en, zh = create_dataset(en_path_to_file, zh_path_to_file, None)

Building prefix dict from the default dictionary ...
Dumping model to file cache /tmp/jieba.cache
Loading model cost 0.784 seconds.
Prefix dict has been built successfully.


In [None]:
w = "2020年过去了，2021年来了"
w = re.sub(r"([?.!,¿])", r" \1 ", w)
w = re.sub(r'[" "]+', " ", w)

# replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
w = re.sub(r"[^0-9a-zA-Z\u4e00-\u9fa5?.!,¿，。？！；]+", " ", w)
print(w)
w = w.strip()
w = " ".join(jieba.cut(w))
print(w)

2020年过去了，2021年来了
2020 年 过去 了 ， 2021 年 来 了


In [None]:
zh[:10]

['<start> 1929 年 还是 1989 年   ? <end>',
 '<start> 巴黎   随着 经济危机 不断 加深 和 蔓延 ， 整个 世界 一直 在 寻找 历史 上 的 类似 事件 希望 有助于 我们 了解 目前 正在 发生 的 情况 。 <end>',
 '<start> 一 开始 ， 很多 人 把 这次 危机 比作 1982 年 或 1973 年 所 发生 的 情况 ， 这样 得 类比 是 令人 宽心 的 ， 因为 这 两段 时期 意味着 典型 的 周期性 衰退 。 <end>',
 '<start> 如今 人们 的 心情 却是 沉重 多 了 ， 许多 人 开始 把 这次 危机 与 1929 年 和 1931 年 相比 ， 即使 一些 国家 政府 的 表现 仍然 似乎 把视 目前 的 情况 为 是 典型 的 而 看见 的 衰退 。 <end>',
 '<start> 目前 的 趋势 是 ， 要么 是 过度 的 克制   欧洲   ， 要么 是 努力 的 扩展   美国   。 <end>',
 '<start> 欧洲 在 避免 债务 和 捍卫 欧元 的 名义 下正 变得 谨慎 ， 而 美国 已经 在 许多 方面 行动 起来 ， 以 利用 这一 理想 的 时机 来 实行 急需 的 结构性 改革 。 <end>',
 '<start> 然而 ， 作为 地域 战略 学家 ， 无论是 从 政治 意义 还是 从 经济 意义 上 ， 让 我 自然 想到 的 年份 是 1989 年 。 <end>',
 '<start> 当然 ， 雷曼 兄弟 公司 的 倒闭 和 柏林墙 的 倒塌 没有 任何 关系 。 <end>',
 '<start> 事实上 ， 从 表面 上 看 ， 两者 似乎 是 完全 是 相反 的   一个 是 象征 着 压抑 和 人为 分裂 的 柏林墙 的 倒塌 ， 而 另 一个 是 看似 坚不可摧 的 并 令人 安心 的 金融 资本主义 机构 的 倒塌 。 <end>',
 '<start> 然而 ， 和 1989 年 一样 ， 2008   2009 年 很 可能 也 能 被 视为 一个 划时代 的 改变 ， 其 带来 的 发人深省 的 后果 将 在 几十年 后 仍 能 让 我们 感受 得到 。 <end>']

In [None]:
def tokenize(lang):
  lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
  lang_tokenizer.fit_on_texts(lang)

  tensor = lang_tokenizer.texts_to_sequences(lang)

  tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                         padding='post')

  return tensor, lang_tokenizer

In [None]:

def load_dataset(en_path_to_file, zh_path_to_file, num_examples=None):
  # creating cleaned input, output pairs
  targ_lang, inp_lang = create_dataset(en_path_to_file, zh_path_to_file, num_examples)

  input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
  target_tensor, targ_lang_tokenizer = tokenize(targ_lang)

  return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer

In [None]:

# Try experimenting with the size of that dataset
num_examples = 30000
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(en_path_to_file, zh_path_to_file, num_examples)

# Calculate max_length of the target tensors
max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]

In [None]:

# Creating training and validation sets using an 80-20 split
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

# Show length
print(len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val))

24000 24000 6000 6000


In [None]:
def convert(lang, tensor):
  for t in tensor:
    if t != 0:
      print(f'{t} ----> {lang.index_word[t]}')

In [None]:
print("Input Language; index to word mapping")
convert(inp_lang, input_tensor_train[0])
print()
print("Target Language; index to word mapping")
convert(targ_lang, target_tensor_train[0])

Input Language; index to word mapping
3 ----> <start>
6 ----> 在
13 ----> 对
2791 ----> 纳米技术
302 ----> 领导
418 ----> 地位
1 ----> 的
48 ----> 全球
619 ----> 竞争
22 ----> 中
2 ----> ，
190 ----> 只有
154 ----> 那些
13 ----> 对
40 ----> 其
4483 ----> 危险性
24 ----> 有
135 ----> 很
402 ----> 好
1 ----> 的
604 ----> 认识
214 ----> 并且
86 ----> 支持
721 ----> 必要
1 ----> 的
221 ----> 研究
69 ----> 以
125 ----> 把
131 ----> 这种
4483 ----> 危险性
3672 ----> 降到
2203 ----> 最低
391 ----> 程度
1 ----> 的
16 ----> 国家
414 ----> 才
29 ----> 会
100 ----> 成为
370 ----> 最后
1 ----> 的
2204 ----> 赢家
5 ----> 。
4 ----> <end>

Target Language; index to word mapping
3 ----> <start>
9 ----> in
1 ----> the
52 ----> global
1988 ----> race
14 ----> for
2914 ----> nanotechnology
340 ----> leadership
2 ----> ,
1 ----> the
3008 ----> winners
26 ----> will
16 ----> be
115 ----> those
72 ----> who
839 ----> understand
1 ----> the
485 ----> risks
8 ----> and
152 ----> support
1 ----> the
445 ----> research
612 ----> necessary
7 ----> to
3584 ----> minimize
111 --

In [None]:
BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
steps_per_epoch = len(input_tensor_train)//BATCH_SIZE
embedding_dim = 128
units = 512
vocab_inp_size = len(inp_lang.word_index)+1
vocab_tar_size = len(targ_lang.word_index)+1

dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [None]:
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    super(Encoder, self).__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.enc_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')

  def call(self, x, hidden):
    x = self.embedding(x)
    output, state = self.gru(x, initial_state=hidden)
    return output, state

  def initialize_hidden_state(self):
    return tf.zeros((self.batch_sz, self.enc_units))



In [None]:
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)



In [None]:

class BahdanauAttention(tf.keras.layers.Layer):
  def __init__(self, units):
    super(BahdanauAttention, self).__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    # query hidden state shape == (batch_size, hidden size)
    # query_with_time_axis shape == (batch_size, 1, hidden size)
    # values shape == (batch_size, max_len, hidden size)
    # we are doing this to broadcast addition along the time axis to calculate the score
    query_with_time_axis = tf.expand_dims(query, 1)

    # score shape == (batch_size, max_length, 1)
    # we get 1 at the last axis because we are applying score to self.V
    # the shape of the tensor before applying self.V is (batch_size, max_length, units)
    score = self.V(tf.nn.tanh(
        self.W1(query_with_time_axis) + self.W2(values)))

    # attention_weights shape == (batch_size, max_length, 1)
    attention_weights = tf.nn.softmax(score, axis=1)

    # context_vector shape after sum == (batch_size, hidden_size)
    context_vector = attention_weights * values
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights


In [None]:

class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc = tf.keras.layers.Dense(vocab_size)

    # used for attention
    self.attention = BahdanauAttention(self.dec_units)

  def call(self, x, hidden, enc_output):
    # enc_output shape == (batch_size, max_length, hidden_size)
    context_vector, attention_weights = self.attention(hidden, enc_output)

    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
    x = self.embedding(x)

    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # passing the concatenated vector to the GRU
    output, state = self.gru(x)

    # output shape == (batch_size * 1, hidden_size)
    output = tf.reshape(output, (-1, output.shape[2]))

    # output shape == (batch_size, vocab)
    x = self.fc(output)

    return x, state, attention_weights

In [None]:
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

In [None]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
                                                            reduction='none')


def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

In [None]:

checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

In [None]:

@tf.function
def train_step(inp, targ, enc_hidden):
  loss = 0

  with tf.GradientTape() as tape:
    enc_output, enc_hidden = encoder(inp, enc_hidden)

    dec_hidden = enc_hidden

    dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)

    # Teacher forcing - feeding the target as the next input
    for t in range(1, targ.shape[1]):
      # passing enc_output to the decoder
      predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)

      loss += loss_function(targ[:, t], predictions)

      # using teacher forcing
      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))

  variables = encoder.trainable_variables + decoder.trainable_variables

  gradients = tape.gradient(loss, variables)

  optimizer.apply_gradients(zip(gradients, variables))

  return batch_loss

In [None]:
EPOCHS = 10

for epoch in range(EPOCHS):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      print(f'Epoch {epoch+1} Batch {batch} Loss {batch_loss.numpy():.4f}')
  # saving (checkpoint) the model every 2 epochs
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix=checkpoint_prefix)

  print(f'Epoch {epoch+1} Loss {total_loss/steps_per_epoch:.4f}')
  print(f'Time taken for 1 epoch {time.time()-start:.2f} sec\n')

Epoch 1 Batch 0 Loss 2.4356
Epoch 1 Batch 100 Loss 1.5084
Epoch 1 Batch 200 Loss 1.5473
Epoch 1 Batch 300 Loss 1.5258
Epoch 1 Loss 1.5894
Time taken for 1 epoch 415.07 sec

Epoch 2 Batch 0 Loss 1.2561
Epoch 2 Batch 100 Loss 1.5227
Epoch 2 Batch 200 Loss 1.5879
Epoch 2 Batch 300 Loss 1.2781
Epoch 2 Loss 1.4534
Time taken for 1 epoch 309.08 sec

Epoch 3 Batch 0 Loss 1.3145
Epoch 3 Batch 100 Loss 1.4520
Epoch 3 Batch 200 Loss 1.4190
Epoch 3 Batch 300 Loss 1.3531
Epoch 3 Loss 1.3815
Time taken for 1 epoch 309.30 sec

Epoch 4 Batch 0 Loss 1.2197
Epoch 4 Batch 100 Loss 1.3635
Epoch 4 Batch 200 Loss 1.1740
Epoch 4 Batch 300 Loss 1.3564
Epoch 4 Loss 1.3174
Time taken for 1 epoch 309.35 sec

Epoch 5 Batch 0 Loss 1.1783
Epoch 5 Batch 100 Loss 1.2386
Epoch 5 Batch 200 Loss 1.2464
Epoch 5 Batch 300 Loss 1.2403
Epoch 5 Loss 1.2565
Time taken for 1 epoch 308.88 sec

Epoch 6 Batch 0 Loss 1.3047
Epoch 6 Batch 100 Loss 1.2343
Epoch 6 Batch 200 Loss 1.2250
Epoch 6 Batch 300 Loss 1.0828
Epoch 6 Loss 1.19

In [None]:

def evaluate(sentence):
  attention_plot = np.zeros((max_length_targ, max_length_inp))

  sentence = preprocess_sentence(sentence)

  inputs = [inp_lang.word_index[i] for i in sentence.split(' ')]
  inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                         maxlen=max_length_inp,
                                                         padding='post')
  inputs = tf.convert_to_tensor(inputs)

  result = ''

  hidden = [tf.zeros((1, units))]
  enc_out, enc_hidden = encoder(inputs, hidden)

  dec_hidden = enc_hidden
  dec_input = tf.expand_dims([targ_lang.word_index['<start>']], 0)

  for t in range(max_length_targ):
    predictions, dec_hidden, attention_weights = decoder(dec_input,
                                                         dec_hidden,
                                                         enc_out)

    # storing the attention weights to plot later on
    attention_weights = tf.reshape(attention_weights, (-1, ))
    attention_plot[t] = attention_weights.numpy()

    predicted_id = tf.argmax(predictions[0]).numpy()

    result += targ_lang.index_word[predicted_id] + ' '

    if targ_lang.index_word[predicted_id] == '<end>':
      return result, sentence, attention_plot

    # the predicted ID is fed back into the model
    dec_input = tf.expand_dims([predicted_id], 0)

  return result, sentence, attention_plot

In [None]:

# function for plotting the attention weights
def plot_attention(attention, sentence, predicted_sentence):
  plt.rcParams['font.family'] = ['sans-serif'] 
  fig = plt.figure(figsize=(10, 10))
  ax = fig.add_subplot(1, 1, 1)
  ax.matshow(attention, cmap='viridis')

  fontdict = {'fontsize': 14}

  ax.set_xticklabels([''] + sentence, fontdict=fontdict, rotation=90)
  ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict)

  ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
  ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

  plt.show()

In [None]:
def translate(sentence):
  sentence = " ".join(jieba.cut(sentence))
  result, sentence, attention_plot = evaluate(sentence)

  print('Input:', sentence)
  print('Predicted translation:', result)

#   attention_plot = attention_plot[:len(result.split(' ')),
#                                   :len(sentence.split(' '))]
#   plot_attention(attention_plot, sentence.split(' '), result.split(' '))

In [None]:
translate("你怎么了")

Input: <start> 你 怎么 了 <end>
Predicted translation: you know how to be done . <end> 


In [None]:
translate("为什么训练结果很不好？")

Input: <start> 为什么 训练 结果 很 不好 <end>
Predicted translation: why , there is no one . <end> 


In [None]:
translate("那我就继续训练")

Input: <start> 那 我 就 继续 训练 <end>
Predicted translation: what i am not just as a few of the same . <end> 


In [None]:
EPOCHS = 10

for epoch in range(11,11 + EPOCHS):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      print(f'Epoch {epoch+1} Batch {batch} Loss {batch_loss.numpy():.4f}')
  # saving (checkpoint) the model every 2 epochs
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix=checkpoint_prefix)

  print(f'Epoch {epoch+1} Loss {total_loss/steps_per_epoch:.4f}')
  print(f'Time taken for 1 epoch {time.time()-start:.2f} sec\n')

Epoch 12 Batch 0 Loss 0.9867
Epoch 12 Batch 100 Loss 0.8365
Epoch 12 Batch 200 Loss 0.9500
Epoch 12 Batch 300 Loss 0.9165
Epoch 12 Loss 0.9478
Time taken for 1 epoch 309.13 sec

Epoch 13 Batch 0 Loss 0.8487
Epoch 13 Batch 100 Loss 0.8797
Epoch 13 Batch 200 Loss 0.9389
Epoch 13 Batch 300 Loss 0.9228
Epoch 13 Loss 0.9033
Time taken for 1 epoch 308.18 sec

Epoch 14 Batch 0 Loss 0.8366
Epoch 14 Batch 100 Loss 0.7698
Epoch 14 Batch 200 Loss 0.7555
Epoch 14 Batch 300 Loss 0.7917
Epoch 14 Loss 0.8598
Time taken for 1 epoch 308.85 sec

Epoch 15 Batch 0 Loss 0.8253
Epoch 15 Batch 100 Loss 0.7587
Epoch 15 Batch 200 Loss 0.8403
Epoch 15 Batch 300 Loss 0.8884
Epoch 15 Loss 0.8185
Time taken for 1 epoch 308.46 sec

Epoch 16 Batch 0 Loss 0.8029
Epoch 16 Batch 100 Loss 0.7919
Epoch 16 Batch 200 Loss 0.7926
Epoch 16 Batch 300 Loss 0.7850
Epoch 16 Loss 0.7797
Time taken for 1 epoch 308.74 sec

Epoch 17 Batch 0 Loss 0.7042
Epoch 17 Batch 100 Loss 0.7054
Epoch 17 Batch 200 Loss 0.6306
Epoch 17 Batch 300 

In [None]:
translate("这次的结果如何？")

Input: <start> 这次 的 结果 如何 <end>
Predicted translation: the outcome ? <end> 


In [None]:
translate("感觉比上次好")

Input: <start> 感觉 比 上次 好 <end>
Predicted translation: the more likely fall . <end> 


In [None]:
translate("看来训练的模型还是存在一些问题")

Input: <start> 看来 训练 的 模型 还是 存在 一些 问题 <end>
Predicted translation: there is to be done . <end> 


In [None]:
translate("继续去运行代码")

Input: <start> 继续 去 运行 代码 <end>
Predicted translation: no longer term . <end> 


In [None]:
translate("心情十分不好")

Input: <start> 心情 十分 不好 <end>
Predicted translation: the second moment to be the case for it . <end> 


In [None]:
translate("真的浪费时间")

Input: <start> 真的 浪费时间 <end>
Predicted translation: what went to explain to dare <end> 


In [None]:
EPOCHS = 10

for epoch in range(21,21 + EPOCHS):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      print(f'Epoch {epoch+1} Batch {batch} Loss {batch_loss.numpy():.4f}')
  # saving (checkpoint) the model every 2 epochs
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix=checkpoint_prefix)

  print(f'Epoch {epoch+1} Loss {total_loss/steps_per_epoch:.4f}')
  print(f'Time taken for 1 epoch {time.time()-start:.2f} sec\n')

Epoch 22 Batch 0 Loss 0.5870
Epoch 22 Batch 100 Loss 0.6153
Epoch 22 Batch 200 Loss 0.5793
Epoch 22 Batch 300 Loss 0.5155
Epoch 22 Loss 0.5826
Time taken for 1 epoch 308.80 sec

Epoch 23 Batch 0 Loss 0.4795
Epoch 23 Batch 100 Loss 0.5278
Epoch 23 Batch 200 Loss 0.5953
Epoch 23 Batch 300 Loss 0.5855
Epoch 23 Loss 0.5568
Time taken for 1 epoch 307.90 sec

Epoch 24 Batch 0 Loss 0.4495
Epoch 24 Batch 100 Loss 0.4986
Epoch 24 Batch 200 Loss 0.4979
Epoch 24 Batch 300 Loss 0.5073
Epoch 24 Loss 0.5309
Time taken for 1 epoch 308.09 sec

Epoch 25 Batch 0 Loss 0.5136
Epoch 25 Batch 100 Loss 0.5087
Epoch 25 Batch 200 Loss 0.4461
Epoch 25 Batch 300 Loss 0.4962
Epoch 25 Loss 0.5055
Time taken for 1 epoch 307.79 sec

Epoch 26 Batch 0 Loss 0.4839
Epoch 26 Batch 100 Loss 0.4623
Epoch 26 Batch 200 Loss 0.4831
Epoch 26 Batch 300 Loss 0.4841
Epoch 26 Loss 0.4814
Time taken for 1 epoch 308.12 sec

Epoch 27 Batch 0 Loss 0.4428
Epoch 27 Batch 100 Loss 0.4060
Epoch 27 Batch 200 Loss 0.4663
Epoch 27 Batch 300 

In [None]:
translate("巴黎-随着经济危机不断加深和蔓延，整个世界一直在寻找历史上的类似事件希望有助于我们了解目前正在发生的情况。")

Input: <start> 巴黎 随着 经济危机 不断 加深 和 蔓延 整个 世界 一直 在 寻找 历史 上 的 类似 事件 希望 有助于 我们 了解 目前 正在 发生 的 情况 <end>
Predicted translation: paris as the global recession is now in the world today is changing . <end> 


In [None]:
translate("当然，现在的情况和1989年的情况明显不同了。")

Input: <start> 当然 现在 的 情况 和 年 的 情况 明显 不同 了 <end>
Predicted translation: now , there is that story of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the lessons of the 


In [None]:
translate("作为哈佛大学和麻省理工学院的访问教授，我能看到危机过后的世界是什么样子的。")

Input: <start> 作为 哈佛大学 和 麻省理工学院 的 访问 教授 我能 看到 危机 过后 的 世界 是 什么 样子 的 <end>
Predicted translation: as a similar view could cure the world could not be the world crisis in the global crisis could not be the world crisis in the global crisis could not be the world crisis in the global crisis could not be the world crisis in the global crisis could not be the world crisis in the global crisis could not be the world crisis in the global crisis could not be the world crisis in the global crisis could not be the world crisis in the global crisis could not be the world crisis in the global crisis could not be the world crisis in the global 


In [None]:
translate("每个人似乎都是输家，即使有些国家比其它国家受到的影响更大。")

Input: <start> 每个 人 似乎 都 是 输家 即使 有些 国家 比 其它 国家 受到 的 影响 更大 <end>
Predicted translation: everyone seems to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to be more likely to 


In [None]:
translate("你实际上必须实施解决办法——并且在事实证明你知道的并不如你认为的那样多时，你得愿意改变办法。")

Input: <start> 你 实际上 必须 实施 解决办法 并且 在 事实证明 你 知道 的 并 不如 你 认为 的 那样 多时 你 得 愿意 改变 办法 <end>
Predicted translation: you cannot afford to mind about the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the sidelines of disruption from the 


In [None]:
EPOCHS = 10

for epoch in range(31,31 + EPOCHS):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      print(f'Epoch {epoch+1} Batch {batch} Loss {batch_loss.numpy():.4f}')
  # saving (checkpoint) the model every 2 epochs
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix=checkpoint_prefix)

  print(f'Epoch {epoch+1} Loss {total_loss/steps_per_epoch:.4f}')
  print(f'Time taken for 1 epoch {time.time()-start:.2f} sec\n')

Epoch 32 Batch 0 Loss 0.2969
Epoch 32 Batch 100 Loss 0.3601
Epoch 32 Batch 200 Loss 0.4054
Epoch 32 Batch 300 Loss 0.3351
Epoch 32 Loss 0.3620
Time taken for 1 epoch 308.43 sec

Epoch 33 Batch 0 Loss 0.2686
Epoch 33 Batch 100 Loss 0.3541
Epoch 33 Batch 200 Loss 0.2787
Epoch 33 Batch 300 Loss 0.3302
Epoch 33 Loss 0.3423
Time taken for 1 epoch 307.38 sec

Epoch 34 Batch 0 Loss 0.2981
Epoch 34 Batch 100 Loss 0.3281
Epoch 34 Batch 200 Loss 0.3430
Epoch 34 Batch 300 Loss 0.4020
Epoch 34 Loss 0.3246
Time taken for 1 epoch 307.85 sec

Epoch 35 Batch 0 Loss 0.3143
Epoch 35 Batch 100 Loss 0.2744
Epoch 35 Batch 200 Loss 0.3269
Epoch 35 Batch 300 Loss 0.3269
Epoch 35 Loss 0.3086
Time taken for 1 epoch 307.39 sec

Epoch 36 Batch 0 Loss 0.2763
Epoch 36 Batch 100 Loss 0.2725
Epoch 36 Batch 200 Loss 0.2816
Epoch 36 Batch 300 Loss 0.3009
Epoch 36 Loss 0.2944
Time taken for 1 epoch 307.80 sec

Epoch 37 Batch 0 Loss 0.2438
Epoch 37 Batch 100 Loss 0.2802
Epoch 37 Batch 200 Loss 0.2659
Epoch 37 Batch 300 

In [None]:
translate("不行啊")

Input: <start> 不行 啊 <end>
Predicted translation: but a lesson of hyper uncertainty <end> 


In [None]:
translate("相反，欧洲决策者拒绝采取货币刺激而实施了财政紧缩，无视其银行压力的加剧。")

Input: <start> 相反 欧洲 决策者 拒绝 采取 货币 刺激 而 实施 了 财政 紧缩 无视 其 银行 压力 的 加剧 <end>
Predicted translation: instead of european governments can only with their stimulus measures . <end> 


In [None]:
translate("简言之，2015年拖累全球经济的因素在新的一年里还会持续——有的甚至还会加剧。")

Input: <start> 简言之 年 拖累 全球 经济 的 因素 在 新 的 一年 里 还 会 持续 有 的 甚至 还会 加剧 <end>
Predicted translation: the single gdp in the global economy ever rise in the next few . <end> 
