In [None]:
# Install the system CUDA toolkit package non-interactively (`-y`).
# NOTE(review): the later `!nvcc --version` cell presumably relies on this install — confirm.
!apt install nvidia-cuda-toolkit -y

In [None]:
!pip uninstall -y -q tensorflow tensorflow keras tensorflow-estimator tensorflow-text
!pip install protobuf~=3.20.3
!pip install -q tensorflow_datasets
!pip install -q -U --no-cache-dir tensorflow[and-cuda]==2.14.1 tensorflow-text 

In [None]:
# # import tensorflow as tf

# # Place tensors on the CPU
# with tf.device('/CPU:0'):
#   a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
#   b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

# # Run on the GPU
# c = tf.matmul(a, b)
# print(c)

In [None]:
# import tensorflow as tf
# print(tf.__version__)

In [None]:
# Print the version of the CUDA compiler visible on the PATH.
!nvcc --version

In [None]:
# Remove any previous checkout first so the clone does not fail on an existing
# directory when this cell is re-run, then fetch the data repository.
!rm -rf nlp_data
!git clone https://github.com/lquyet/nlp_data.git

In [None]:
# Show the GPU(s) and driver reported by the NVIDIA tooling.
!nvidia-smi

In [None]:
import logging
import time

import numpy as np
import matplotlib.pyplot as plt

import tensorflow_datasets as tfds
import tensorflow as tf

import tensorflow_text

In [None]:
import tensorflow as tf

# Ask TensorFlow which physical GPU devices it can see and report how many.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Dataset setup

In [None]:
# Read both sides of the training corpus, one sentence per line. Lines are kept
# exactly as stored in the files, including their trailing newline.
with open("/kaggle/working/nlp_data/v5/train2023_cleaned.lo", "r", encoding="utf-8") as lo_file:
    lo = list(lo_file)

with open("/kaggle/working/nlp_data/v5/train2023_cleaned.vi", "r", encoding="utf-8") as vi_file:
    vi = list(vi_file)

# Pair the two lists positionally as (Lao, Vietnamese) examples.
# NOTE(review): assumes the two files are aligned line-by-line — confirm.
train_examples = tf.data.Dataset.from_tensor_slices((lo, vi))

In [None]:
# Read both sides of the dev corpus, one sentence per line. Lines are kept
# exactly as stored in the files, including their trailing newline.
with open("/kaggle/working/nlp_data/v5/dev2023_cleaned.lo", "r", encoding="utf-8") as lo_file:
    lot = list(lo_file)

with open("/kaggle/working/nlp_data/v5/dev2023_cleaned.vi", "r", encoding="utf-8") as vi_file:
    vit = list(vi_file)

# Pair the two lists positionally as (Lao, Vietnamese) validation examples.
# NOTE(review): assumes the two files are aligned line-by-line — confirm.
val_examples = tf.data.Dataset.from_tensor_slices((lot, vit))

In [None]:
# with open("/kaggle/working/nlp_data/VLSP2023.TestSet/test_vi.txt", "r", encoding="utf-8") as f:
#     vit = f.readlines()

# with open("/kaggle/working/nlp_data/VLSP2023.TestSet/test_lo.txt", "r", encoding="utf-8") as f:
#     lot = f.readlines()
    
# val_examples = tf.data.Dataset.from_tensor_slices((lot, vit))

In [None]:
# List the contents of the cloned data repository.
!ls nlp_data

In [None]:
# Print the first three sentence pairs as a sanity check. `lo_examples` and
# `vi_examples` stay bound after the loop and are reused by later cells.
for lo_examples, vi_examples in train_examples.batch(3).take(1):
  print('> Examples in Laos:')
  print('\n'.join(raw.decode('utf-8') for raw in lo_examples.numpy()))
  print()

  print('> Examples in Viet:')
  print('\n'.join(raw.decode('utf-8') for raw in vi_examples.numpy()))

In [None]:
# Extract the tokenizer archive shipped with the data repository into the
# current working directory.
!unzip /kaggle/working/nlp_data/token/token_model.zip

In [None]:
!ls 

In [None]:
# Load the tokenizer SavedModel from the `btl_nlp_lao_viet` directory; later cells
# use its `.lo` and `.vi` attributes.
# NOTE(review): the directory is presumably produced by the unzip cell — confirm.
tokenizers = tf.saved_model.load("btl_nlp_lao_viet")

In [None]:
# List the public (non-underscore) attributes exposed by the Vietnamese tokenizer.
[item for item in dir(tokenizers.vi) if not item.startswith('_')]

In [None]:
# from transformers import TFAutoModel, AutoTokenizer
# phobert = TFAutoModel.from_pretrained("vinai/phobert-base")

In [None]:
# vi_tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")

In [None]:
# Show the raw Vietnamese strings left over from the sample-pair cell.
print('> This is a batch of strings:')
print('\n'.join(raw.decode('utf-8') for raw in vi_examples.numpy()))

In [None]:
# encoded = vi_tokenizer.encode("Mot ong sao sanh")
# encoded
# decode = vi_tokenizer.decode(encoded)
# decode

# # take eager tensor -> return ragged tensor
# def vi_tokenize(inputs):
#     res = []
#     for i in inputs.numpy():
#         to_encode = i.decode('utf-8')
#         res.append(vi_tokenizer.encode(to_encode))
#     return tf.ragged.constant(res)



In [None]:
# Convert the Vietnamese example strings into token IDs; `encoded` is reused by
# the detokenize and lookup cells.
encoded = tokenizers.vi.tokenize(vi_examples)
# encoded = vi_tokenize(vi_examples)

print('> This is a padded-batch of token IDs:')
# NOTE(review): `prepare_batch` calls `.to_tensor()` on this tokenizer's output to
# pad it, so the rows printed here are probably unpadded despite the message — confirm.
for row in encoded.to_list():
  print(row)

In [None]:
# take ragged tensor, return eager tensor
# def vi_decode(inputs):
#     l = inputs.to_list()
#     r = []
#     for i in l:
#         res = vi_tokenizer.decode(i)
#         r.append(res)
#     return tf.constant(r)

In [None]:
# Map the token IDs back to text to check the tokenize/detokenize round trip.
round_trip = tokenizers.vi.detokenize(encoded)
# round_trip = vi_decode(encoded)

print('> This is human-readable text:')
# Each element is a byte string, so decode it before printing.
for line in round_trip.numpy():
  print(line.decode('utf-8'))

In [None]:
# type(round_trip)

In [None]:
# Look up the token text behind each ID and display it as the cell output.
print('> This is the text split into tokens:')
tokens = tokenizers.vi.lookup(encoded)
tokens

In [None]:
# Tokenize the whole training set in chunks of 1024 pairs and record the number
# of tokens in every sentence (Lao first, then Vietnamese, for each chunk).
lengths = []

for lo_examples, vi_examples in train_examples.batch(1024):
  for tokenizer, examples in ((tokenizers.lo, lo_examples),
                              (tokenizers.vi, vi_examples)):
    lengths.append(tokenizer.tokenize(examples).row_lengths())
  # One dot per processed chunk as a lightweight progress indicator.
  print('.', end='', flush=True)

In [None]:
# Flatten the per-chunk length arrays and find the longest sentence.
all_lengths = np.concatenate(lengths)
max_length = max(all_lengths)

# Histogram of token counts in 5-token bins from 0 to 500.
fig, ax = plt.subplots()
ax.hist(all_lengths, np.linspace(0, 500, 101))
# Re-apply the current y-limits so the marker line below does not rescale the axis.
ax.set_ylim(ax.get_ylim())
# Vertical line at the maximum length.
ax.plot([max_length, max_length], ax.get_ylim())
ax.set_title(f'Maximum tokens per example: {max_length}');

In [None]:
MAX_TOKENS=128
def prepare_batch(lo, vi):
    """Tokenize a batch of sentence pairs into model inputs and labels.

    Args:
      lo: Batch of Lao strings (encoder input).
      vi: Batch of Vietnamese strings (decoder input / target).

    Returns:
      `((lo_ids, vi_inputs), vi_labels)`, all zero-padded dense tensors. The Lao
      IDs are cut to `MAX_TOKENS`; `vi_inputs` and `vi_labels` are the Vietnamese
      IDs (cut to `MAX_TOKENS + 1`) without their last and first token, so the
      labels are the inputs shifted by one position.
    """
    # Tokenizer output is ragged: trim first, then pad with zeros.
    source_ids = tokenizers.lo.tokenize(lo)[:, :MAX_TOKENS].to_tensor()

    # One extra token is kept so both shifted views can reach MAX_TOKENS.
    target_ids = tokenizers.vi.tokenize(vi)[:, :(MAX_TOKENS+1)]
    decoder_inputs = target_ids[:, :-1].to_tensor()  # every row minus its last token
    decoder_labels = target_ids[:, 1:].to_tensor()   # every row minus its first token

    return (source_ids, decoder_inputs), decoder_labels

In [None]:
# Shuffle-buffer size and batch size consumed by `make_batches`.
BUFFER_SIZE = 20000
BATCH_SIZE = 64

In [None]:
def make_batches(ds):
  """Turn a dataset of raw sentence pairs into tokenized training batches.

  Args:
    ds: `tf.data.Dataset` yielding `(lo, vi)` string pairs.

  Returns:
    A dataset that shuffles with a `BUFFER_SIZE` buffer, groups `BATCH_SIZE`
    pairs, applies `prepare_batch` to each batch and prefetches the result.
  """
  shuffled = ds.shuffle(BUFFER_SIZE)
  batched = shuffled.batch(BATCH_SIZE)
  # Tokenize whole batches at once; parallelism is left to tf.data.
  tokenized = batched.map(prepare_batch, tf.data.AUTOTUNE)
  return tokenized.prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
# Create training and validation set batches.
# Both go through the same `make_batches` pipeline, so the validation data is
# shuffled as well.
train_batches = make_batches(train_examples)
val_batches = make_batches(val_examples)

In [None]:
# Pull a single batch to use as sample input for the layer demos below.
# This rebinds `lo` and `vi` from the raw sentence lists to token-ID tensors.
(lo, vi), vi_labels = next(iter(train_batches.take(1)))

for sample in (lo, vi, vi_labels):
  print(sample.shape)

In [None]:
# Compare the first ten decoder-input IDs with the first ten label IDs of one
# example: the labels are the same sequence shifted by one token.
print(vi[0][:10])
print(vi_labels[0][:10])

In [None]:
def positional_encoding(length, depth):
  """Build a sinusoidal positional-encoding table.

  Args:
    length: Number of positions (rows) to generate.
    depth: Width of the encoding; half the columns are sines, half cosines.

  Returns:
    A `tf.float32` tensor whose first half of columns holds the sines and whose
    second half holds the cosines of the same angles (not interleaved).
  """
  half_depth = depth/2

  # Column of positions and row of frequency indices scaled into [0, 1).
  position = np.arange(length).reshape(-1, 1)                      # (length, 1)
  freq_index = (np.arange(half_depth)/half_depth).reshape(1, -1)   # (1, depth/2)

  # Each column rotates at its own rate, from 1 down towards 1/10000.
  angles = position * (1 / (10000**freq_index))                    # (length, depth/2)

  table = np.concatenate([np.sin(angles), np.cos(angles)], axis=-1)

  return tf.cast(table, dtype=tf.float32)

In [None]:
#@title
pos_encoding = positional_encoding(length=2048, depth=512)

# Check the shape.
print(pos_encoding.shape)

# Heat-map of the table: positions along x, encoding dimensions along y.
fig, ax = plt.subplots()
mesh = ax.pcolormesh(pos_encoding.numpy().T, cmap='RdBu')
ax.set_ylabel('Depth')
ax.set_xlabel('Position')
fig.colorbar(mesh, ax=ax)
plt.show()


In [None]:
#@title
# Scale every position's vector to unit length so the dot products below are
# cosine similarities.
pos_encoding/=tf.norm(pos_encoding, axis=1, keepdims=True)
# Similarity of position 1000 to every position in the table.
p = pos_encoding[1000]
dots = tf.einsum('pd,d -> p', pos_encoding, p)
# Top panel: the full similarity curve, with vertical markers at 950 and 1050.
plt.subplot(2,1,1)
plt.plot(dots)
plt.ylim([0,1])
# The NaN entries break the line so the two markers are drawn as separate segments.
plt.plot([950, 950, float('nan'), 1050, 1050],
         [0,1,float('nan'),0,1], color='k', label='Zoom')
plt.legend()
# Bottom panel: the same curve restricted to positions 950-1050.
plt.subplot(2,1,2)
plt.plot(dots)
plt.xlim([950, 1050])
plt.ylim([0,1])


In [None]:
class PositionalEmbedding(tf.keras.layers.Layer):
  """Token embedding combined with a fixed sinusoidal positional encoding.

  Args:
    vocab_size: Number of rows in the embedding table.
    d_model: Embedding width; also the depth of the positional encoding.
    max_length: Number of positions precomputed by `positional_encoding`.
      Sequences longer than this cannot be encoded. Defaults to 2048, the
      value that used to be hard-coded.
  """
  def __init__(self, vocab_size, d_model, max_length=2048):
    super().__init__()
    self.d_model = d_model
    # mask_zero=True makes the embedding layer emit a mask that hides token ID 0.
    self.embedding = tf.keras.layers.Embedding(vocab_size, d_model, mask_zero=True)
    self.pos_encoding = positional_encoding(length=max_length, depth=d_model)

  def compute_mask(self, *args, **kwargs):
    # Expose the embedding layer's mask as this layer's mask.
    return self.embedding.compute_mask(*args, **kwargs)

  def call(self, x):
    """Embed token IDs `x` and add the encodings of their positions."""
    length = tf.shape(x)[1]
    x = self.embedding(x)
    # This factor sets the relative scale of the embedding and positional_encoding.
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    # Use only the first `length` rows of the table, broadcast over the batch.
    x = x + self.pos_encoding[tf.newaxis, :length, :]
    return x


In [None]:
# One embedding layer per language, sized from each tokenizer's vocabulary.
embed_lo = PositionalEmbedding(vocab_size=tokenizers.lo.get_vocab_size(), d_model=512)
embed_vi = PositionalEmbedding(vocab_size=tokenizers.vi.get_vocab_size(), d_model=512)

# Embed the sample batch; these tensors feed the layer demos in later cells.
lo_emb = embed_lo(lo)
vi_emb = embed_vi(vi)

In [None]:
# Inspect the mask attached to the embedded Vietnamese batch.
vi_emb._keras_mask

In [None]:
class BaseAttention(tf.keras.layers.Layer):
  """Shared sublayers for the attention layers defined in this notebook.

  Creates a multi-head attention layer together with the `Add` and
  `LayerNormalization` layers that the subclasses' `call` methods use for the
  residual connection. This class defines no `call` of its own.

  Args:
    **kwargs: Forwarded unchanged to `tf.keras.layers.MultiHeadAttention`
      (the notebook passes `num_heads`, `key_dim` and `dropout`).
  """
  def __init__(self, **kwargs):
    super().__init__()
    self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
    self.layernorm = tf.keras.layers.LayerNormalization()
    self.add = tf.keras.layers.Add()

In [None]:
class CrossAttention(BaseAttention):
  """Attention from one sequence (`x`) over another (`context`)."""

  def call(self, x, context):
    """Attend from `x` to `context`, then apply residual add and layer norm.

    Args:
      x: Sequence supplying the queries.
      context: Sequence supplying both keys and values.

    Returns:
      `x` plus the attention output, layer-normalized.
    """
    attended, scores = self.mha(
        query=x,
        key=context,
        value=context,
        return_attention_scores=True)

    # Keep the most recent scores around so they can be plotted afterwards.
    self.last_attn_scores = scores

    return self.layernorm(self.add([x, attended]))

In [None]:
# Smoke-test cross-attention: the Vietnamese embeddings query the Lao embeddings.
sample_ca = CrossAttention(num_heads=2, key_dim=512)

# Print the two input shapes, then the output shape for comparison.
print(lo_emb.shape)
print(vi_emb.shape)
print(sample_ca(vi_emb, lo_emb).shape)

In [None]:
class GlobalSelfAttention(BaseAttention):
  """Self-attention with no causal restriction."""

  def call(self, x):
    """Attend from `x` to itself, then apply residual add and layer norm."""
    attended = self.mha(query=x, key=x, value=x)
    return self.layernorm(self.add([x, attended]))

In [None]:
# Smoke-test global self-attention on the Lao embeddings.
sample_gsa = GlobalSelfAttention(num_heads=2, key_dim=512)

# Print the input shape, then the output shape for comparison.
print(lo_emb.shape)
print(sample_gsa(lo_emb).shape)

In [None]:
class CausalSelfAttention(BaseAttention):
  """Self-attention that applies a causal mask."""

  def call(self, x):
    """Attend from `x` to itself under a causal mask, then add and normalize."""
    attended = self.mha(query=x, key=x, value=x, use_causal_mask=True)
    return self.layernorm(self.add([x, attended]))

In [None]:
# Smoke-test causal self-attention on the Vietnamese embeddings.
sample_csa = CausalSelfAttention(num_heads=2, key_dim=512)

# Print the input shape, then the output shape for comparison.
print(vi_emb.shape)
print(sample_csa(vi_emb).shape)

In [None]:
# Causality check: run the layer on a 3-token prefix, and on the full sequence
# sliced to its first 3 positions. With a causal mask the two should agree.
out1 = sample_csa(embed_vi(vi[:, :3]))
out2 = sample_csa(embed_vi(vi))[:, :3]

# Largest absolute difference between the two results (expected to be near zero).
tf.reduce_max(abs(out1 - out2)).numpy()

In [None]:
class FeedForward(tf.keras.layers.Layer):
  """Two-layer feed-forward network with a residual connection and layer norm.

  Args:
    d_model: Output width of the second dense layer.
    dff: Width of the hidden ReLU layer.
    dropout_rate: Dropout applied after the second dense layer.
  """
  def __init__(self, d_model, dff, dropout_rate=0.1):
    super().__init__()
    # Expand to `dff` with ReLU, project to `d_model`, then apply dropout.
    ffn_layers = [
      tf.keras.layers.Dense(dff, activation='relu'),
      tf.keras.layers.Dense(d_model),
      tf.keras.layers.Dropout(dropout_rate)
    ]
    self.seq = tf.keras.Sequential(ffn_layers)
    self.add = tf.keras.layers.Add()
    self.layer_norm = tf.keras.layers.LayerNormalization()

  def call(self, x):
    """Return `layer_norm(x + ffn(x))`."""
    return self.layer_norm(self.add([x, self.seq(x)]))


In [None]:
# Smoke-test the feed-forward block on the Vietnamese embeddings.
sample_ffn = FeedForward(512, 2048)

# Print the input shape, then the output shape for comparison.
print(vi_emb.shape)
print(sample_ffn(vi_emb).shape)

In [None]:
class EncoderLayer(tf.keras.layers.Layer):
  """One encoder block: global self-attention followed by a feed-forward network.

  Args:
    d_model: Model width; used as the attention `key_dim` and the FFN output width.
    num_heads: Number of attention heads.
    dff: Hidden width of the feed-forward network.
    dropout_rate: Dropout rate for both the attention layer and the
      feed-forward network.
  """
  def __init__(self,*, d_model, num_heads, dff, dropout_rate=0.1):
    super().__init__()

    self.self_attention = GlobalSelfAttention(
        num_heads=num_heads,
        key_dim=d_model,
        dropout=dropout_rate)

    # Forward the configured rate: previously the FFN silently kept its own
    # default, so changing `dropout_rate` only affected the attention layer.
    self.ffn = FeedForward(d_model, dff, dropout_rate=dropout_rate)

  def call(self, x):
    """Apply self-attention, then the feed-forward network, to `x`."""
    x = self.self_attention(x)
    x = self.ffn(x)
    return x

In [None]:
# Smoke-test a single encoder layer on the Lao embeddings.
sample_encoder_layer = EncoderLayer(d_model=512, num_heads=8, dff=2048)

# Print the input shape, then the output shape for comparison.
print(lo_emb.shape)
print(sample_encoder_layer(lo_emb).shape)

In [None]:
class Encoder(tf.keras.layers.Layer):
  """Embedding, dropout and a stack of `EncoderLayer`s.

  Args:
    num_layers: Number of encoder layers in the stack.
    d_model: Model width.
    num_heads: Attention heads per layer.
    dff: Hidden width of each layer's feed-forward network.
    vocab_size: Size of the input vocabulary for the embedding.
    dropout_rate: Dropout rate for the embedding output and for every layer.
  """
  def __init__(self, *, num_layers, d_model, num_heads,
               dff, vocab_size, dropout_rate=0.1):
    super().__init__()

    self.d_model = d_model
    self.num_layers = num_layers

    self.pos_embedding = PositionalEmbedding(
        vocab_size=vocab_size, d_model=d_model)

    layer_kwargs = dict(d_model=d_model, num_heads=num_heads,
                        dff=dff, dropout_rate=dropout_rate)
    self.enc_layers = [EncoderLayer(**layer_kwargs) for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(dropout_rate)

  def call(self, x):
    """Encode token IDs `x` of shape `(batch, seq_len)`.

    Returns:
      Tensor of shape `(batch_size, seq_len, d_model)`.
    """
    hidden = self.dropout(self.pos_embedding(x))

    for encoder_layer in self.enc_layers:
      hidden = encoder_layer(hidden)

    return hidden

In [None]:
# Instantiate the encoder.
# The embedding table is sized from the Lao tokenizer rather than a fixed number,
# so every token ID in `lo` is guaranteed to have a row in the table.
sample_encoder = Encoder(num_layers=4,
                         d_model=512,
                         num_heads=8,
                         dff=2048,
                         vocab_size=tokenizers.lo.get_vocab_size().numpy())

# Run the sample Lao batch through the encoder in inference mode.
sample_encoder_output = sample_encoder(lo, training=False)

# Print the shape.
print(lo.shape)
print(sample_encoder_output.shape)  # Shape `(batch_size, input_seq_len, d_model)`.

In [None]:
class DecoderLayer(tf.keras.layers.Layer):
  """One decoder block: causal self-attention, cross-attention, feed-forward.

  Args:
    d_model: Model width; used as the attention `key_dim` and the FFN output width.
    num_heads: Number of attention heads in both attention layers.
    dff: Hidden width of the feed-forward network.
    dropout_rate: Dropout rate for both attention layers and the
      feed-forward network.
  """
  def __init__(self,
               *,
               d_model,
               num_heads,
               dff,
               dropout_rate=0.1):
    super(DecoderLayer, self).__init__()

    self.causal_self_attention = CausalSelfAttention(
        num_heads=num_heads,
        key_dim=d_model,
        dropout=dropout_rate)

    self.cross_attention = CrossAttention(
        num_heads=num_heads,
        key_dim=d_model,
        dropout=dropout_rate)

    # Forward the configured rate: previously the FFN silently kept its own
    # default, so changing `dropout_rate` only affected the attention layers.
    self.ffn = FeedForward(d_model, dff, dropout_rate=dropout_rate)

  def call(self, x, context):
    """Decode `x` while attending to the encoder output `context`."""
    x = self.causal_self_attention(x=x)
    x = self.cross_attention(x=x, context=context)

    # Cache the last attention scores for plotting later
    self.last_attn_scores = self.cross_attention.last_attn_scores

    x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
    return x

In [None]:
# Smoke-test a single decoder layer: Vietnamese embeddings as the target
# sequence, Lao embeddings as the context.
sample_decoder_layer = DecoderLayer(d_model=512, num_heads=8, dff=2048)

sample_decoder_layer_output = sample_decoder_layer(
    x=vi_emb, context=lo_emb)

# Print both input shapes, then the output shape for comparison.
print(vi_emb.shape)
print(lo_emb.shape)
print(sample_decoder_layer_output.shape)  # `(batch_size, seq_len, d_model)`

In [None]:
class Decoder(tf.keras.layers.Layer):
  """Embedding, dropout and a stack of `DecoderLayer`s.

  Args:
    num_layers: Number of decoder layers in the stack.
    d_model: Model width.
    num_heads: Attention heads per layer.
    dff: Hidden width of each layer's feed-forward network.
    vocab_size: Size of the target vocabulary for the embedding.
    dropout_rate: Dropout rate for the embedding output and for every layer.
  """
  def __init__(self, *, num_layers, d_model, num_heads, dff, vocab_size,
               dropout_rate=0.1):
    super(Decoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers

    self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size,
                                             d_model=d_model)
    self.dropout = tf.keras.layers.Dropout(dropout_rate)

    layer_kwargs = dict(d_model=d_model, num_heads=num_heads,
                        dff=dff, dropout_rate=dropout_rate)
    self.dec_layers = [DecoderLayer(**layer_kwargs) for _ in range(num_layers)]

    # Filled in by `call` with the last layer's cross-attention scores.
    self.last_attn_scores = None

  def call(self, x, context):
    """Decode target token IDs `x` of shape `(batch, target_seq_len)`.

    Args:
      x: Target token IDs.
      context: Encoder output that every layer cross-attends to.

    Returns:
      Tensor of shape `(batch_size, target_seq_len, d_model)`.
    """
    hidden = self.dropout(self.pos_embedding(x))

    for decoder_layer in self.dec_layers:
      hidden = decoder_layer(hidden, context)

    self.last_attn_scores = self.dec_layers[-1].last_attn_scores

    return hidden

In [None]:
# Instantiate the decoder.
# The embedding table is sized from the Vietnamese tokenizer rather than a fixed
# number, so every token ID in `vi` is guaranteed to have a row in the table.
sample_decoder = Decoder(num_layers=4,
                         d_model=512,
                         num_heads=8,
                         dff=2048,
                         vocab_size=tokenizers.vi.get_vocab_size().numpy())

# Decode the sample Vietnamese token IDs against the embedded Lao batch.
output = sample_decoder(
    x=vi,
    context=lo_emb)

# Print the shapes.
print(vi.shape)
print(lo_emb.shape)
print(output.shape)

In [None]:
# Shape of the cross-attention scores cached by the decoder's last layer.
sample_decoder.last_attn_scores.shape  # (batch, heads, target_seq, input_seq)

In [None]:
class Transformer(tf.keras.Model):
  """Encoder-decoder model that maps source and target token IDs to logits.

  Args:
    num_layers: Number of layers in both the encoder and the decoder.
    d_model: Model width.
    num_heads: Attention heads per layer.
    dff: Hidden width of the feed-forward networks.
    input_vocab_size: Vocabulary size for the encoder embedding.
    target_vocab_size: Vocabulary size for the decoder embedding and the
      output projection.
    dropout_rate: Dropout rate passed to the encoder and decoder.
  """
  def __init__(self, *, num_layers, d_model, num_heads, dff,
               input_vocab_size, target_vocab_size, dropout_rate=0.1):
    super().__init__()
    # Settings shared by both halves of the model.
    stack_kwargs = dict(num_layers=num_layers, d_model=d_model,
                        num_heads=num_heads, dff=dff,
                        dropout_rate=dropout_rate)

    self.encoder = Encoder(vocab_size=input_vocab_size, **stack_kwargs)

    self.decoder = Decoder(vocab_size=target_vocab_size, **stack_kwargs)

    self.final_layer = tf.keras.layers.Dense(target_vocab_size)

  def call(self, inputs):
    """Compute logits for a `(context, x)` pair of token-ID batches.

    Keras `.fit` passes all inputs as the first argument, hence the single
    tuple parameter.

    Returns:
      Logits of shape `(batch_size, target_len, target_vocab_size)`. Attention
      weights are not returned; they are cached on the decoder.
    """
    source_ids, target_ids = inputs

    encoded = self.encoder(source_ids)          # (batch_size, context_len, d_model)
    decoded = self.decoder(target_ids, encoded)  # (batch_size, target_len, d_model)
    logits = self.final_layer(decoded)

    try:
      # Drop the keras mask, so it doesn't scale the losses/metrics.
      # b/250038731
      del logits._keras_mask
    except AttributeError:
      # No mask was attached; nothing to remove.
      pass

    return logits

In [None]:
# Hyperparameters used to build the Transformer and its learning-rate schedule.
num_layers = 4
d_model = 128
dff = 512
num_heads = 8
dropout_rate = 0.1

In [None]:
# strategy = tf.distribute.MirroredStrategy()
# print('DEVICES AVAILABLE: {}'.format(strategy.num_replicas_in_sync))

In [None]:
# learning_rate = CustomSchedule(d_model)

# optimizer = tf.keras.optimizers.legacy.Adam(learning_rate, beta_1=0.9, beta_2=0.98,
#                                      epsilon=1e-9)

# # with strategy.scope():
# transformer = Transformer(
# num_layers=num_layers,
# d_model=d_model,
# num_heads=num_heads,
# dff=dff,
# input_vocab_size=tokenizers.lo.get_vocab_size().numpy(),
# target_vocab_size=tokenizers.vi.get_vocab_size().numpy(),
# dropout_rate=dropout_rate)

# output = transformer((lo, vi))

# transformer.compile(
# loss=masked_loss,
# optimizer=optimizer,
# metrics=[masked_accuracy])
    

In [None]:
# Build the model; both vocabulary sizes come from the loaded tokenizers and are
# converted to plain values with `.numpy()`.
transformer = Transformer(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_vocab_size=tokenizers.lo.get_vocab_size().numpy(),
    target_vocab_size=tokenizers.vi.get_vocab_size().numpy(),
    dropout_rate=dropout_rate)

In [None]:
# Run the sample batch through the untrained model once and compare the input
# shapes with the shape of the logits.
output = transformer((lo, vi))

print(vi.shape)
print(lo.shape)
print(output.shape)

In [None]:
# Cross-attention scores cached by the last decoder layer during the call above.
attn_scores = transformer.decoder.dec_layers[-1].last_attn_scores
print(attn_scores.shape)  # (batch, heads, target_seq, input_seq)

In [None]:
# Print the model's layer and parameter summary.
transformer.summary()

In [None]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Warm-up then inverse-square-root learning-rate schedule.

  The rate is `rsqrt(d_model) * min(rsqrt(step), step * warmup_steps**-1.5)`:
  it grows linearly for `warmup_steps` steps and then decays as `1/sqrt(step)`.

  Args:
    d_model: Model width used to scale the rate.
    warmup_steps: Number of steps of linear warm-up.
  """
  def __init__(self, d_model, warmup_steps=4000):
    super().__init__()

    # Keep the constructor argument as given so `get_config` can return it.
    self._d_model_arg = d_model
    self.d_model = tf.cast(d_model, tf.float32)

    self.warmup_steps = warmup_steps

  def __call__(self, step):
    """Return the learning rate for optimizer step `step`."""
    step = tf.cast(step, dtype=tf.float32)
    arg1 = tf.math.rsqrt(step)                  # decay branch
    arg2 = step * (self.warmup_steps ** -1.5)   # warm-up branch

    return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

  def get_config(self):
    """Return the constructor arguments so the schedule can be serialized."""
    return {'d_model': self._d_model_arg, 'warmup_steps': self.warmup_steps}

In [None]:
# Adam optimizer driven by the custom warm-up schedule defined above.
learning_rate = CustomSchedule(d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,
                                     epsilon=1e-9)

In [None]:
# Plot the schedule over the first 40000 training steps.
plt.plot(learning_rate(tf.range(40000, dtype=tf.float32)))
plt.ylabel('Learning Rate')
plt.xlabel('Train Step')

In [None]:
def masked_loss(label, pred):
  """Sparse cross-entropy averaged over the positions whose label is not 0.

  Args:
    label: Integer target token IDs.
    pred: Logits for each position.

  Returns:
    Sum of the per-position losses at non-zero labels divided by the number of
    such positions.
  """
  per_token_loss = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')(label, pred)

  # 1 where the label is a real token, 0 where it is the ID 0.
  keep = tf.cast(label != 0, dtype=per_token_loss.dtype)

  return tf.reduce_sum(per_token_loss * keep)/tf.reduce_sum(keep)


def masked_accuracy(label, pred):
  """Fraction of positions with a non-zero label that are predicted correctly.

  Args:
    label: Integer target token IDs.
    pred: Logits for each position; the arg-max over axis 2 is the prediction.

  Returns:
    Number of correct predictions at non-zero labels divided by the number of
    such positions.
  """
  predicted_ids = tf.argmax(pred, axis=2)
  label = tf.cast(label, predicted_ids.dtype)

  keep = label != 0
  hits = (label == predicted_ids) & keep

  hit_count = tf.reduce_sum(tf.cast(hits, dtype=tf.float32))
  kept_count = tf.reduce_sum(tf.cast(keep, dtype=tf.float32))
  return hit_count/kept_count

In [None]:
# Configure training with the masked loss/accuracy, which ignore label ID 0.
transformer.compile(
    loss=masked_loss,
    optimizer=optimizer,
    metrics=[masked_accuracy])


In [None]:
# List every device TensorFlow can use before starting training.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [None]:
# Display the physical GPU devices TensorFlow detected.
tf.config.list_physical_devices('GPU')

In [None]:
# Train for 20 epochs, evaluating on the validation batches after each epoch.
transformer.fit(train_batches,
                epochs=20,
                validation_data=val_batches)

In [None]:
class Translator(tf.Module):
  """Greedy Lao -> Vietnamese translator built on a trained Transformer.

  Args:
    tokenizers: Object exposing `.lo` and `.vi` tokenizers.
    transformer: Model called as `transformer([encoder_input, output])`.
  """
  def __init__(self, tokenizers, transformer):
    self.tokenizers = tokenizers
    self.transformer = transformer

  def __call__(self, sentence, max_length=MAX_TOKENS):
    """Translate one Lao sentence by greedy decoding.

    Args:
      sentence: `tf.Tensor` holding the Lao string (scalar or batch of one).
      max_length: Maximum number of tokens to generate.

    Returns:
      `(text, tokens, attention_weights)`: the detokenized Vietnamese text, the
      generated token strings, and the decoder's last cross-attention scores.
    """
    # The input sentence is Lao; a scalar string gets a batch dimension first.
    assert isinstance(sentence, tf.Tensor)
    if len(sentence.shape) == 0:
      sentence = sentence[tf.newaxis]

    sentence = self.tokenizers.lo.tokenize(sentence).to_tensor()

    encoder_input = sentence

    # As the output language is Vietnamese, initialize the output with the
    # Vietnamese `[START]` token.
    # NOTE(review): assumes tokenizing '' yields exactly [START, END] — confirm.
    start_end = self.tokenizers.vi.tokenize([''])[0]
    start = start_end[0][tf.newaxis]
    end = start_end[1][tf.newaxis]

    # `tf.TensorArray` is required here (instead of a Python list), so that the
    # dynamic-loop can be traced by `tf.function`.
    output_array = tf.TensorArray(dtype=tf.int64, size=0, dynamic_size=True)
    output_array = output_array.write(0, start)

    for i in tf.range(max_length):
      output = tf.transpose(output_array.stack())
      predictions = self.transformer([encoder_input, output], training=False)

      # Select the last token from the `seq_len` dimension.
      predictions = predictions[:, -1:, :]  # Shape `(batch_size, 1, vocab_size)`.

      predicted_id = tf.argmax(predictions, axis=-1)

      # Concatenate the `predicted_id` to the output which is given to the
      # decoder as its input.
      output_array = output_array.write(i+1, predicted_id[0])

      # Stop as soon as the model emits the end token.
      if predicted_id == end:
        break

    output = tf.transpose(output_array.stack())
    # The output shape is `(1, tokens)`.
    # Use the tokenizers held by this instance (not the notebook-level global), so
    # the translator keeps working with whatever tokenizers it was built with.
    text = self.tokenizers.vi.detokenize(output)[0]  # Shape: `()`.

    tokens = self.tokenizers.vi.lookup(output)[0]

    # `tf.function` prevents us from using the attention_weights that were
    # calculated on the last iteration of the loop.
    # So, recalculate them outside the loop.
    self.transformer([encoder_input, output[:,:-1]], training=False)
    attention_weights = self.transformer.decoder.last_attn_scores

    return text, tokens, attention_weights

In [None]:
# Wrap the trained model and the tokenizers in a translator object.
translator = Translator(tokenizers, transformer)

In [None]:
def print_translation(sentence, tokens, ground_truth):
  """Print the source sentence, the model's prediction and the reference.

  Args:
    sentence: Source text as a string.
    tokens: Object whose `.numpy()` returns the predicted text as UTF-8 bytes
      (the notebook passes the text tensor returned by the translator).
    ground_truth: Reference translation as a string.
  """
  # All three labels share one 15-character column followed by ": ". The label
  # itself must not carry a colon, otherwise the row prints "Input:         :".
  print(f'{"Input":15s}: {sentence}')
  print(f'{"Prediction":15s}: {tokens.numpy().decode("utf-8")}')
  print(f'{"Ground truth":15s}: {ground_truth}')

In [None]:
# Translate one Lao sentence and print it next to its reference translation.
sentence = 'ສາ ທາ ລະ ນະ ລັດ ສັງ ຄົມ ນິ ຍົມ ຫວຽດ ນາມ ຍືນ ຍົງ ຄົງ ຕົວ'
ground_truth = 'Cộng hòa xã hội chủ nghĩa Việt Nam muôn năm'

# The translator expects a tf.Tensor, so wrap the Python string first.
translated_text, translated_tokens, attention_weights = translator(
    tf.constant(sentence))
print_translation(sentence, translated_text, ground_truth)

In [None]:
# class ExportTranslator(tf.Module):
#   def __init__(self, translator):
#     self.translator = translator

#   @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.string)])
#   def __call__(self, sentence):
#     (result,
#      tokens,
#      attention_weights) = self.translator(sentence, max_length=MAX_TOKENS)

#     return result

In [None]:
# translator = ExportTranslator(translator)

In [None]:
# translator('ສາ ທາ ລະ ນະ ລັດ ສັງ ຄົມ ນິ ຍົມ ຫວຽດ ນາມ ຍືນ ຍົງ ຄົງ ຕົວ').numpy()

In [None]:
# Recreate an empty `result` directory, then write the translator as a SavedModel.
!rm -rf result
!mkdir result
# NOTE(review): `Translator.__call__` is not wrapped in `tf.function` here (the
# ExportTranslator wrapper cell is commented out), so the export may not expose a
# callable when reloaded — confirm before relying on `tf.saved_model.load`.
tf.saved_model.save(translator, export_dir='result')

In [None]:
# reloaded = tf.saved_model.load('result')

In [None]:
# reloaded('ສາ ທາ ລະ ນະ ລັດ ສັງ ຄົມ ນິ ຍົມ ຫວຽດ ນາມ ຍືນ ຍົງ ຄົງ ຕົວ').numpy()