In [1]:
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_problems
from tensor2tensor.data_generators import translate
from tensor2tensor.utils import registry
from tensor2tensor import problems
import tensorflow as tf
import os
import logging

logger = logging.getLogger()
tf.logging.set_verbosity(tf.logging.DEBUG)

In [2]:
import sentencepiece as spm

vocab = 'sp10m.cased.t5.model'
sp = spm.SentencePieceProcessor()
sp.Load(vocab)

class Encoder:
    def __init__(self, sp):
        self.sp = sp
        self.vocab_size = sp.GetPieceSize() + 100
    
    def encode(self, s):
        return self.sp.EncodeAsIds(s)
    
    def decode(self, ids, strip_extraneous=False):
        return self.sp.DecodeIds(list(ids))
    
encoder = Encoder(sp)

In [3]:
from tqdm import tqdm
from glob import glob

@registry.register_problem
class Seq2Seq(text_problems.Text2TextProblem):

    @property
    def approx_vocab_size(self):
        return 32100
    
    @property
    def is_generate_per_split(self):
        return False
            
    def feature_encoders(self, data_dir):
        encoder = Encoder(sp)
        return {
            "inputs": encoder,
            "targets": encoder
        }


In [4]:
DATA_DIR = os.path.expanduser('t2t-paraphrase/data')
TMP_DIR = os.path.expanduser('t2t-paraphrase/tmp')

In [5]:
PROBLEM = 'seq2_seq'
t2t_problem = problems.problem(PROBLEM)

In [6]:
import tensorflow as tf
import os

ckpt_path = tf.train.latest_checkpoint(os.path.join('t2t-paraphrase/train-base'))
ckpt_path

't2t-paraphrase/train-base/model.ckpt-100000'

In [7]:
from tensor2tensor import models
from tensor2tensor import problems
from tensor2tensor.layers import common_layers
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import registry
from tensor2tensor.utils import metrics
from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_problems
from tensor2tensor.data_generators import translate
from tensor2tensor.utils import registry







In [8]:
from tensor2tensor.layers import modalities
from tensor2tensor.layers import common_layers

def top_p_logits(logits, p):
    with tf.variable_scope('top_p_logits'):
        logits_sort = tf.sort(logits, direction = 'DESCENDING')
        probs_sort = tf.nn.softmax(logits_sort)
        probs_sums = tf.cumsum(probs_sort, axis = 1, exclusive = True)
        logits_masked = tf.where(
            probs_sums < p, logits_sort, tf.ones_like(logits_sort) * 1000
        )  # [batchsize, vocab]
        min_logits = tf.reduce_min(
            logits_masked, axis = 1, keepdims = True
        )  # [batchsize, 1]
        return tf.where(
            logits < min_logits,
            tf.ones_like(logits, dtype = logits.dtype) * -1e10,
            logits,
        )


def sample(translate_model, features):
    logits, losses = translate_model(features)
    logits_shape = common_layers.shape_list(logits)
    logits_p = logits[:,0,:,0,:] / translate_model.hparams.sampling_temp
    logits_p = top_p_logits(logits_p, translate_model.hparams.top_p)
    reshaped_logits = tf.reshape(logits_p, [-1, logits_shape[-1]])
    choices = tf.multinomial(reshaped_logits, 1)
    samples = tf.reshape(choices, logits_shape[:-1])
    return samples, logits, losses

def nucleus_sampling(translate_model, features, decode_length):
    """A slow greedy inference method.
    Quadratic time in decode_length.
    Args:
      features: an map of string to `Tensor`
      decode_length: an integer.  How many additional timesteps to decode.
    Returns:
      A dict of decoding results {
          "outputs": integer `Tensor` of decoded ids of shape
              [batch_size, <= decode_length] if beam_size == 1 or
              [batch_size, top_beams, <= decode_length]
          "scores": None
          "logits": `Tensor` of shape [batch_size, time, 1, 1, vocab_size].
          "losses": a dictionary: {loss-name (string): floating point `Scalar`}
      }
    """
    if not features:
        features = {}
    inputs_old = None
    if 'inputs' in features and len(features['inputs'].shape) < 4:
        inputs_old = features['inputs']
        features['inputs'] = tf.expand_dims(features['inputs'], 2)
    # Save the targets in a var and reassign it after the tf.while loop to avoid
    # having targets being in a 'while' frame. This ensures targets when used
    # in metric functions stays in the same frame as other vars.
    targets_old = features.get('targets', None)

    target_modality = translate_model._problem_hparams.modality['targets']

    def infer_step(recent_output, recent_logits, unused_loss):
        """Inference step."""
        if not tf.executing_eagerly():
            if translate_model._target_modality_is_real:
                dim = translate_model._problem_hparams.vocab_size['targets']
                if dim is not None and hasattr(
                    translate_model._hparams, 'vocab_divisor'
                ):
                    dim += (-dim) % translate_model._hparams.vocab_divisor
                recent_output.set_shape([None, None, None, dim])
            else:
                recent_output.set_shape([None, None, None, 1])
        padded = tf.pad(recent_output, [[0, 0], [0, 1], [0, 0], [0, 0]])
        features['targets'] = padded
        # This is inefficient in that it generates samples at all timesteps,
        # not just the last one, except if target_modality is pointwise.
        samples, logits, losses = sample(translate_model, features)
        # Concatenate the already-generated recent_output with last timestep
        # of the newly-generated samples.
        top = translate_model._hparams.top.get(
            'targets', modalities.get_top(target_modality)
        )
        if getattr(top, 'pointwise', False):
            cur_sample = samples[:, -1, :, :]
        else:
            cur_sample = samples[
                :, common_layers.shape_list(recent_output)[1], :, :
            ]
        if translate_model._target_modality_is_real:
            cur_sample = tf.expand_dims(cur_sample, axis = 1)
            samples = tf.concat([recent_output, cur_sample], axis = 1)
        else:
            cur_sample = tf.to_int64(tf.expand_dims(cur_sample, axis = 1))
            samples = tf.concat([recent_output, cur_sample], axis = 1)
            if not tf.executing_eagerly():
                samples.set_shape([None, None, None, 1])

        # Assuming we have one shard for logits.
        logits = tf.concat([recent_logits, logits[:, -1:]], 1)
        loss = sum([l for l in losses.values() if l is not None])
        return samples, logits, loss

    # Create an initial output tensor. This will be passed
    # to the infer_step, which adds one timestep at every iteration.
    if 'partial_targets' in features:
        initial_output = tf.to_int64(features['partial_targets'])
        while len(initial_output.get_shape().as_list()) < 4:
            initial_output = tf.expand_dims(initial_output, 2)
        batch_size = common_layers.shape_list(initial_output)[0]
    else:
        batch_size = common_layers.shape_list(features['inputs'])[0]
        if translate_model._target_modality_is_real:
            dim = translate_model._problem_hparams.vocab_size['targets']
            if dim is not None and hasattr(
                translate_model._hparams, 'vocab_divisor'
            ):
                dim += (-dim) % translate_model._hparams.vocab_divisor
            initial_output = tf.zeros(
                (batch_size, 0, 1, dim), dtype = tf.float32
            )
        else:
            initial_output = tf.zeros((batch_size, 0, 1, 1), dtype = tf.int64)
    # Hack: foldl complains when the output shape is less specified than the
    # input shape, so we confuse it about the input shape.
    initial_output = tf.slice(
        initial_output, [0, 0, 0, 0], common_layers.shape_list(initial_output)
    )
    target_modality = translate_model._problem_hparams.modality['targets']
    if (
        target_modality == modalities.ModalityType.CLASS_LABEL
        or translate_model._problem_hparams.get('regression_targets')
    ):
        decode_length = 1
    else:
        if 'partial_targets' in features:
            prefix_length = common_layers.shape_list(
                features['partial_targets']
            )[1]
        else:
            prefix_length = common_layers.shape_list(features['inputs'])[1]
        decode_length = prefix_length + decode_length

    # Initial values of result, logits and loss.
    result = initial_output
    vocab_size = translate_model._problem_hparams.vocab_size['targets']
    if vocab_size is not None and hasattr(
        translate_model._hparams, 'vocab_divisor'
    ):
        vocab_size += (-vocab_size) % translate_model._hparams.vocab_divisor
    if translate_model._target_modality_is_real:
        logits = tf.zeros((batch_size, 0, 1, vocab_size))
        logits_shape_inv = [None, None, None, None]
    else:
        # tensor of shape [batch_size, time, 1, 1, vocab_size]
        logits = tf.zeros((batch_size, 0, 1, 1, vocab_size))
        logits_shape_inv = [None, None, None, None, None]
    if not tf.executing_eagerly():
        logits.set_shape(logits_shape_inv)

    loss = 0.0

    def while_exit_cond(
        result, logits, loss
    ):  # pylint: disable=unused-argument
        """Exit the loop either if reach decode_length or EOS."""
        length = common_layers.shape_list(result)[1]

        not_overflow = length < decode_length

        if translate_model._problem_hparams.stop_at_eos:

            def fn_not_eos():
                return tf.not_equal(  # Check if the last predicted element is a EOS
                    tf.squeeze(result[:, -1, :, :]), 1
                )

            not_eos = tf.cond(
                # We only check for early stopping if there is at least 1 element (
                # otherwise not_eos will crash).
                tf.not_equal(length, 0),
                fn_not_eos,
                lambda: True,
            )

            return tf.cond(
                tf.equal(batch_size, 1),
                # If batch_size == 1, we check EOS for early stopping.
                lambda: tf.logical_and(not_overflow, not_eos),
                # Else, just wait for max length
                lambda: not_overflow,
            )
        return not_overflow

    result, logits, loss = tf.while_loop(
        while_exit_cond,
        infer_step,
        [result, logits, loss],
        shape_invariants = [
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape(logits_shape_inv),
            tf.TensorShape([]),
        ],
        back_prop = False,
        parallel_iterations = 1,
    )
    if inputs_old is not None:  # Restore to not confuse Estimator.
        features['inputs'] = inputs_old
    # Reassign targets back to the previous value.
    if targets_old is not None:
        features['targets'] = targets_old
    losses = {'training': loss}
    if 'partial_targets' in features:
        partial_target_length = common_layers.shape_list(
            features['partial_targets']
        )[1]
        result = tf.slice(
            result, [0, partial_target_length, 0, 0], [-1, -1, -1, -1]
        )
    return {
        'outputs': result,
        'scores': None,
        'logits': logits,
        'losses': losses,
    }

In [9]:
class Model:
    def __init__(self, HPARAMS = "transformer_base", DATA_DIR = 't2t/data'):
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])
        self.top_p = tf.placeholder(tf.float32, None)
        
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.maxlen_decode = tf.reduce_max(self.X_seq_len)
        #self.maxlen_decode = tf.placeholder(tf.int32, None)
        
        x = tf.expand_dims(tf.expand_dims(self.X, -1), -1)
        y = tf.expand_dims(tf.expand_dims(self.Y, -1), -1)
        
        features = {
            "inputs": x,
            "targets": y,
            "target_space_id": tf.constant(1, dtype=tf.int32),
        }
        self.features = features
        
        Modes = tf.estimator.ModeKeys
        hparams = trainer_lib.create_hparams(HPARAMS, data_dir=DATA_DIR, problem_name=PROBLEM)
        hparams.filter_size = 3072
        hparams.hidden_size = 768
        hparams.num_heads = 12
        hparams.num_hidden_layers = 8
        hparams.vocab_divisor = 128
        hparams.label_smoothing = 0.0
        hparams.shared_embedding_and_softmax_weights = False
        hparams.dropout = 0.1
        hparams.max_length = 1024
        hparams.multiproblem_mixing_schedule = "pretrain"

        hparams.optimizer = "Adafactor"
        hparams.learning_rate_warmup_steps = 10000
        hparams.learning_rate_schedule = "rsqrt_decay"
        #hparams.problem_hparams.stop_at_eos = 0
        
        translate_model = registry.model('transformer')(hparams, Modes.PREDICT)
        self.translate_model = translate_model
        logits, _ = translate_model(features)
        self.logits = logits
        translate_model.hparams.top_p = self.top_p
        
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            self.fast_result = translate_model._greedy_infer(features, self.maxlen_decode)["outputs"]
            self.beam_result = translate_model._beam_decode_slow(
                features, self.maxlen_decode, beam_size=3, 
                top_beams=1, alpha=0.5)["outputs"]
            self.nucleus_result = nucleus_sampling(translate_model, features, self.maxlen_decode)["outputs"]
            self.nucleus_result = self.nucleus_result[:,:,0,0]
        
        self.fast_result = tf.identity(self.fast_result, name = 'greedy')
        self.beam_result = tf.identity(self.beam_result, name = 'beam')
        self.nucleus_result = tf.identity(self.nucleus_result, name = 'nucleus')
        
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model()
var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
saver = tf.train.Saver(var_list = var_lists)
saver.restore(sess, ckpt_path)

Instructions for updating:
reduction_indices is deprecated, use axis instead


Instructions for updating:
reduction_indices is deprecated, use axis instead


INFO:tensorflow:Setting T2TModel mode to 'infer'


INFO:tensorflow:Setting T2TModel mode to 'infer'


INFO:tensorflow:Setting hparams.dropout to 0.0


INFO:tensorflow:Setting hparams.dropout to 0.0


INFO:tensorflow:Setting hparams.label_smoothing to 0.0


INFO:tensorflow:Setting hparams.label_smoothing to 0.0


INFO:tensorflow:Setting hparams.layer_prepostprocess_dropout to 0.0


INFO:tensorflow:Setting hparams.layer_prepostprocess_dropout to 0.0


INFO:tensorflow:Setting hparams.symbol_dropout to 0.0


INFO:tensorflow:Setting hparams.symbol_dropout to 0.0


INFO:tensorflow:Setting hparams.attention_dropout to 0.0


INFO:tensorflow:Setting hparams.attention_dropout to 0.0


INFO:tensorflow:Setting hparams.relu_dropout to 0.0


INFO:tensorflow:Setting hparams.relu_dropout to 0.0


INFO:tensorflow:Using variable initializer: uniform_unit_scaling


INFO:tensorflow:Using variable initializer: uniform_unit_scaling






Instructions for updating:
Use `tf.cast` instead.


Instructions for updating:
Use `tf.cast` instead.


Instructions for updating:
Use `tf.cast` instead.


Instructions for updating:
Use `tf.cast` instead.


INFO:tensorflow:Transforming feature 'inputs' with symbol_modality_32128_768.bottom


INFO:tensorflow:Transforming feature 'inputs' with symbol_modality_32128_768.bottom


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


INFO:tensorflow:Transforming feature 'targets' with symbol_modality_32128_768.targets_bottom


INFO:tensorflow:Transforming feature 'targets' with symbol_modality_32128_768.targets_bottom


INFO:tensorflow:Building model body


INFO:tensorflow:Building model body


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where






INFO:tensorflow:Transforming body output with symbol_modality_32128_768.top


INFO:tensorflow:Transforming body output with symbol_modality_32128_768.top


Instructions for updating:
Use `tf.cast` instead.


Instructions for updating:
Use `tf.cast` instead.


INFO:tensorflow:Using variable initializer: uniform_unit_scaling


INFO:tensorflow:Using variable initializer: uniform_unit_scaling


INFO:tensorflow:Transforming feature 'inputs' with symbol_modality_32128_768.bottom


INFO:tensorflow:Transforming feature 'inputs' with symbol_modality_32128_768.bottom


INFO:tensorflow:Transforming feature 'targets' with symbol_modality_32128_768.targets_bottom


INFO:tensorflow:Transforming feature 'targets' with symbol_modality_32128_768.targets_bottom


INFO:tensorflow:Building model body


INFO:tensorflow:Building model body


INFO:tensorflow:Transforming body output with symbol_modality_32128_768.top


INFO:tensorflow:Transforming body output with symbol_modality_32128_768.top


INFO:tensorflow:Using variable initializer: uniform_unit_scaling


INFO:tensorflow:Using variable initializer: uniform_unit_scaling


INFO:tensorflow:Transforming feature 'inputs' with symbol_modality_32128_768.bottom


INFO:tensorflow:Transforming feature 'inputs' with symbol_modality_32128_768.bottom


INFO:tensorflow:Transforming feature 'targets' with symbol_modality_32128_768.targets_bottom


INFO:tensorflow:Transforming feature 'targets' with symbol_modality_32128_768.targets_bottom


INFO:tensorflow:Building model body


INFO:tensorflow:Building model body


INFO:tensorflow:Transforming body output with symbol_modality_32128_768.top


INFO:tensorflow:Transforming body output with symbol_modality_32128_768.top


Instructions for updating:
Use `tf.random.categorical` instead.


Instructions for updating:
Use `tf.random.categorical` instead.


INFO:tensorflow:Restoring parameters from t2t-paraphrase/train-base/model.ckpt-100000


INFO:tensorflow:Restoring parameters from t2t-paraphrase/train-base/model.ckpt-100000


In [10]:
import re
from unidecode import unidecode

def cleaning(string):
    return re.sub(r'[ ]+', ' ', unidecode(string.replace('\n', ' '))).strip()

In [11]:
import re
from unidecode import unidecode
from malaya.text.rules import normalized_chars

def filter_news(string):
    string = string.lower()
    return 'javascript is disabled' in string or 'requires javascript' in string or 'javascript' in string \
    or 'président' in string

def make_cleaning(s, c_dict):
    s = s.translate(c_dict)
    return s

def transformer_textcleaning(string):
    """
    use by any transformer model before tokenization
    """
    string = unidecode(string)
    string = ' '.join(
        [make_cleaning(w, normalized_chars) for w in string.split()]
    )
    string = re.sub('\(dot\)', '.', string)
    string = (
        re.sub(re.findall(r'\<a(.*?)\>', string)[0], '', string)
        if (len(re.findall(r'\<a (.*?)\>', string)) > 0)
        and ('href' in re.findall(r'\<a (.*?)\>', string)[0])
        else string
    )
    string = re.sub(
        r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', ' ', string
    )
    string = string.replace('\n', ' ')
    string = re.sub(r'[ ]+', ' ', string).strip().split()
    string = [w for w in string if w[0] != '@']
    return ' '.join(string)

In [12]:
import json

# !wget https://f000.backblazeb2.com/file/malay-dataset/testset/paraphrase-set.json

with open('../summary/paraphrase-set.json') as fopen:
    test = json.load(fopen)
test.keys()

dict_keys(['before', 'test_before', 'after', 'test_after'])

In [14]:
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences

In [17]:
from tqdm import tqdm

batch_size = 20
results = []
for i in tqdm(range(0, len(test['test_before']), batch_size)):
    batch_x = test['test_before'][i: i + batch_size]
    batches = []
    for b in batch_x:
        encoded = encoder.encode(f'parafrasa: {cleaning(b)}')
        encoded = encoded[:1023] + [1] 
        batches.append(encoded)
    batches = pad_sequences(batches, padding='post')
    g = sess.run(model.fast_result, feed_dict = {model.X:batches})
    for b in g:
        results.append(encoder.decode(b.tolist()))

100%|██████████| 50/50 [01:40<00:00,  2.01s/it]


In [18]:
from tensor2tensor.utils import bleu_hook

bleu_hook.compute_bleu(reference_corpus = test['test_after'], 
                       translation_corpus = results)

0.597052

In [19]:
encoded = encoder.encode(f"parafrasa: {cleaning(test['test_before'][0])}") + [1]
f, b, n = sess.run([model.fast_result, model.beam_result, model.nucleus_result], 
             feed_dict = {model.X: [encoded], model.top_p: 0.7})

(encoder.decode(f[0].tolist()), 
encoder.decode(b[0].tolist()), 
encoder.decode(n[0].tolist()))

('Ayahnya pernah menjadi pandai besi dan penemu dan pernah bekerja di California dengan tali besi.',
 'Ayahnya pernah menjadi pandai besi dan penemu dan pernah bekerja di California dengan tali besi.',
 'Ayahnya pernah menjadi pandai besi dan penemu dan pernah bekerja di California dengan tali besi.')

In [20]:
test['test_after'][0]

'Ayahnya pernah menjadi pandai besi dan penemu dan pernah bekerja di California dengan tali besi.'

In [21]:
encoded = encoder.encode(f"parafrasa: {cleaning(test['test_before'][1])}") + [1]
f, b, n = sess.run([model.fast_result, model.beam_result, model.nucleus_result], 
             feed_dict = {model.X: [encoded], model.top_p: 0.7})

(encoder.decode(f[0].tolist()), 
encoder.decode(b[0].tolist()), 
encoder.decode(n[0].tolist()))

('Bapanya kembali sebagai pemain biola dari Sekolah Rusia ke Bombay.',
 'Bapanya kembali sebagai pemain biola dari Sekolah Rusia ke Bombay.',
 'Bapanya kembali sebagai pemain biola dari Sekolah Rusia ke Bombay.')

In [22]:
test['test_after'][1]

'Bapanya kembali sebagai pemain biola dari Sekolah Rusia ke Bombay.'

In [23]:
encoded = encoder.encode(f"parafrasa: {cleaning(test['test_before'][2])}") + [1]
f, b, n = sess.run([model.fast_result, model.beam_result, model.nucleus_result], 
             feed_dict = {model.X: [encoded], model.top_p: 0.7})

(encoder.decode(f[0].tolist()), 
encoder.decode(b[0].tolist()), 
encoder.decode(n[0].tolist()))

('Sebuah EP yang disebut Dido Live dengan tiga daripada tujuh belas trek langsung dalam DVD dilancarkan secara digital secara digital pada 21 Jun 2005 melalui iTunes Store.',
 'Sebuah EP yang disebut Dido Live dengan tiga daripada tujuh belas trek langsung dalam DVD dilancarkan secara digital secara digital pada 21 Jun 2005 melalui iTunes Store.',
 'Sebuah EP yang disebut Dido Live, dengan tiga daripada tujuh belas trek langsung dalam DVD, dilancarkan secara digital secara digital melalui iTunes Store pada 21 Jun 2005.')

In [24]:
test['test_after'][2]

'Sebuah EP yang dinamakan Dido Live dengan tiga daripada tujuh belas lagu langsung dalam DVD dilancarkan secara digital pada 21 Jun 2005 secara eksklusif di iTunes Store.'

In [25]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'transformer-base/model.ckpt')

'transformer-base/model.ckpt'

In [26]:
strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'greedy' in n.name
        or 'beam' in n.name
        or 'nucleus' in n.name
        or 'alphas' in n.name
        or 'self/Softmax' in n.name)
        and 'adam' not in n.name
        and 'beta' not in n.name
        and 'global_step' not in n.name
        and 'modality' not in n.name
        and 'Assign' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'transformer/body/target_space_embedding/kernel/Read/ReadVariableOp',
 'transformer/parallel_0_4/transformer/transformer/body/target_space_embedding/ReadVariableOp',
 'transformer/body/encoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Read/ReadVariableOp',
 'transformer/body/encoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Read/ReadVariableOp',
 'transformer/parallel_0_4/transformer/transformer/body/encoder/layer_0/self_attention/layer_prepostprocess/layer_norm/ReadVariableOp',
 'transformer/parallel_0_4/transformer/transformer/body/encoder/layer_0/self_attention/layer_prepostprocess/layer_norm/ReadVariableOp_1',
 'transformer/body/encoder/layer_0/self_attention/multihead_attention/q/kernel/Read/ReadVariableOp',
 'transformer/parallel_0_4/transformer/transformer/body/encoder/layer_0/self_attention/multihead_attention/q/Tensordot/ReadVariableOp',
 'transformer/body/encoder/la

In [27]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [28]:
freeze_graph('transformer-base', strings)

INFO:tensorflow:Restoring parameters from transformer-base/model.ckpt


INFO:tensorflow:Restoring parameters from transformer-base/model.ckpt


Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`


Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`


Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


INFO:tensorflow:Froze 293 variables.


INFO:tensorflow:Froze 293 variables.


INFO:tensorflow:Converted 293 variables to const ops.


INFO:tensorflow:Converted 293 variables to const ops.


20955 ops in the final graph.


In [29]:
def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [30]:
g = load_graph('transformer-base/frozen_model.pb')
x = g.get_tensor_by_name('import/Placeholder:0')
greedy = g.get_tensor_by_name('import/greedy:0')
beam = g.get_tensor_by_name('import/beam:0')
nucleus = g.get_tensor_by_name('import/nucleus:0')
test_sess = tf.InteractiveSession(graph = g)

In [31]:
top_p = g.get_tensor_by_name('import/Placeholder_2:0')

In [32]:
g, b, n = test_sess.run([greedy, beam, nucleus], feed_dict = {x:[encoded],
                                                             top_p: 0.7})

In [33]:
(encoder.decode(f[0].tolist()), 
encoder.decode(b[0].tolist()), 
encoder.decode(n[0].tolist()))

('Sebuah EP yang disebut Dido Live dengan tiga daripada tujuh belas trek langsung dalam DVD dilancarkan secara digital secara digital pada 21 Jun 2005 melalui iTunes Store.',
 'Sebuah EP yang disebut Dido Live dengan tiga daripada tujuh belas trek langsung dalam DVD dilancarkan secara digital secara digital pada 21 Jun 2005 melalui iTunes Store.',
 'Sebuah EP yang disebut Dido Live dengan tiga daripada tujuh belas trek langsung dalam DVD dilancarkan secara digital secara digital secara eksklusif melalui iTunes Store pada 21 Jun 2005.')