In [None]:
import tensorflow as tf


from utils.dataprocess import *
from utils.utils_ import *
import argparse
import sys
import numpy as np
from opennmt.inputters.text_inputter import load_pretrained_embeddings



import os
import ipdb
import copy
import yaml
import io

from tensorflow.python.eager import context
from tensorflow.python.framework import common_shapes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras import activations
from tensorflow.python.keras import backend as K
from tensorflow.python.keras import constraints
from tensorflow.python.keras import initializers
from tensorflow.python.keras import regularizers
from tensorflow.python.layers.base import InputSpec
from tensorflow.python.layers.base import Layer
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import standard_ops
from tensorflow.python.util.tf_export import tf_export

In [25]:
import tensorflow as tf
import opennmt as onmt
from opennmt.utils.misc import count_lines
from opennmt.utils.parallel import GraphDispatcher
from opennmt import constants
from opennmt.utils.losses import cross_entropy_sequence_loss
from opennmt.utils.optim import *
from opennmt.utils.evaluator import *




In [2]:
def get_dataset_size(data_file):
    """Return the number of examples in ``data_file``, i.e. its line count."""
    line_total = count_lines(data_file)
    return line_total

In [3]:
def create_embeddings(vocab_size, depth=512):
    """Create (or fetch, under a reusing scope) the variable named "embedding".

    Args:
        vocab_size: number of rows of the embedding matrix.
        depth: embedding dimension (number of columns).

    Returns:
        The variable returned by ``tf.get_variable`` with shape
        ``[vocab_size, depth]``.
    """
    embedding_shape = [vocab_size, depth]
    return tf.get_variable("embedding", shape=embedding_shape)

In [4]:
def load_vocab(vocab_path, vocab_size):
    """Build a token -> id lookup table from a vocabulary file.

    Args:
        vocab_path: path to the vocabulary file (one token per line).
        vocab_size: total vocabulary size including the UNK bucket; when
            falsy it is derived from the file as ``count_lines + 1``.

    Returns:
        A ``(table, vocab_size)`` tuple. The table maps the first
        ``vocab_size - 1`` file entries and sends everything else to a
        single out-of-vocabulary bucket.
    """
    if vocab_size:
        total_size = vocab_size
    else:
        # One extra slot is reserved for UNK.
        total_size = count_lines(vocab_path) + 1

    table = tf.contrib.lookup.index_table_from_file(
        vocab_path,
        vocab_size=total_size - 1,
        num_oov_buckets=1)
    return table, total_size

In [5]:
def get_padded_shapes(dataset):
    """Return ``dataset.output_shapes`` with every shape converted to a list.

    The nested structure of ``output_shapes`` is preserved; the result is
    what ``load_data`` passes as ``padded_shapes`` to ``padded_batch``.
    """
    def _to_list(shape):
        return shape.as_list()

    nest = tf.contrib.framework.nest
    return nest.map_structure(_to_list, dataset.output_shapes)

In [6]:
def filter_irregular_batches(multiple):
    """Return a dataset transformation keeping batches sized in multiples of ``multiple``.

    With ``multiple == 1`` every batch qualifies, so the identity
    transformation is returned. Otherwise the returned callable filters the
    dataset on the first dimension of the first flattened batch component.
    """
    def _keep_all(dataset):
        return dataset

    def _is_regular(*x):
        leading = tf.contrib.framework.nest.flatten(x)[0]
        n_examples = tf.shape(leading)[0]
        return tf.equal(tf.mod(n_examples, multiple), 0)

    def _drop_irregular(dataset):
        return dataset.filter(_is_regular)

    return _keep_all if multiple == 1 else _drop_irregular

In [7]:
def prefetch_element(buffer_size=None):
    """Return a dataset transformation that prefetches ``buffer_size`` elements.

    When ``buffer_size`` is None it is forwarded unchanged if this TensorFlow
    build exposes ``tf.data.experimental`` or ``tf.contrib.data.AUTOTUNE``;
    otherwise it falls back to a buffer of 1.
    """
    can_autotune = hasattr(tf.data, "experimental") or hasattr(tf.contrib.data, "AUTOTUNE")
    if buffer_size is None and not can_autotune:
        buffer_size = 1

    def _apply(dataset):
        return dataset.prefetch(buffer_size)

    return _apply

In [8]:
def load_data(src_path, src_vocab, batch_size=32, batch_type="examples", batch_multiplier=1,
              tgt_path=None, tgt_vocab=None, max_len=50, bucket_width=1, mode="Training",
              padded_shapes=None, shuffle_buffer_size=None, prefetch_buffer_size=100000,
              num_threads=4, version=None, distribution=None, tf_idf_table=None):
    """Build the tf.data input pipeline and return an initializable iterator.

    Args:
        src_path: text file with one whitespace-tokenized source sentence per line.
        src_vocab: lookup table mapping source tokens to ids.
        batch_size: per-device batch size; multiplied by ``batch_multiplier``.
        batch_type: "examples" or "tokens" (only used for bucketed training).
        batch_multiplier: batches are kept only if their size is a multiple of
            this value (the caller passes the number of devices).
        tgt_path: target text file, required when ``mode == "Training"``.
        tgt_vocab: target lookup table, required when ``mode == "Training"``.
        max_len: training examples longer than this on either side are dropped.
        bucket_width: length bucket width; ``None`` disables bucketing.
        mode: "Training", "Inference" or "Predict".
        padded_shapes: optional explicit shapes for ``padded_batch``.
        shuffle_buffer_size: ``None``/0 disables shuffling, a negative value
            shuffles over the whole dataset.
        prefetch_buffer_size: number of batches to prefetch during training.
        num_threads: parallelism of the feature-extraction ``map``.
        version: only ``None`` (the original pipeline) is implemented.
        distribution: accepted for interface compatibility; unused here.
        tf_idf_table: accepted for interface compatibility; unused here.

    Returns:
        The iterator from ``dataset.make_initializable_iterator()``. Training
        elements are dicts with keys src_raw, tgt_raw, src_ids, tgt_ids,
        tgt_ids_in, tgt_ids_out, src_length and tgt_length; other modes only
        carry src_raw, src_ids and src_length.

    Raises:
        ValueError: on an unsupported ``version``, ``mode`` or ``batch_type``,
            or when training is requested without target data.
    """
    # Fail early with a clear message: previously these cases only surfaced
    # as an unbound `dataset` at the final return.
    if version is not None:
        raise ValueError(
            "Unsupported dataprocess version: '{}'; only None is implemented".format(version))
    if mode not in ("Training", "Inference", "Predict"):
        raise ValueError(
            "Invalid mode: '{}'; should be 'Training', 'Inference' or 'Predict'".format(mode))
    if mode == "Training" and (tgt_path is None or tgt_vocab is None):
        raise ValueError("tgt_path and tgt_vocab are required in Training mode")

    batch_size = batch_size * batch_multiplier
    print("batch_size", batch_size)

    def _make_dataset(text_path):
        dataset = tf.data.TextLineDataset(text_path)
        # Split each line on whitespace into a 1-D string tensor of tokens.
        return dataset.map(lambda x: tf.string_split([x]).values)

    def _batch_func(dataset):
        return dataset.padded_batch(
            batch_size,
            padded_shapes=padded_shapes or get_padded_shapes(dataset))

    def _key_func(example):
        # Bucket id is the larger of the two lengths, in units of bucket_width.
        bucket_id = tf.constant(0, dtype=tf.int32)
        bucket_id = tf.maximum(bucket_id, example["src_length"] // bucket_width)
        bucket_id = tf.maximum(bucket_id, example["tgt_length"] // bucket_width)
        return tf.cast(bucket_id, tf.int64)

    def _reduce_func(unused_key, dataset):
        return _batch_func(dataset)

    def _window_size_func(key):
        if bucket_width > 1:
            key += 1  # For bucket_width == 1, key 0 is unassigned.
        size = batch_size // (key * bucket_width)
        if batch_multiplier > 1:
            # Make the window size a multiple of batch_multiplier.
            size = size + batch_multiplier - size % batch_multiplier
        return tf.to_int64(tf.maximum(size, batch_multiplier))

    bos = tf.constant([constants.START_OF_SENTENCE_ID], dtype=tf.int64)
    eos = tf.constant([constants.END_OF_SENTENCE_ID], dtype=tf.int64)

    def _training_features(src_tokens, tgt_tokens):
        # Look each side up once and reuse the ids for every derived feature.
        src_ids = src_vocab.lookup(src_tokens)
        tgt_ids = tgt_vocab.lookup(tgt_tokens)
        return {
            "src_raw": src_tokens,
            "tgt_raw": tgt_tokens,
            "src_ids": src_ids,
            "tgt_ids": tgt_ids,
            "tgt_ids_in": tf.concat([bos, tgt_ids], axis=0),
            "tgt_ids_out": tf.concat([tgt_ids, eos], axis=0),
            "src_length": tf.shape(src_ids)[0],
            "tgt_length": tf.shape(tgt_ids)[0],
        }

    def _source_features(src_tokens):
        src_ids = src_vocab.lookup(src_tokens)
        return {
            "src_raw": src_tokens,
            "src_ids": src_ids,
            "src_length": tf.shape(src_ids)[0],
        }

    def _has_valid_lengths(example):
        non_empty = tf.logical_and(
            tf.greater(example["src_length"], 0),
            tf.greater(example["tgt_length"], 0))
        short_enough = tf.logical_and(
            tf.less_equal(example["src_length"], max_len),
            tf.less_equal(example["tgt_length"], max_len))
        return tf.logical_and(non_empty, short_enough)

    print("old dataprocessing version")
    src_dataset = _make_dataset(src_path)

    if mode != "Training":
        # Inference and Predict share the same source-only pipeline.
        dataset = src_dataset.map(_source_features, num_parallel_calls=num_threads)
        dataset = dataset.apply(_batch_func)
        return dataset.make_initializable_iterator()

    dataset = tf.data.Dataset.zip((src_dataset, _make_dataset(tgt_path)))
    dataset = dataset.map(_training_features, num_parallel_calls=num_threads)

    if shuffle_buffer_size is not None and shuffle_buffer_size != 0:
        dataset_size = get_dataset_size(src_path)
        if dataset_size is not None:
            if shuffle_buffer_size < 0:
                shuffle_buffer_size = dataset_size
            elif shuffle_buffer_size < dataset_size:
                # The buffer cannot hold the whole dataset: shard it in a
                # random order first. This branch was previously attached to
                # the outer `if` and therefore never ran.
                # NOTE(review): random_shard is not defined in this notebook;
                # it is presumably provided by one of the star imports - confirm.
                dataset = dataset.apply(random_shard(shuffle_buffer_size, dataset_size))
        dataset = dataset.shuffle(shuffle_buffer_size)

    dataset = dataset.filter(_has_valid_lengths)

    if bucket_width is None:
        dataset = dataset.apply(_batch_func)
    else:
        if hasattr(tf.data, "experimental"):
            group_by_window_fn = tf.data.experimental.group_by_window
        else:
            group_by_window_fn = tf.contrib.data.group_by_window
        print("batch type: ", batch_type)
        if batch_type == "examples":
            dataset = dataset.apply(
                group_by_window_fn(_key_func, _reduce_func, window_size=batch_size))
        elif batch_type == "tokens":
            dataset = dataset.apply(
                group_by_window_fn(_key_func, _reduce_func, window_size_func=_window_size_func))
        else:
            raise ValueError(
                "Invalid batch type: '{}'; should be 'examples' or 'tokens'".format(batch_type))

    dataset = dataset.apply(filter_irregular_batches(batch_multiplier))
    dataset = dataset.repeat()
    dataset = dataset.apply(prefetch_element(buffer_size=prefetch_buffer_size))

    return dataset.make_initializable_iterator()

In [9]:
class Model:
    """TF1 graph builder for an OpenNMT-based sequence-to-sequence model.

    Constructing an instance reads the YAML configuration, creates the input
    pipeline and then builds either the training loss (``mode == "Training"``)
    or the decoding predictions (``mode == "Inference"``).
    """

    def _compute_loss(self, outputs, tgt_ids_batch, tgt_length, params, mode):
        """Compute the sequence loss for one shard.

        Args:
            outputs: logits tensor, or a dict holding it under "logits".
            tgt_ids_batch: gold target ids (the "tgt_ids_out" feature).
            tgt_length: target lengths without the end-of-sentence token.
            params: configuration dict ("label_smoothing", "average_loss_in_time").
            mode: "Training" selects TRAIN behaviour, anything else EVAL.

        Returns:
            ``(loss, loss_normalizer, loss_token_normalizer)`` for the
            cross-entropy loss, or None for any other ``self.Loss_type``.
        """
        if self.Loss_type != "Cross_Entropy":
            return None

        if mode == "Training":
            mode = tf.estimator.ModeKeys.TRAIN
        else:
            mode = tf.estimator.ModeKeys.EVAL

        logits = outputs["logits"] if isinstance(outputs, dict) else outputs

        loss, loss_normalizer, loss_token_normalizer = cross_entropy_sequence_loss(
            logits,
            tgt_ids_batch,
            tgt_length + 1,  # +1 for the EOS appended to tgt_ids_out
            label_smoothing=params.get("label_smoothing", 0.0),
            average_in_time=params.get("average_loss_in_time", True),
            mode=mode
        )
        return loss, loss_normalizer, loss_token_normalizer

    def _initializer(self, params):
        """Return the default variable initializer for the architecture, or None."""
        if params["Architecture"] == "Transformer":
            print("tf.variance_scaling_initializer")
            return tf.variance_scaling_initializer(
                mode="fan_avg", distribution="uniform", dtype=self.dtype)

        param_init = params.get("param_init")
        if param_init is not None:
            print("tf.random_uniform_initializer")
            return tf.random_uniform_initializer(
                minval=-param_init, maxval=param_init, dtype=self.dtype)
        return None

    def __init__(self, config_file, mode, test_feature_file=None):
        """Build the graph described by ``config_file``.

        Args:
            config_file: path to the YAML configuration.
            mode: "Training" or "Inference".
            test_feature_file: source file to translate; required for
                "Inference".

        Raises:
            ValueError: if ``mode`` is neither "Training" nor "Inference".
        """

        def _normalize_loss(num, den=None):
            """Normalizes the loss."""
            if isinstance(num, list):  # Sharded mode.
                if den is not None:
                    assert isinstance(den, list)
                    return tf.add_n(num) / tf.add_n(den)
                return tf.reduce_mean(num)
            if den is not None:
                return num / den
            return num

        def _extract_loss(loss, Loss_type="Cross_Entropy"):
            """Extracts and summarizes the loss."""
            losses = None
            print("loss numb:", len(loss))
            if Loss_type == "Cross_Entropy":
                if not isinstance(loss, tuple):
                    print(1)
                    actual_loss = _normalize_loss(loss)
                    tboard_loss = actual_loss
                else:
                    # loss = (sum, normalizer[, token normalizer])
                    actual_loss = _normalize_loss(loss[0], den=loss[1])
                    tboard_loss = _normalize_loss(loss[0], den=loss[2]) if len(loss) > 2 else actual_loss
                tf.summary.scalar("loss", tboard_loss)
                losses = actual_loss
            return losses

        def _loss_op(inputs, params, mode):
            """Single callable to compute the loss."""
            logits, _, tgt_ids_out, tgt_length = self._build(inputs, params, mode)
            return self._compute_loss(logits, tgt_ids_out, tgt_length, params, mode)

        if mode not in ("Training", "Inference"):
            # Previously this only failed later on an unbound `iterator`.
            raise ValueError(
                "Invalid mode: '{}'; should be 'Training' or 'Inference'".format(mode))

        with open(config_file, "r") as stream:
            # safe_load only builds plain Python objects; yaml.load without an
            # explicit Loader can construct arbitrary objects and is rejected
            # outright by recent PyYAML releases.
            config = yaml.safe_load(stream)

        Loss_type = config.get("Loss_Function", "Cross_Entropy")

        self.Loss_type = Loss_type
        self.config = config
        self.using_tf_idf = config.get("using_tf_idf", False)
        # Defined in every mode so the accessors never raise AttributeError.
        self.loss = None
        self.predictions = None

        train_batch_size = config["training_batch_size"]
        eval_batch_size = config["eval_batch_size"]
        max_len = config["max_len"]

        example_sampling_distribution = config.get("example_sampling_distribution", None)
        self.dtype = tf.float32

        # Input pipeline:
        # Return lookup table of type index_table_from_file
        src_vocab, _ = load_vocab(config["src_vocab_path"], config["src_vocab_size"])
        tgt_vocab, _ = load_vocab(config["tgt_vocab_path"], config["tgt_vocab_size"])

        load_data_version = config.get("dataprocess_version", None)

        if mode == "Training":
            print("num_devices", config.get("num_devices", 1))

            dispatcher = GraphDispatcher(
                config.get("num_devices", 1),
                daisy_chain_variables=config.get("daisy_chain_variables", False),
                devices=config.get("devices", None)
            )

            batch_multiplier = config.get("num_devices", 1)
            num_threads = config.get("num_threads", 4)

            if Loss_type == "Wasserstein":
                self.using_tf_idf = True

            if self.using_tf_idf:
                tf_idf_table = build_tf_idf_table(
                    config["tgt_vocab_path"],
                    config["tgt_vocab_size"],
                    config["domain_numb"],
                    config["training_feature_file"])
                self.tf_idf_table = tf_idf_table

            # NOTE(review): training reads the *label* file as source and the
            # *feature* file as target, whereas inference feeds a feature file
            # as source - confirm this matches the configuration's naming.
            iterator = load_data(
                config["training_label_file"],
                src_vocab,
                batch_size=train_batch_size,
                batch_type=config["training_batch_type"],
                batch_multiplier=batch_multiplier,
                tgt_path=config["training_feature_file"],
                tgt_vocab=tgt_vocab,
                max_len=max_len,
                mode=mode,
                shuffle_buffer_size=config["sample_buffer_size"],
                num_threads=num_threads,
                version=load_data_version,
                distribution=example_sampling_distribution
            )

            inputs = iterator.get_next()
            data_shards = dispatcher.shard(inputs)

            with tf.variable_scope(config["Architecture"], initializer=self._initializer(config)):
                losses_shards = dispatcher(_loss_op, data_shards, config, mode)

            self.loss = _extract_loss(losses_shards, Loss_type=Loss_type)

        else:  # mode == "Inference"
            assert test_feature_file is not None, "test_feature_file is required in Inference mode"

            iterator = load_data(
                test_feature_file,
                src_vocab,
                batch_size=eval_batch_size,
                batch_type="examples",
                batch_multiplier=1,
                max_len=max_len,
                mode=mode,
                version=load_data_version
            )

            inputs = iterator.get_next()

            with tf.variable_scope(config["Architecture"]):
                _, self.predictions, _, _ = self._build(inputs, config, mode)

        self.iterator = iterator
        self.inputs = inputs

    def loss_(self):
        """Return the training loss tensor (None outside Training mode)."""
        return self.loss

    def prediction_(self):
        """Return the predictions dict (None outside Inference mode)."""
        return self.predictions

    def inputs_(self):
        """Return the batch of input tensors produced by the iterator."""
        return self.inputs

    def iterator_initializers(self):
        """Return the list of initializer ops for the dataset iterator(s)."""
        if isinstance(self.iterator, list):
            return [iterator.initializer for iterator in self.iterator]
        return [self.iterator.initializer]

    def _build(self, inputs, config, mode):
        """Build encoder, decoder and either training logits or decoded output.

        Args:
            inputs: dict of batch tensors as produced by ``load_data``.
            config: configuration dict.
            mode: "Training" builds teacher-forced logits; any other value
                builds the search/decoding graph.

        Returns:
            ``(outputs, predictions, tgt_ids_batch, tgt_length)``. In Training
            mode ``predictions`` is None; otherwise the other three are None.

        Raises:
            ValueError: if ``config["Architecture"]`` is not "GRU", "LSTM" or
                "Transformer".
        """
        architecture = config["Architecture"]
        if architecture not in ("GRU", "LSTM", "Transformer"):
            # Previously an unknown architecture left encoder/decoder unbound.
            raise ValueError(
                "Invalid Architecture: '{}'; should be 'GRU', 'LSTM' or 'Transformer'".format(architecture))

        Loss_type = self.Loss_type
        print("Loss_type: ", Loss_type)

        hidden_size = config["hidden_size"]
        print("hidden size: ", hidden_size)

        tgt_vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
            config["tgt_vocab_path"],
            vocab_size=int(config["tgt_vocab_size"]) - 1,
            default_value=constants.UNKNOWN_TOKEN)
        end_token = constants.END_OF_SENTENCE_ID

        # Embedding
        size_src = config.get("src_embedding_size", 512)
        size_tgt = config.get("tgt_embedding_size", 512)
        with tf.variable_scope("src_embedding"):
            src_emb = create_embeddings(config["src_vocab_size"], depth=size_src)

        with tf.variable_scope("tgt_embedding"):
            tgt_emb = create_embeddings(config["tgt_vocab_size"], depth=size_tgt)

        self.tgt_emb = tgt_emb
        self.src_emb = src_emb

        # Build encoder, decoder
        if architecture == "GRU":
            nlayers = config.get("nlayers", 4)
            encoder = onmt.encoders.BidirectionalRNNEncoder(
                nlayers, hidden_size, reducer=onmt.layers.ConcatReducer(),
                cell_class=tf.contrib.rnn.GRUCell, dropout=0.1, residual_connections=True)
            decoder = onmt.decoders.AttentionalRNNDecoder(
                nlayers, hidden_size, bridge=onmt.layers.CopyBridge(),
                cell_class=tf.contrib.rnn.GRUCell, dropout=0.1, residual_connections=True)
        elif architecture == "LSTM":
            nlayers = config.get("nlayers", 4)
            encoder = onmt.encoders.BidirectionalRNNEncoder(
                nlayers, num_units=hidden_size, reducer=onmt.layers.ConcatReducer(),
                cell_class=tf.nn.rnn_cell.LSTMCell, dropout=0.1, residual_connections=True)
            decoder = onmt.decoders.AttentionalRNNDecoder(
                nlayers, num_units=hidden_size, bridge=onmt.layers.CopyBridge(),
                attention_mechanism_class=tf.contrib.seq2seq.LuongAttention,
                cell_class=tf.nn.rnn_cell.LSTMCell, dropout=0.1, residual_connections=True)
        else:  # Transformer
            nlayers = config.get("nlayers", 6)
            decoder = onmt.decoders.self_attention_decoder.SelfAttentionDecoder(
                nlayers, num_units=hidden_size, num_heads=8, ffn_inner_dim=2048,
                dropout=0.1, attention_dropout=0.1, relu_dropout=0.1)
            encoder = onmt.encoders.self_attention_encoder.SelfAttentionEncoder(
                nlayers, num_units=hidden_size, num_heads=8, ffn_inner_dim=2048,
                dropout=0.1, attention_dropout=0.1, relu_dropout=0.1)
        print("Model type: ", config["Architecture"])

        training = mode == "Training"
        if training:
            print("Building model in Training mode")
        elif mode == "Inference":
            print("Build model in Inference mode")
        start_tokens = tf.fill([tf.shape(inputs["src_ids"])[0]], constants.START_OF_SENTENCE_ID)

        emb_src_batch = tf.nn.embedding_lookup(src_emb, inputs["src_ids"])  # dim = [batch, length, depth]

        self.emb_src_batch = emb_src_batch
        print("emb_src_batch: ", emb_src_batch)

        # Bound in every mode so the return statement and self.outputs are
        # always well defined.
        outputs = None
        predictions = None
        tgt_ids_batch = None
        tgt_length = None

        if training:
            emb_tgt_batch = tf.nn.embedding_lookup(tgt_emb, inputs["tgt_ids_in"])
            self.emb_tgt_batch = emb_tgt_batch
            print("emb_tgt_batch: ", emb_tgt_batch)
            tgt_ids_batch = inputs["tgt_ids_out"]

        src_length = inputs["src_length"]

        with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
            encoder_mode = tf.estimator.ModeKeys.TRAIN if training else tf.estimator.ModeKeys.PREDICT
            encoder_output = encoder.encode(emb_src_batch, sequence_length=src_length, mode=encoder_mode)
            self.encoder_output = encoder_output

        output_layer = None
        if training:
            # Target features only exist in the training pipeline; reading
            # "tgt_length" unconditionally raised KeyError at inference time.
            tgt_length = inputs["tgt_length"]
            if Loss_type == "Cross_Entropy":
                with tf.variable_scope("decoder"):
                    decode_kwargs = dict(
                        vocab_size=int(config["tgt_vocab_size"]),
                        initial_state=encoder_output[1],
                        output_layer=output_layer,
                        mode=tf.estimator.ModeKeys.TRAIN,
                        memory=encoder_output[0],
                        memory_sequence_length=encoder_output[2],
                        return_alignment_history=True)
                    if not config.get("Standard", True):
                        # NOTE(review): position_mask_tgt is not defined in
                        # this notebook; unless a star import provides it this
                        # branch raises NameError - confirm before enabling.
                        decode_kwargs["emb_mask"] = position_mask_tgt
                    logits, _, _, _ = decoder.decode(
                        emb_tgt_batch,
                        tgt_length + 1,
                        **decode_kwargs)
                    outputs = {
                        "logits": logits
                    }
        else:
            with tf.variable_scope("decoder"):
                beam_width = config.get("beam_width", 5)
                print("Inference with beam width %d"%(beam_width))
                maximum_iterations = config.get("maximum_iterations", 250)

                search_kwargs = dict(
                    vocab_size=int(config["tgt_vocab_size"]),
                    initial_state=encoder_output[1],
                    maximum_iterations=maximum_iterations,
                    output_layer=output_layer,
                    mode=tf.estimator.ModeKeys.PREDICT,
                    memory=encoder_output[0],
                    memory_sequence_length=encoder_output[2],
                    dtype=tf.float32,
                    return_alignment_history=True)

                if beam_width <= 1:
                    decoded = decoder.dynamic_decode(
                        tgt_emb, start_tokens, end_token, **search_kwargs)
                else:
                    decoded = decoder.dynamic_decode_and_search(
                        tgt_emb, start_tokens, end_token,
                        beam_width=beam_width,
                        length_penalty=config.get("length_penalty", 0),
                        **search_kwargs)
                sampled_ids, _, sampled_length, log_probs, alignment = decoded

            target_tokens = tgt_vocab_rev.lookup(tf.cast(sampled_ids, tf.int64))

            predictions = {
                "tokens": target_tokens,
                "length": sampled_length,
                "log_probs": log_probs,
                "alignment": alignment,
            }

        self.outputs = outputs

        return outputs, predictions, tgt_ids_batch, tgt_length

In [None]:
import tensorflow as tf
import opennmt as onmt
from opennmt.utils.optim import *
from utils.dataprocess import *
from utils.utils_ import *
import argparse
import sys
import numpy as np
from opennmt.inputters.text_inputter import load_pretrained_embeddings
from opennmt.utils.losses import cross_entropy_sequence_loss
from opennmt.utils.evaluator import *
from model import *

import ipdb

import io
from tensorflow.python.framework import ops
import datetime

In [10]:
import yaml
import os

In [11]:
# Path to the YAML experiment configuration; it is parsed in the next cell and
# also passed to Model and to inference() further down.
config_file = 'config_tuanh.yml'

In [12]:
# Parse the experiment configuration into a plain dict. safe_load is used
# because yaml.load without an explicit Loader can construct arbitrary Python
# objects and is rejected by recent PyYAML releases.
with open(config_file, "r") as stream:
    config = yaml.safe_load(stream)

  


In [None]:
# Inspect which top-level options the configuration defines.
config.keys()

In [None]:
# Inspect the optimizer settings that are later handed to optimize_loss.
config['optimizer_parameters']

In [None]:
# Inspect the directory used for checkpoints, summaries and evaluation output.
config['model_dir']

In [13]:
# Eval directory stores prediction files.
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(os.path.join(config["model_dir"], "eval"), exist_ok=True)

In [14]:
# Build the training graph (input pipeline, sharded model and loss) from the
# configuration file; the printed lines below are emitted during construction.
training_model = Model(config_file, "Training")



num_devices 2
batch_size 6144
old dataprocessing version
batch type:  tokens
tf.variance_scaling_initializer
Loss_type:  Cross_Entropy
hidden size:  512
Model type:  Transformer
Building model in Training mode
emb_src_batch:  Tensor("Transformer/parallel_0/Transformer/embedding_lookup:0", shape=(?, ?, 512), dtype=float32, device=/device:GPU:0)
emb_tgt_batch:  Tensor("Transformer/parallel_0/Transformer/embedding_lookup_1:0", shape=(?, ?, 512), dtype=float32, device=/device:GPU:0)
Loss_type:  Cross_Entropy
hidden size:  512
Model type:  Transformer
Building model in Training mode
emb_src_batch:  Tensor("Transformer/parallel_1/Transformer/embedding_lookup:0", shape=(?, ?, 512), dtype=float32, device=/device:GPU:1)
emb_tgt_batch:  Tensor("Transformer/parallel_1/Transformer/embedding_lookup_1:0", shape=(?, ?, 512), dtype=float32, device=/device:GPU:1)
loss numb: 3


In [15]:
# Global step tensor; the training loop fetches it to schedule logging,
# summaries, checkpoints and evaluation.
global_step = tf.train.create_global_step()

In [16]:
# Model only builds a loss for the cross-entropy objective. Fail here for any
# other setting instead of hitting a NameError on `generator_total_loss` in a
# later cell.
loss_function = config.get("Loss_Function", "Cross_Entropy")
if loss_function == "Cross_Entropy":
    generator_total_loss = training_model.loss_()
else:
    raise ValueError("Unsupported Loss_Function: '{}'".format(loss_function))

In [17]:
# Dict of batch tensors returned by the training iterator's get_next().
inputs = training_model.inputs_()

In [18]:
# Debug aid: list the collections present in the default graph after the model was built.
print(tf.get_default_graph().get_all_collection_keys())

['table_initializer', 'asset_filepaths', 'variables', 'trainable_variables', 'update_ops', 'model_variables', 'summaries', 'global_step']


In [19]:
# Debug aid: show which Graph object is currently the default.
print(tf.get_default_graph())

<tensorflow.python.framework.ops.Graph object at 0x7fd4774e2080>


In [22]:
# Create the optimisation op (and the gradient-accumulation variables returned
# alongside it) under a dedicated variable scope.
if config["mode"] == "Training":
    optimizer_params = config["optimizer_parameters"]
    with tf.variable_scope("main_training"):
        train_op, accum_vars_ = optimize_loss(generator_total_loss, optimizer_params)

In [23]:
# One evaluator slot and one BLEU summary-writer slot per validation set;
# both lists are filled in by the next cell.
Eval_dataset_numb = len(config["eval_label_file"])
print("Number of validation set: ", Eval_dataset_numb)
external_evaluator = [None for _ in range(Eval_dataset_numb)]
writer_bleu = [None for _ in range(Eval_dataset_numb)]

Number of validation set:  1


In [26]:
# Create the BLEU evaluator and its summary writer for every validation set.
for i in range(Eval_dataset_numb):
    bleu_log_dir = os.path.join(config["model_dir"], "BLEU", "domain_%d"%i)
    external_evaluator[i] = BLEUEvaluator(config["eval_label_file"][i], config["model_dir"])
    writer_bleu[i] = tf.summary.FileWriter(bleu_log_dir)

In [None]:
# Show the reference (label) files the BLEU evaluators were created with.
config["eval_label_file"]

In [None]:
# Training driver: restores the latest checkpoint when one exists, then runs
# optimisation steps with periodic logging, summaries, checkpoints and BLEU
# evaluation until `iteration_number` is exceeded.
session_config = tf.ConfigProto(
    log_device_placement=False,
    allow_soft_placement=True,
    gpu_options=tf.GPUOptions(allow_growth=True))

with tf.Session(config=session_config) as sess:

    writer = tf.summary.FileWriter(config["model_dir"])
    try:
        # Collect every variable to initialise and checkpoint. The global
        # variable list is fetched once instead of once per membership test.
        global_vars = tf.global_variables()
        var_list_ = list(global_vars)
        for v in tf.trainable_variables():
            if v not in global_vars:
                var_list_.append(v)
        for v in var_list_:
            print(v.name)
        saver = tf.train.Saver(var_list_, max_to_keep=config["max_to_keep"])
        checkpoint_path = tf.train.latest_checkpoint(config["model_dir"])

        sess.run([v.initializer for v in var_list_])
        sess.run([v.initializer for v in accum_vars_])

        training_summary = tf.summary.merge_all()
        global_step_ = sess.run(global_step)
        if checkpoint_path:
            print("Continue training:...")
            print("Load parameters from %s"%checkpoint_path)
            saver.restore(sess, checkpoint_path)
            global_step_ = sess.run(global_step)
            print("global_step: ", global_step_)

            # Score the restored checkpoint before resuming training.
            for i in range(Eval_dataset_numb):
                prediction_file = inference(config_file, checkpoint_path, config["eval_feature_file"][i])
                score = external_evaluator[i].score(config["eval_label_file"][i], prediction_file)
                print("BLEU at checkpoint %s for testset %s: %f"%(checkpoint_path, config["eval_feature_file"][i], score))
        else:
            print("Training from scratch")

        tf.tables_initializer().run()
        sess.run(training_model.iterator_initializers())
        total_loss = []

        while global_step_ <= config["iteration_number"]:

            loss_, global_step_, _ = sess.run([generator_total_loss, global_step, train_op])
            total_loss.append(loss_)

            if (np.mod(global_step_, config["printing_freq"])) == 0:
                print((datetime.datetime.now()))
                print(("Loss at step %d"%(global_step_), np.mean(total_loss)))

            if (np.mod(global_step_, config["summary_freq"])) == 0:
                # NOTE(review): this extra run evaluates the summaries on a
                # fresh batch pulled from the iterator, not the one just
                # trained on.
                training_summary_ = sess.run(training_summary)
                writer.add_summary(training_summary_, global_step=global_step_)
                writer.flush()
                total_loss = []

            if (np.mod(global_step_, config["save_freq"])) == 0 and global_step_ > 0:
                print((datetime.datetime.now()))
                checkpoint_path = os.path.join(config["model_dir"], 'model.ckpt')
                print(("save to %s"%(checkpoint_path)))
                saver.save(sess, checkpoint_path, global_step = global_step_)

            if (np.mod(global_step_, config["eval_freq"])) == 0 and global_step_ > 0:
                checkpoint_path = tf.train.latest_checkpoint(config["model_dir"])
                if checkpoint_path is None:
                    # Nothing has been saved yet (eval_freq fired before
                    # save_freq); inference needs a checkpoint to load.
                    print("No checkpoint found in %s; skipping evaluation"%(config["model_dir"]))
                else:
                    for i in range(Eval_dataset_numb):
                        prediction_file = inference(config_file, checkpoint_path, config["eval_feature_file"][i])
                        score = external_evaluator[i].score(config["eval_label_file"][i], prediction_file)
                        print("BLEU at checkpoint %s for testset %s: %f"%(checkpoint_path,config["eval_label_file"][i], score))
                        score_summary = tf.Summary(value=[tf.Summary.Value(tag="eval_score_%d"%i, simple_value=score)])
                        writer_bleu[i].add_summary(score_summary, global_step_)
                        writer_bleu[i].flush()
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()