In [0]:
# !pip install tf-nightly

In [1]:
# prepare dataset

from utility import load_train_data
from collections import Counter
import os
import numpy as np


In [2]:
import tensorflow as tf
tf.enable_eager_execution()

print("tf version: ", tf.VERSION)

tf version:  1.13.0-dev20190111


In [3]:
import random as rn

np.random.seed(42)
rn.seed(12345)
tf.set_random_seed(1234)

In [4]:
# prepare train data

def parse_file(file):
    for line in file:
        line = line.rstrip('\n')
        sentence = line.split(' ')
        yield sentence

# TODO: current method does not allow the model to learn boundary beyond bigram.
def adjust_size(sentences, sentence_size):
    # Increment sentence size for shifting output later
    sentence_size_plus = sentence_size + 1

    for sentence in sentences:
        # Insert BOS = Beginning Of Sentence
        sentence.insert(0, '_BOS/_BOS')

        # Split long sentence allowing overlap of 1 word
        while len(sentence) >= sentence_size_plus:
            yield sentence[:sentence_size_plus]
            sentence = sentence[sentence_size:]

        # Do not yield EOS-only sentence
        if sentence:
            # Insert EOS = End Of Sentence
            sentence.append('_EOS/_EOS')

            if len(sentence) < sentence_size_plus:
                # Padding sentence to make its size sentence_size_plus
                sentence += ['_PAD/_PAD'] * (sentence_size_plus - len(sentence))
            yield sentence
        

def create_vocabulary(sentences, vocabulary_size):
    # Create list of words indexed by word ID
    counter = Counter(word for words in sentences for word in words)
    most_common = counter.most_common(vocabulary_size - 1)
    vocabulary = [word for word, count in most_common]
    vocabulary.insert(0, '_UNK/_UNK')
    return vocabulary


def convert_to_ids(sentences, vocabulary):
    dictionary = dict((word, word_id) for word_id, word in enumerate(vocabulary))

    for sentence in sentences:
        word_ids = []

        for word in sentence:
            if word in dictionary:
                word_id = dictionary[word]
            else:
                word_id = dictionary['_UNK/_UNK']
            word_ids.append(word_id)

        yield word_ids

        
# TODO: current batching ignores sentences that does't fit into last batch.
def create_batches(sentences, batch_size):
    all_batches = int(len(sentences) / batch_size)

    for i in range(all_batches):
        batch_sentences = sentences[i * batch_size:(i + 1) * batch_size]
        batch_input = []
        batch_output = []

        for sentence in batch_sentences:
            # Shift sentence by 1 time step
            input_ = sentence[:-1]
            output_ = sentence[1:]

            batch_input.append(input_)
            batch_output.append(output_)

        yield batch_input, batch_output    

        
def create_pair(sentences):
    
    print("sentences count: ", len(sentences))
#     print(sentences)
    
    input_list = []
    output_list = []
    
    i=0
    for sentence in sentences:
#         print("aaa i: {}", i)
        input_ = sentence[:-1]
        output_ = sentence[1:]
        
        input_list.append(input_)
        output_list.append(output_)
        
        i = i + 1
        
    return input_list, output_list
        
def save_metadata(model_directory, vocabulary):
    # Create directory if not exists
    if not os.path.exists(model_directory):
        os.makedirs(model_directory)

#     # Save settings
#     settings_path = os.path.join(model_directory, 'settings.json')
#     with open(settings_path, 'w') as settings_file:
#         json.dump(vars(args), settings_file, indent=4)

    # Save vocabulary
    vocabulary_path = os.path.join(model_directory, 'vocabulary.txt')
    with open(vocabulary_path, 'w') as vocabulary_file:
        vocabulary_file.write('\n'.join(vocabulary))
        
        
def load_train_data(dataset_name, sentence_size, vocabulary_size, batch_size, model_directory ):

    sentences = parse_file(open(dataset_name))
    sentences = list(adjust_size(sentences, sentence_size))
    vocabulary = create_vocabulary(sentences, vocabulary_size)
    sentences = list(convert_to_ids(sentences, vocabulary))
#     train_data = list(create_batches(sentences, batch_size))
    save_metadata(model_directory, vocabulary)
    
    # target
    input_x, target_y = create_pair(sentences)
    
    return input_x, target_y
#     return train_data


In [5]:
# Load and preprocess training data

dataset_name = "wiki_dataset_mecab_80000.txt"
sentence_size = 30
BATCH_SIZE = 64
# BATCH_SIZE = 5
batch_size = BATCH_SIZE
vocabulary_size = 50000
model_directory = "models"


input_x, target_y = load_train_data(dataset_name, sentence_size, vocabulary_size, BATCH_SIZE, model_directory)

sentences count:  80103


In [6]:

print("input_x, count: {}", len(input_x))
print("target_y, count: {}", len(target_y))

print("input_x.0: ", input_x[0])
print("target_y.0: ", target_y[0])


input_x, count: {} 80103
target_y, count: {} 80103
input_x.0:  [3, 37098, 12, 9, 7833, 16536, 12, 6, 131, 16, 1471, 10, 19, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
target_y.0:  [37098, 12, 9, 7833, 16536, 12, 6, 131, 16, 1471, 10, 19, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [7]:
# parepare dataset

# BUFFER_SIZE = len(input_x)
BUFFER_SIZE = 50000

dataset = tf.data.Dataset.from_tensor_slices((input_x, target_y)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)


In [8]:
# # dataset.take(1)

for input_example, target_example in  dataset.take(1):
    
    input_x_0 = input_example.numpy()[0]
    print("input_x_0:", input_x_0, input_x_0.shape)
    output_y_0 = target_example.numpy()[0]
    print("output_y_0:", output_y_0, output_y_0.shape)

Instructions for updating:
Colocations handled automatically by placer.
input_x_0: [   3   48   42    5 2936   22 9774   16   33 1561    9 1040   11   24
   27    4    1    1    1    1    1    1    1    1    1    1    1    1
    1    1] (30,)
output_y_0: [  48   42    5 2936   22 9774   16   33 1561    9 1040   11   24   27
    4    1    1    1    1    1    1    1    1    1    1    1    1    1
    1    1] (30,)


In [9]:
# if tf.test.is_gpu_available():
#   rnn = tf.keras.layers.CuDNNGRU
# else:
#   import functools
#   rnn = functools.partial(
#     tf.keras.layers.GRU, recurrent_activation='sigmoid')

# def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
#   model = tf.keras.Sequential([
      
#     tf.keras.layers.Embedding(vocab_size, embedding_dim, 
#                               batch_input_shape=[batch_size, None]),
#     rnn(rnn_units,
#         return_sequences=True, 
#         recurrent_initializer='glorot_uniform',
#         stateful=True),
      
#     tf.keras.layers.Dense(vocab_size)
      
#   ])
#   return model

In [10]:
# prepare model No.1


# def gru(units, backword_flg):
#     # If you have a GPU, we recommend using CuDNNGRU(provides a 3x speedup than GRU)
#     # the code automatically does that.
#     if tf.test.is_gpu_available():
#         return tf.keras.layers.CuDNNGRU(units, 
#                                         return_sequences=True, 
#                                         return_state=True, 
#                                         recurrent_initializer='glorot_uniform',
#                                         go_backwards=backword_flg)
#     else:
#         return tf.keras.layers.GRU(units, 
#                                    return_sequences=True, 
#                                    return_state=True, 
#                                    recurrent_activation='sigmoid', 
#                                    recurrent_initializer='glorot_uniform',
#                                    go_backwards=backword_flg)

def gru(units, backword_flg):
    # should use GRU anyway....
    return tf.keras.layers.GRU(units, 
                               return_sequences=True, 
                               return_state=True, 
                               recurrent_activation='sigmoid', 
                               recurrent_initializer='glorot_uniform',
                               go_backwards=backword_flg)
    
    
class KanaKanjiModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units, batch_size):
        super(KanaKanjiModel, self).__init__()
        self.batch_size = batch_size
        self.rnn_units = rnn_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, input_shape=(30,))
        self.gru_f = gru(self.rnn_units, False)
        self.gru_b = gru(self.rnn_units, True)
        self.vocab_size = vocab_size
        self.fc = tf.keras.layers.Dense(vocab_size)
        self.dropout = tf.keras.layers.Dropout(0.5)
        
        
    def call(self, x, hidden, training):
        
        # print("input.shape: ", x.shape)
        
        
        x = self.embedding(x)
         
        output, state = self.gru_f(x, initial_state = hidden)
        
        output = self.dropout(output, training=training)
        
        output = self.fc(output)
        # print("output.shape: ", output.shape)
        # print("state.shape: ", state.shape)
        
        
        # return output, state, 0
        return output, state
    
    def initialize_hidden_state(self):
        return tf.zeros((self.batch_size, self.rnn_units))
      
      
    def compute_output_shape(self, input_shape):
        # You need to override this function if you want to use the subclassed model
        # as part of a functional-style model.
        # Otherwise, this method is optional.
        
        print("input_shape: ", input_shape)
        
        shape = tf.TensorShape(input_shape).as_list()
        shape[-1] = self.num_classes
        
        return tf.TensorShape([tf.TensorShape([-1, 30, 50000]), tf.TensorShape([-1, 30, 400])])


In [11]:

hidden_size = 400
embedding_dim = hidden_size

model = KanaKanjiModel(vocabulary_size, embedding_dim, hidden_size, BATCH_SIZE)
# model = build_model(vocabulary_size, embedding_dim, hidden_size, BATCH_SIZE)

In [12]:
# it's not trivial to wrap already complicated model!!!

# x_input = tf.keras.layers.Input(shape=(30,))
# model1 = KanaKanjiModel(vocabulary_size, embedding_dim, hidden_size, BATCH_SIZE)(x_input)
# model = tf.keras.Model(inputs=x_input, outputs=model1)


In [13]:
# model.summary()

In [14]:
# model.summary()

In [15]:
tf.VERSION

'1.13.0-dev20190111'

In [16]:
# custom model don't have summary
# model.summary()

# speed up
model.call = tf.contrib.eager.defun(model.call)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [17]:
# def loss_function(real, pred):
#     mask = 1 - np.equal(real, 0)
#     loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred) * mask
#     return tf.reduce_mean(loss_)

optimizer3 = tf.train.AdamOptimizer()

In [18]:
# check points

checkpoint_dir = './ck_20190209'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer3,
                                 model=model)

In [19]:
# training

import time
 
# # Directory where the checkpoints will be saved
# checkpoint_dir3 = './training_checkpoints_80000_70'
# # Name of the checkpoint files
# checkpoint_prefix3 = os.path.join(checkpoint_dir3, "ckpt_{epoch}")

# Training step
EPOCHS = 10

for epoch in range(EPOCHS):
    start = time.time()
    
    # initializing the hidden state at the start of every epoch
    # initally hidden is None
    
    
    hidden_f = model.initialize_hidden_state()
    # hidden_f = tf.zeros((BATCH_SIZE, hidden_size))
    
    hidden_b = hidden_f
    
    loss2 = 0
    
    for (batch_n, (inp, target)) in enumerate(dataset):
        
        loss1 = 0
        
        with tf.GradientTape() as tape:
            # feeding the hidden state back into the model
            # This is the interesting step
    
            # predictions, hidden_f, _  = model(inp, hidden_f, True)
            predictions, hidden_f  = model(inp, hidden_f, True)
            # predictions  = model(inp)
        
            target = tf.expand_dims(target, 2)
            loss = tf.losses.sparse_softmax_cross_entropy(target, predictions)
            loss1 = tf.reduce_mean(loss)
            # print("loss333: ", loss333, loss1)

        loss1_np = float(loss1.numpy())
        batch_loss = (loss1_np / int(inp.shape[1]))
        
        grads = tape.gradient(loss1, model.variables)
        optimizer3.apply_gradients(zip(grads, model.variables))
        
        loss2 = loss2 + batch_loss
        
        if batch_n % 1000 == 0:
            template = 'Epoch {} Batch {} Loss {:.4f}'
            print(template.format(epoch+1, batch_n, loss1))

#     # saving (checkpoint) the model every 5 epochs
#     if (epoch + 1) % 5 == 0:
#         model.save_weights(checkpoint_prefix3.format(epoch=epoch))
        
    # saving (checkpoint) the model every 2 epochs
    if (epoch + 1) % 2 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix)

    print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss2))
    print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

# model.save_weights(checkpoint_prefix3.format(epoch=epoch))


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Epoch 1 Batch 0 Loss 10.8198
Epoch 1 Batch 1000 Loss 3.9865
Epoch 1 Loss 170.5722
Time taken for 1 epoch 13400.2575507164 sec

Epoch 2 Batch 0 Loss 3.7296
Epoch 2 Batch 1000 Loss 3.0576
Epoch 2 Loss 128.7968
Time taken for 1 epoch 13533.78256893158 sec

Epoch 3 Batch 0 Loss 3.2049
Epoch 3 Batch 1000 Loss 2.8589
Epoch 3 Loss 116.3599
Time taken for 1 epoch 13260.481710672379 sec

Epoch 4 Batch 0 Loss 2.8040
Epoch 4 Batch 1000 Loss 2.6882
Epoch 4 Loss 109.9504
Time taken for 1 epoch 13177.888521194458 sec

Epoch 5 Batch 0 Loss 2.6604
Epoch 5 Batch 1000 Loss 2.5522
Epoch 5 Loss 103.8546
Time taken for 1 epoch 13548.72274518013 sec

Epoch 6 Batch 0 Loss 2.5280
Epoch 6 Batch 1000 Loss 2.3961
Epoch 6 Loss 97.8981
Time taken for 1 epoch 13494.05921292305 sec

Epoch 7 Batch 0 Loss 2.3890
Epoch 7 Batch 1000 Loss 2.2799
Epoch 7 Loss 92.0714
Time taken for 1 epoch 13152.815173149109 

KeyboardInterrupt: 

In [20]:
# model.save_weights(checkpoint_prefix3.format(epoch=epoch))

checkpoint.save(file_prefix = checkpoint_prefix)


'./ck_20190209/ckpt-5'

In [0]:
# # weight and model
# not working
# tf.keras.models.save_model(
#     model,
#     "model1111.h5",
# )

# not working
#model.save("model1111.h5")

# looks okay but can not load...
# model.save_weights("model2222.h5")

In [0]:
# # download  
# # files.download( "model2222.h5" ) 

# saving_filename = "./model2222.h5"

# file_metadata = {
#   'name': saving_filename,
#   'mimeType': 'application/octet-stream'
# }
# media = googleapiclient.http.MediaFileUpload(saving_filename, 
#                         mimetype='application/octet-stream',
#                         resumable=True)
# created = drive_service.files().create(body=file_metadata,
#                                        media_body=media,
#                                        fields='id').execute()

In [0]:
# # experiment
# # from tf.keras.backend import manual_variable_initialization
# tf.keras.backend.manual_variable_initialization(True)

In [21]:
# checkpoint.save(file_prefix = checkpoint_prefix)

In [0]:
# # download fines
# from google.colab import files

In [0]:
# # download vocabulary 
# files.download( "./models/vocabulary.txt" ) 

In [0]:
# checkpoint_dir3 = "training_checkpoints_80000_70"
# tf.train.latest_checkpoint(checkpoint_dir3)

'training_checkpoints_80000_70/ckpt-6'

In [0]:
# files.download( "./training_checkpoints_80000_70/checkpoint" ) 

In [0]:
# files.download( "./training_checkpoints_80000_70/ckpt-6.index" ) 

In [0]:
# import google.colab
# import googleapiclient.discovery
# import googleapiclient.http

In [0]:
# google.colab.auth.authenticate_user()
# drive_service = googleapiclient.discovery.build('drive', 'v3')

In [0]:
# saving_filename = "./training_checkpoints_80000_70/ckpt-6.data-00000-of-00001"

# file_metadata = {
#   'name': saving_filename,
#   'mimeType': 'application/octet-stream'
# }
# media = googleapiclient.http.MediaFileUpload(saving_filename, 
#                         mimetype='application/octet-stream',
#                         resumable=True)
# created = drive_service.files().create(body=file_metadata,
#                                        media_body=media,
#                                        fields='id').execute()

In [0]:
# load weight and check results!

In [24]:
# rebuild moel
hidden_size = 400
embedding_dim = hidden_size
vocabulary_size = 50000

model = KanaKanjiModel(vocabulary_size, embedding_dim, hidden_size, 1)

In [25]:
# # load model

# # Directory where the checkpoints will be saved
# checkpoint_dir3 = './training_checkpoints_80000_70'

# model.load_weights(tf.train.latest_checkpoint(checkpoint_dir3))

In [26]:
checkpoint_dir3 = './ck_20190209'
tf.train.latest_checkpoint(checkpoint_dir3)

'./ck_20190209/ckpt-5'

In [27]:
# load model

# load weight
checkpoint_dir = './ck_20190209'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer3,
                                 model=model)

status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
# status = checkpoint.restore("./training_checkpoints_ver4/ckpt-3")
print("status: ", status)

status:  <tensorflow.python.training.checkpointable.util.CheckpointLoadStatus object at 0x1288e8128>


In [28]:
# # coding: utf-8
# hiragana = "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをん"

# # ひらがなだけの文字列ならTrue
# def ishira(strj):
#     return all([ch in hiragana for ch in strj])

In [29]:
# ishira("きしゃののりかえ2")

False

In [49]:
import collections
import heapq
import operator

hiragana_added = []

def load_dictionary(model_directory):
    vocabulary_path = os.path.join(model_directory, 'vocabulary.txt')
    vocabulary = []
    for line in open(vocabulary_path):
        line = line.rstrip('\n')
        target, source = line.split('/', 1)
        vocabulary.append((target, source))

    dictionary = collections.defaultdict(list)
    for i, (target, source) in enumerate(vocabulary):
        dictionary[source].append((target, i))

    return dictionary

def create_lattice(input_, dictionary):
    lattice = [[[] for _ in range(len(input_) + 1)] for _ in range(len(input_) + 2)]
    _, unk_id = dictionary['_UNK'][0]

    for i in range(1, len(input_) + 1):
        for j in range(i):
            key = input_[j:i]
            if key in dictionary:
                for target, word_id in dictionary[key]:
                    lattice[i][j].append((target, word_id))
            elif len(key) == 1:
                # Create _UNK node with verbatim target when single character key is not found in the dictionary.
                lattice[i][j].append((key, unk_id))

    _, eos_id = dictionary['_EOS'][0]
    lattice[-1][-1].append(('', eos_id))
    return lattice


def initialize_queues(lattice, rnn_predictor, dictionary):
    # Initialize priority queues for keeping hypotheses
    # A hypothesis is a tuple of (cost, string, state, prediction)
    # cost is total negative log probability
    # state.shape == [hidden_size * layer_size]
    # prediction.shape == [vocabulary_size]
    
    hiragana_added = []
    
    hidden_f = rnn_predictor.initialize_hidden_state()
    hidden_b = hidden_f
    
    _, bos_id = dictionary['_BOS'][0]
    
    input_x0 = tf.expand_dims([bos_id], 0)
    
    # bos_predictions, hidden_f, hidden_b = rnn_predictor(input_x0, hidden_f, hidden_b, False)
    # bos_predictions, hidden_f, hidden_b = rnn_predictor(input_x0, hidden_f, False)
    bos_predictions, hidden_f = rnn_predictor(input_x0, hidden_f, False)
    
    
    # logits to probability
    # bos_predictions = tf.math.softmax(bos_predictions, axis=1)
    # print("bos_predictions.shape 1: ", bos_predictions.shape)
    
    bos_predictions = tf.squeeze(bos_predictions, 0)
    bos_predictions = tf.squeeze(bos_predictions, 0)
    bos_predictions = -1 * tf.nn.log_softmax(bos_predictions, axis=0)
    # print("bos_predictions.shape 2: ", bos_predictions.shape)
    
    hidden_f = tf.expand_dims(hidden_f, 0)
#     hidden_b = tf.expand_dims(hidden_b, 0)
    
    # bos_hypothesis = (0.0, '', hidden_f[0], hidden_b[0], bos_predictions[0])
    bos_hypothesis = (0.0, '', hidden_f[0], bos_predictions)
    queues = [[] for _ in range(len(lattice))]
    queues[0].append(bos_hypothesis)
    return queues

def search(lattice, queues, rnn_predictor, beam_size, viterbi_size):
    # Breadth first search with beam pruning and viterbi-like pruning
    for i in range(len(lattice)):
        queue = []

        # create hypotheses without predicting next word
        for j in range(len(lattice[i])):
            for target, word_id in lattice[i][j]:
                
                # if word_id == 350:
                #    print("word_id: ", word_id)
                
                word_queue = []
                for previous_cost, previous_string, previous_state_f, previous_prediction in queues[j]:
                    # if logits is bigger, better.
                    
                    # seems to need give huge priority to first word
                    cost = previous_cost + previous_prediction[word_id]
#                     if previous_string == "":
#                         print("previous_cost: ", previous_cost)
#                         print("word_id: ", word_id, previous_prediction[word_id])
#                         print("previous_prediction.shape: ", previous_prediction)
                        
#                         cost = previous_cost + previous_prediction[word_id] * 10 * i
#                         # cost = previous_cost + previous_prediction[word_id]
#                         # if word_id == 2774:
#                         #     print("word_id: ", word_id, " cost: ", cost, previous_prediction[word_id])
#                     else:
#                         cost = previous_cost + previous_prediction[word_id]
#                         # if ishira(target) == True:
#                         #    cost = previous_cost + previous_prediction[word_id]
#                         #else:
#                         #    cost = previous_cost + previous_prediction[word_id] * len(target)
                        
                    # print("aa: ", previous_string, target, previous_cost, previous_prediction[word_id].numpy())
                    string = previous_string + target
                    hypothesis = (cost, string, word_id, previous_state_f)
                    word_queue.append(hypothesis)

                # if word_id == 350:
                #     print("word_queue 1: ", word_queue[0].numpy(), word_queue[1] )
                    
                # prune word_queue to viterbi size
                if viterbi_size > 0:
                    word_queue = heapq.nsmallest(viterbi_size, word_queue, key=operator.itemgetter(0))
                    
                queue += word_queue

                
        # check word id 350
#        queue_350 = None
#         for item in queue:
#             if item[1] == "近代":
#                 queue_350 = item
#                 print("found it! 1")
                
        # prune queue to beam size
        if beam_size > 0:
            
#             # list all hiragana only strings
#             hiragana_word_queue = []
#             for item in queue:
#                 if ishira(item[1]) == True:
#                     hiragana_word_queue.append(item)
            
            queue = heapq.nsmallest(beam_size, queue, key=operator.itemgetter(0))
            
#             # add hiragana only strings after pruning
#             mojiretu_list = []
#             for item in queue:
#                 mojiretu_list.append(item[1])

#             for item in hiragana_word_queue:
#                 if item[1] not in mojiretu_list:
#                     if item[1] not in hiragana_added:
#                         hiragana_added.append(item[1])
#                         queue.append(item)
                    
            # # add 350 forcibley...
            #if queue_350 is not None:
            #    queue.append(queue_350)
            

#         # check word id 350
#         for item in queue:
#             if item[1] == "近代":
#                 print("found it! 2")
            
        # predict next word and state before continue
        for cost, string, word_id, previous_state_f in queue:
            
            input_x0 = tf.expand_dims([word_id], 0)
  
            # predictions, state_f, state_b = rnn_predictor(input_x0, [previous_state_f], [previous_state_b], False)
            # predictions, state_f, state_b = rnn_predictor(input_x0, [previous_state_f], False)
            predictions, state_f = rnn_predictor(input_x0, [previous_state_f], False)
            # predictions = tf.squeeze(predictions, 0)
        
            # logits to probability
            # predictions = tf.math.softmax(predictions, axis=1)
            
            
            predictions = tf.squeeze(predictions, 0)
            predictions = tf.squeeze(predictions, 0)
            predictions = -1 * tf.nn.log_softmax(predictions, axis=0)
            # print("predictions.shape: ", predictions.shape)
        
            state_f = tf.expand_dims(state_f, 0)
            # state_b = tf.expand_dims(state_b, 0)
            
#             hypothesis = (cost, string, state_f[0], predictions[0])
            hypothesis = (cost, string, state_f[0], predictions)
            queues[i].append(hypothesis)

    return queues

def decode(source, dictionary, rnn_predictor, beam_size, viterbi_size):
    lattice = create_lattice(source, dictionary)
    queues = initialize_queues(lattice, rnn_predictor, dictionary)
    queues = search(lattice, queues, rnn_predictor, beam_size, viterbi_size)

    candidates = []
    for cost, string, _, _ in queues[-1]:
        candidates.append((string, cost))

    top_result = candidates[0][0]
    return top_result, candidates, lattice, queues


In [50]:
def aaaaa(line, rnn_predictor):
    
    # Load settings and vocabulary
    model_directory = "models"
    dictionary = load_dictionary(model_directory)
    
    # debug
    print_nbest = True 
    print_lattice = True
    print_queue = True
    
    # parameters
    beam_size = 5
    viterbi_size = 50000
    
    
    # Iterate input file line by line
    line = line.rstrip('\n')

    # Decode - this might take ~10 seconds per line
    result, candidates, lattice, queues = decode(line, dictionary, rnn_predictor, beam_size, viterbi_size)

    # Print decoded results
    if not print_nbest:
        print(result)
    else:
        for string, cost in candidates:
            print(string, cost)

    # Print lattice for debug
    if print_lattice:
        for i in range(len(lattice)):
            for j in range(len(lattice[i])):
                print('i = {}, j = {}'.format(i, j))
                for target, word_id in lattice[i][j]:
                    print(target, word_id)

    # Print queues for debug
    if print_queue:
        for i, queue in enumerate(queues):
            print('queue', i)
            for cost, string, state_f, prediction in queue:
                # print(string, cost)
                print(string, cost)

In [51]:
aaaaa("ぱりのれきし", model)

パリの歴史 tf.Tensor(24.277733, shape=(), dtype=float32)
パリの歴誌 tf.Tensor(37.341175, shape=(), dtype=float32)
パリの歴し tf.Tensor(38.27497, shape=(), dtype=float32)
パリの歴史 tf.Tensor(38.383877, shape=(), dtype=float32)
パリノ歴史 tf.Tensor(38.963135, shape=(), dtype=float32)
i = 0, j = 0
i = 0, j = 1
i = 0, j = 2
i = 0, j = 3
i = 0, j = 4
i = 0, j = 5
i = 0, j = 6
i = 1, j = 0
パ 17374
i = 1, j = 1
i = 1, j = 2
i = 1, j = 3
i = 1, j = 4
i = 1, j = 5
i = 1, j = 6
i = 2, j = 0
パリ 1097
i = 2, j = 1
理 6135
り 9986
利 12479
李 15579
リ 30383
i = 2, j = 2
i = 2, j = 3
i = 2, j = 4
i = 2, j = 5
i = 2, j = 6
i = 3, j = 0
i = 3, j = 1
りの 33680
i = 3, j = 2
の 2
之 7760
ノ 14756
野 15437
乗 42304
i = 3, j = 3
i = 3, j = 4
i = 3, j = 5
i = 3, j = 6
i = 4, j = 0
i = 4, j = 1
i = 4, j = 2
乗れ 15458
i = 4, j = 3
れ 14
レ 14130
i = 4, j = 4
i = 4, j = 5
i = 4, j = 6
i = 5, j = 0
i = 5, j = 1
i = 5, j = 2
i = 5, j = 3
暦 7174
歴 14874
i = 5, j = 4
き 84
機 247
期 342
来 931
器 1811
気 1890
木 2089
記 3937
着 5415
黄 7605
樹 9280
奇 14415
伐 2539