In [1]:
import pyprind
import pandas as pd
from string import punctuation
import re
import numpy as np

# Load the movie-review dataset; later cells read its 'review' and
# 'sentiment' columns.
df = pd.read_csv('../../../movie_data.csv', encoding='utf-8')

In [2]:
from collections import Counter

# Clean every review and count token frequencies over the whole corpus.
counts = Counter()
pbar = pyprind.ProgBar(len(df['review']), title='Counting words occurrences')
punctuation_chars = set(punctuation)  # O(1) membership test per character
cleaned_reviews = []
for review in df['review']:
    # Surround punctuation with single spaces so that a plain split()
    # later yields every punctuation mark as its own token.
    text = ''.join(' ' + c + ' ' if c in punctuation_chars else c
                   for c in review).lower()
    cleaned_reviews.append(text)
    counts.update(text.split())
    pbar.update()

# Write the cleaned text back with one column assignment. Setting one cell
# per iteration through df.loc is very slow on a frame of this size and
# silently assumes the index labels are 0..n-1.
df['review'] = cleaned_reviews

Counting words occurrences
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:16:21


In [3]:
# Sanity check: apply the punctuation-spacing expression used above to a
# single sentence and display the result.
a = "This movie is great!"
''.join([c if c not in punctuation else ' '+c+' ' for c in a]).lower()

#for c in a:
#    print(c)

'this movie is great ! '

In [4]:
# Vocabulary ordered from the most to the least frequent token
# (despite its name, `word_counts` holds words, not counts).
word_counts = [word for word, _ in counts.most_common()]
print(word_counts[:5])
# IDs start at 1; 0 is left free for the zero padding applied later.
word_to_int = {word: idx for idx, word in enumerate(word_counts, start=1)}

# Translate every cleaned review into its list of integer token IDs.
pbar = pyprind.ProgBar(len(df['review']), title='Map reviews to ints')
mapped_reviews = []
for review in df['review']:
    token_ids = [word_to_int[word] for word in review.split()]
    mapped_reviews.append(token_ids)
    pbar.update()

Map reviews to ints


['the', '.', ',', 'and', 'a']


0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:04


In [5]:
# Build a fixed-width matrix of token IDs: short reviews are left-padded
# with zeros, long reviews keep only their last `sequence_length` tokens.
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype=int)

for i, row in enumerate(mapped_reviews):
    tail = row[-sequence_length:]
    # An empty review must be skipped: the slice `-0:` selects the whole
    # row, and assigning a zero-length array to it raises ValueError.
    if tail:
        sequences[i, -len(tail):] = tail

In [6]:
# Small demonstration of the right-aligned slice assignment used for padding:
# only the last two slots of `a` are overwritten.
a = np.zeros((5,), dtype=int)
print(a)
b = np.array([1, 2, 3, 4, 5])
a[-2:] = b[-2:]
print(a)

[0 0 0 0 0]
[0 0 0 4 5]


In [7]:
# First 25,000 rows are the training set, the remainder the test set.
# The labels are sliced positionally on the underlying array so they stay
# aligned with `sequences`: the label-based `df.loc[:25000]` is
# end-inclusive and returned 25,001 labels for 25,000 feature rows.
n_train = 25000
sentiment_labels = df['sentiment'].values

X_train = sequences[:n_train, :]
y_train = sentiment_labels[:n_train]

X_test = sequences[n_train:, :]
y_test = sentiment_labels[n_train:]

In [8]:
np.random.seed(123)

def create_batch_generator(x, y=None, batch_size=64):
    """Yield consecutive mini-batches of exactly `batch_size` items.

    Trailing items that do not fill a complete batch are dropped.
    Yields `(x_batch, y_batch)` pairs when `y` is given, otherwise only
    `x_batch`.
    """
    # Largest multiple of batch_size that fits into x (floor division).
    usable = (len(x) // batch_size) * batch_size
    x = x[:usable]
    if y is not None:
        y = y[:usable]

    for start in range(0, usable, batch_size):
        stop = start + batch_size
        if y is None:
            yield x[start:stop]
        else:
            yield x[start:stop], y[start:stop]

In [28]:
import tensorflow as tf

class SentimentRNN(object):
    """Many-to-one LSTM sentiment classifier on the TensorFlow 1.x graph API.

    The constructor stores the hyperparameters and builds the complete graph
    (embedding -> stacked LSTM with output dropout -> one sigmoid unit) in a
    private ``tf.Graph``. ``train`` and ``predict`` address tensors by name
    ('tf_x:0', 'cost:0', ...), so the op names assigned in ``build`` must
    not be changed.

    Parameters
    ----------
    n_words : int
        Number of rows of the embedding matrix (one per token ID).
    seq_len : int
        Number of token IDs per input sequence.
    lstm_size : int
        Number of hidden units of each LSTM layer.
    num_layers : int
        Number of stacked LSTM layers.
    batch_size : int
        Mini-batch size; it is baked into the placeholder shapes.
    learning_rate : float
        Learning rate passed to the Adam optimizer.
    embed_size : int
        Number of columns of the embedding matrix.
    """

    def __init__(self, n_words, seq_len=200,
                 lstm_size=256, num_layers=1, batch_size=64,
                 learning_rate=0.0001, embed_size=200):
        self.n_words = n_words
        self.seq_len = seq_len
        self.lstm_size = lstm_size
        self.num_layers = num_layers  # number of stacked LSTM layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.embed_size = embed_size

        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)
            self.build()
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()

    def build(self):
        """Create placeholders, the network, the cost and the training op."""
        tf_x = tf.placeholder(tf.int32,
                              shape=(self.batch_size, self.seq_len),
                              name='tf_x')
        # Explicit one-element tuple; `(self.batch_size)` is just an int.
        tf_y = tf.placeholder(tf.float32,
                              shape=(self.batch_size,),
                              name='tf_y')
        tf_keepprob = tf.placeholder(tf.float32,
                                     name='tf_keepprob')

        embedding = tf.Variable(
            tf.random_uniform((self.n_words, self.embed_size),
                              minval=-1, maxval=1),
            name='embedding')

        embed_x = tf.nn.embedding_lookup(embedding, tf_x,
                                         name='embeded_x')

        print('  << embed_x >>  ', embed_x)

        # One dropout-wrapped LSTM cell per layer; "cell" here means one
        # recurrent layer.
        cells = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(self.lstm_size),
                output_keep_prob=tf_keepprob)
             for i in range(self.num_layers)])

        self.initial_state = cells.zero_state(self.batch_size, tf.float32)
        print('  << initial state >>  ', self.initial_state)

        # Unroll the LSTM over the embedded sequence, starting from
        # `initial_state`.
        lstm_outputs, self.final_state = \
            tf.nn.dynamic_rnn(cells, embed_x, initial_state=self.initial_state)

        # lstm_outputs holds the hidden-unit outputs at every time step,
        # shaped (batch_size, time steps, lstm_size).
        #
        # See: https://jp.mathworks.com/help/deeplearning/ug/long-short-term-memory-networks.html;jsessionid=72c069a296d252b44e0d114132f6
        # See: https://orizuru.io/blog/machine-learning/lstm/
        print('\n  << lstm_output   >>  ', lstm_outputs)

        # final_state pairs the hidden output and the cell state of the
        # last time step.
        # See: https://stackoverflow.com/questions/49969349/hidden-states-vs-final-state-returned-by-tensorflows-dynamic-rnn
        print('\n  << final state   >>  ', self.final_state)

        # Feeding final_state into the next mini-batch as its initial state
        # makes sense when rows at the same index of consecutive batches
        # continue one sequence. The reviews here do not appear to be
        # related that way, so the hand-over is probably unnecessary.

        # Classify from the output of the last time step only.
        logits = tf.layers.dense(inputs=lstm_outputs[:, -1],
                                 units=1, activation=None,
                                 name='logits')

        print('\n  << logits  >>  ', logits)

        # Drop the trailing size-1 axis: (batch, 1) -> (batch,). The axis is
        # named explicitly so that batch_size == 1 is not collapsed to a
        # scalar, which would no longer match the shape of tf_y.
        logits = tf.squeeze(logits, axis=1, name='logits_squeezed')
        print('\n  << logits squeezed  >>  ', logits)

        y_proba = tf.nn.sigmoid(logits, name='probabilities')

        predictions = {
            'probabilities': y_proba,
            'labels': tf.cast(tf.round(y_proba), tf.int32, name='labels')
        }
        print('\n  << predictions   >>  ', predictions)

        cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf_y, logits=logits),
            name='cost')

        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.minimize(cost, name='train_op')

    def train(self, X_train, y_train, num_epochs):
        """Train on (X_train, y_train) for `num_epochs` epochs.

        Uses dropout with keep probability 0.5, prints the loss every 20
        iterations and writes a checkpoint to 'ch16-model/' after every
        10th epoch. The LSTM state is reset at the start of each epoch and
        carried from one mini-batch to the next within an epoch.
        """
        # Imported locally: in this notebook `os` is only imported by a
        # later cell.
        import os
        # Make sure the checkpoint directory exists before the first save,
        # as CharRNN.train does for its own directory.
        os.makedirs("ch16-model", exist_ok=True)

        with tf.Session(graph=self.g) as sess:
            sess.run(self.init_op)
            iteration = 1
            for epoch in range(num_epochs):
                state = sess.run(self.initial_state)

                for batch_x, batch_y in create_batch_generator(
                        X_train, y_train, self.batch_size):

                    feed = {'tf_x:0': batch_x,
                            'tf_y:0': batch_y,
                            'tf_keepprob:0': 0.5,
                            self.initial_state: state}

                    loss, _, state = sess.run(
                        ['cost:0', 'train_op', self.final_state],
                        feed_dict=feed)

                    if iteration % 20 == 0:
                        print('Epochs: %d/%d Iteration: %d | Train loss: %.5f'
                              % (epoch + 1, num_epochs, iteration, loss))

                    iteration += 1

                if (epoch + 1) % 10 == 0:
                    self.saver.save(sess, "ch16-model/sentiment-%d.ckpt" % epoch)

    # Prediction also hands the final state from one mini-batch to the next.
    # What is learned are the weights, not the state; when rows at the same
    # index of consecutive batches form one sequence, the state has to be
    # shared at inference time as well for the outputs to be correct.
    def predict(self, X_data, return_proba=False):
        """Predict with the latest checkpoint found in './ch16-model/'.

        Dropout is disabled (keep probability 1.0). Only complete batches
        are evaluated, so the result can be shorter than `X_data`.
        Returns the concatenated class labels, or the sigmoid probabilities
        when `return_proba` is True.
        """
        preds = []
        fetch_name = 'probabilities:0' if return_proba else 'labels:0'
        with tf.Session(graph=self.g) as sess:
            self.saver.restore(sess, tf.train.latest_checkpoint('./ch16-model/'))
            test_state = sess.run(self.initial_state)

            for batch_x in create_batch_generator(
                    X_data, None, batch_size=self.batch_size):

                feed = {'tf_x:0': batch_x, 'tf_keepprob:0': 1.0,
                        self.initial_state: test_state}

                pred, test_state = sess.run(
                    [fetch_name, self.final_state],
                    feed_dict=feed)

                preds.append(pred)

        return np.concatenate(preds)

In [29]:
# Vocabulary IDs start at 1 and ID 0 is the padding written into reviews
# shorter than `sequence_length`, so the embedding needs one extra row:
# in effect an additional "undefined" vocabulary entry.
n_words = max(word_to_int.values()) + 1

rnn = SentimentRNN(
    n_words=n_words,
    seq_len=sequence_length,
    embed_size=256,
    lstm_size=128,
    num_layers=1,
    batch_size=100,
    learning_rate=0.001,
)



  << embed_x >>   Tensor("embeded_x/Identity:0", shape=(100, 200, 256), dtype=float32)
  << initial state >>   (LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(100, 128) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 128) dtype=float32>),)

  << lstm_output   >>   Tensor("rnn/transpose_1:0", shape=(100, 200, 128), dtype=float32)

  << final state   >>   (LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_3:0' shape=(100, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_4:0' shape=(100, 128) dtype=float32>),)

  << logits  >>   Tensor("logits/BiasAdd:0", shape=(100, 1), dtype=float32)

  << logits squeezed  >>   Tensor("logits_squeezed:0", shape=(100,), dtype=float32)

  << predictions   >>   {'probabilities': <tf.Tensor 'probabilities:0' shape=(100,) dtype=float32>, 'labels': <tf.Tensor 'labels:0' shape=(100,) dtype=int32>}


In [11]:
# Display the embedding row count computed above.
n_words

102967

In [13]:
# Train the sentiment model; train() writes a checkpoint after every 10th epoch.
rnn.train(X_train, y_train, num_epochs=40)

Epochs: 1/40 Iteration: 20 | Train loss: 0.68144
Epochs: 1/40 Iteration: 40 | Train loss: 0.62962
Epochs: 1/40 Iteration: 60 | Train loss: 0.64887
Epochs: 1/40 Iteration: 80 | Train loss: 0.62561
Epochs: 1/40 Iteration: 100 | Train loss: 0.57090
Epochs: 1/40 Iteration: 120 | Train loss: 0.53749
Epochs: 1/40 Iteration: 140 | Train loss: 0.48158
Epochs: 1/40 Iteration: 160 | Train loss: 0.45924
Epochs: 1/40 Iteration: 180 | Train loss: 0.47754
Epochs: 1/40 Iteration: 200 | Train loss: 0.44295
Epochs: 1/40 Iteration: 220 | Train loss: 0.39990
Epochs: 1/40 Iteration: 240 | Train loss: 0.41825
Epochs: 2/40 Iteration: 260 | Train loss: 0.39764
Epochs: 2/40 Iteration: 280 | Train loss: 0.41892
Epochs: 2/40 Iteration: 300 | Train loss: 0.51972
Epochs: 2/40 Iteration: 320 | Train loss: 0.36637
Epochs: 2/40 Iteration: 340 | Train loss: 0.32235
Epochs: 2/40 Iteration: 360 | Train loss: 0.22373
Epochs: 2/40 Iteration: 380 | Train loss: 0.35304
Epochs: 2/40 Iteration: 400 | Train loss: 0.33850
Epoc

Epochs: 13/40 Iteration: 3240 | Train loss: 0.03627
Epochs: 14/40 Iteration: 3260 | Train loss: 0.00709
Epochs: 14/40 Iteration: 3280 | Train loss: 0.00530
Epochs: 14/40 Iteration: 3300 | Train loss: 0.04619
Epochs: 14/40 Iteration: 3320 | Train loss: 0.00383
Epochs: 14/40 Iteration: 3340 | Train loss: 0.00228
Epochs: 14/40 Iteration: 3360 | Train loss: 0.00376
Epochs: 14/40 Iteration: 3380 | Train loss: 0.00852
Epochs: 14/40 Iteration: 3400 | Train loss: 0.00202
Epochs: 14/40 Iteration: 3420 | Train loss: 0.03039
Epochs: 14/40 Iteration: 3440 | Train loss: 0.00295
Epochs: 14/40 Iteration: 3460 | Train loss: 0.00215
Epochs: 14/40 Iteration: 3480 | Train loss: 0.00254
Epochs: 14/40 Iteration: 3500 | Train loss: 0.00554
Epochs: 15/40 Iteration: 3520 | Train loss: 0.01981
Epochs: 15/40 Iteration: 3540 | Train loss: 0.00087
Epochs: 15/40 Iteration: 3560 | Train loss: 0.00387
Epochs: 15/40 Iteration: 3580 | Train loss: 0.00126
Epochs: 15/40 Iteration: 3600 | Train loss: 0.00632
Epochs: 15/4

KeyboardInterrupt: 

In [26]:
preds = rnn.predict(X_test)
# predict() only evaluates complete batches, so trim the labels to the
# number of predictions actually returned.
y_true = y_test[:len(preds)]

accuracy = np.mean(preds == y_true)
print('Test Acc.: %.3f' % accuracy)

INFO:tensorflow:Restoring parameters from ./ch16-model/sentiment-19.ckpt
Test Acc.: 0.847


In [30]:
# Same prediction pass, but fetch the sigmoid probabilities instead of the
# rounded class labels.
proba = rnn.predict(X_test, return_proba=True)
print(proba)

INFO:tensorflow:Restoring parameters from ./ch16-model/sentiment-19.ckpt
[5.3644180e-07 9.9999857e-01 9.6604210e-01 ... 1.5084714e-06 4.7006497e-06
 9.9465466e-01]


In [4]:
import numpy as np

# Read the raw corpus for the character-level language model.
with open('../../../ch16_work/pg2265.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Skip the first 15858 characters
# (presumably the Project Gutenberg preamble -- TODO confirm).
text = text[15858:]

# Sort the unique characters so the char <-> int mapping is deterministic.
# Iterating a bare set of strings depends on per-process hash randomization,
# so the IDs would differ after a kernel restart and no longer match a
# checkpoint trained in an earlier session.
chars = sorted(set(text))

char2int = {ch: i for i, ch in enumerate(chars)}
int2char = dict(enumerate(chars))
text_ints = np.array([char2int[ch] for ch in text], dtype=np.int32)

In [5]:
# Number of characters available for training.
print(len(text_ints))

163239


In [33]:
'''
長文を一気に流し込むのではなく
モデルの構造に合わせて適切に分割をして、トレーニングデータを作成する。

なぜ？
　　ミニバッチ単位で処理すると、行列で一気に演算できるので、学習時の演算効率が良い？
    　　→　であればバッチサイズ単位で小分けすべき。
      　　　　　ただ、それらの行データ間の系列は考慮できなくなるが、今回の場合は大丈夫なんだろう。
　　あまりに巨大なステップ数だと、誤差が伝播しない？
  　　→　であればステップ数をある程度の大きさに抑えるべき。

LSTMのステップ数、LSTMに投入するバッチサイズを考慮して分割する。

※用語
バッチサイズ：　１バッチあたりのデータ件数（行数）
バッチの個数：バッチサイズ/1個のバッチが何個あるか

たとえば、

【変換前の文字シーケンス】
あいうえおかきくけこ
さしすせそたちつてと
なにぬねのはひふへほ
まみむめもらりるれろい
（文字数：41）

【ステップ数】：5
【バッチサイズ】：2

とすると、

【x】
あいうえお　　　　　かきくけこ　　　　　さしすせそ　　　　　たちつてと
なにぬねの　　　はひふへほ　　　まみむめも　　　　　らりるれろ

【y】
いうえおか　　　　　きくけこさ　　　　　しすせそた　　　　　ちつてとな
にぬねのは　　　ひふへほま　　　みむめもら　　　　　りるれろい


という感じに分割する。

この場合のバッチの個数は4。
'''
def reshape_data(sequence, batch_size, num_steps):
    """Turn a 1-D sequence into input/target matrices with `batch_size` rows.

    The target `y` is the input `x` shifted one position to the right.
    Only as many items are used as fit into whole mini-batches of
    `batch_size * num_steps` items; the rest is discarded. Returns `(x, y)`,
    each holding `batch_size` equally long consecutive chunks as rows.
    """
    # Number of items consumed by one mini-batch.
    items_per_batch = batch_size * num_steps
    n_batches = len(sequence) // items_per_batch

    # If the sequence divides exactly, the last input item would have no
    # target after it, so give up one mini-batch.
    if n_batches * items_per_batch >= len(sequence):
        n_batches -= 1

    usable = n_batches * items_per_batch

    def _as_rows(flat):
        # Cut into `batch_size` equal chunks and stack them as rows.
        return np.stack(np.split(flat, batch_size))

    x = _as_rows(sequence[:usable])
    y = _as_rows(sequence[1:usable + 1])
    return x, y

def create_batch_generator(data_x, data_y, num_steps):
    """Yield column windows of width `num_steps` from two 2-D arrays.

    Each yielded pair `(x_window, y_window)` keeps every row and covers the
    same `num_steps` columns of `data_x` and `data_y`; leftover columns that
    do not fill a whole window are skipped.

    NOTE: this redefinition shadows the earlier
    `create_batch_generator(x, y=None, batch_size=64)` that SentimentRNN
    calls, so that class cannot be used after this cell has run.
    """
    _, total_steps = data_x.shape
    n_windows = total_steps // num_steps
    for start in range(0, n_windows * num_steps, num_steps):
        stop = start + num_steps
        yield data_x[:, start:stop], data_y[:, start:stop]

In [34]:
# Try reshape_data on the 41-character example from the explanation above:
# two rows of 20 characters each, with y shifted by one character.
a = 'あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもらりるれろい'
a = np.array([char for char in a])

x, y = reshape_data(a, batch_size=2, num_steps=5)

print('x.shape: ', x.shape)
print('y.shape: ', y.shape)
print('x: \n', x)
print('y: \n', y)

x.shape:  (2, 20)
y.shape:  (2, 20)
x: 
 [['あ' 'い' 'う' 'え' 'お' 'か' 'き' 'く' 'け' 'こ' 'さ' 'し' 'す' 'せ' 'そ' 'た' 'ち' 'つ' 'て' 'と']
 ['な' 'に' 'ぬ' 'ね' 'の' 'は' 'ひ' 'ふ' 'へ' 'ほ' 'ま' 'み' 'む' 'め' 'も' 'ら' 'り' 'る' 'れ' 'ろ']]
y: 
 [['い' 'う' 'え' 'お' 'か' 'き' 'く' 'け' 'こ' 'さ' 'し' 'す' 'せ' 'そ' 'た' 'ち' 'つ' 'て' 'と' 'な']
 ['に' 'ぬ' 'ね' 'の' 'は' 'ひ' 'ふ' 'へ' 'ほ' 'ま' 'み' 'む' 'め' 'も' 'ら' 'り' 'る' 'れ' 'ろ' 'い']]


In [37]:
'''
こうなるはず。

【x】
あいうえお　　　　　かきくけこ　　　　　さしすせそ　　　　　たちつてと
なにぬねの　　　はひふへほ　　　まみむめも　　　　　らりるれろ

【y】
いうえおか　　　　　きくけこさ　　　　　しすせそた　　　　　ちつてとな
にぬねのは　　　ひふへほま　　　みむめもら　　　　　りるれろい
'''
# Print every 5-step window produced by the generator, both rows of x
# followed by both rows of y.
for xxx, yyy in create_batch_generator(x, y, num_steps=5):
    print('x: ', xxx[0])
    print('    ', xxx[1])
    print('y: ', yyy[0])
    print('    ', yyy[1])
    print()

x:  ['あ' 'い' 'う' 'え' 'お']
     ['な' 'に' 'ぬ' 'ね' 'の']
y:  ['い' 'う' 'え' 'お' 'か']
     ['に' 'ぬ' 'ね' 'の' 'は']

x:  ['か' 'き' 'く' 'け' 'こ']
     ['は' 'ひ' 'ふ' 'へ' 'ほ']
y:  ['き' 'く' 'け' 'こ' 'さ']
     ['ひ' 'ふ' 'へ' 'ほ' 'ま']

x:  ['さ' 'し' 'す' 'せ' 'そ']
     ['ま' 'み' 'む' 'め' 'も']
y:  ['し' 'す' 'せ' 'そ' 'た']
     ['み' 'む' 'め' 'も' 'ら']

x:  ['た' 'ち' 'つ' 'て' 'と']
     ['ら' 'り' 'る' 'れ' 'ろ']
y:  ['ち' 'つ' 'て' 'と' 'な']
     ['り' 'る' 'れ' 'ろ' 'い']



In [121]:
import tensorflow as tf
import os

class CharRNN(object):
    """Character-level LSTM language model on the TensorFlow 1.x graph API.

    The constructor builds the graph in a private ``tf.Graph``. With
    ``sampling=True`` the placeholders are shaped (1, 1) so that text can be
    generated one character at a time; otherwise they are shaped
    (batch_size, num_steps) for training. ``train`` and ``sample`` address
    tensors by name, so the op names assigned in ``build`` must not change.

    Parameters
    ----------
    num_classes : int
        Number of distinct characters (one-hot depth and output units).
    batch_size : int
        Number of rows per training mini-batch.
    num_steps : int
        Number of time steps per training mini-batch.
    lstm_size : int
        Number of hidden units of each LSTM layer.
    num_layers : int
        Number of stacked LSTM layers.
    learning_rate : float
        Learning rate passed to the Adam optimizer.
    keep_prob : float
        Dropout keep probability fed during training.
    grad_clip : float
        Maximum global norm the gradients are clipped to.
    sampling : bool
        Build the (1, 1) sampling graph instead of the training graph.
    """

    def __init__(self, num_classes, batch_size=64, num_steps=100,
                 lstm_size=128, num_layers=1, learning_rate=0.001,
                 keep_prob=0.5, grad_clip=5, sampling=False):
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.lstm_size = lstm_size
        self.num_layers = num_layers
        self.learning_rate = learning_rate
        self.keep_prob = keep_prob
        self.grad_clip = grad_clip

        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)

            self.build(sampling=sampling)
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()

    def build(self, sampling):
        """Create placeholders, the network, the cost and the training op."""
        if sampling:
            batch_size, num_steps = 1, 1
        else:
            batch_size = self.batch_size
            num_steps = self.num_steps

        tf_x = tf.placeholder(tf.int32,
                              shape=[batch_size, num_steps],
                              name='tf_x')

        tf_y = tf.placeholder(tf.int32,
                              shape=[batch_size, num_steps],
                              name='tf_y')

        tf_keepprob = tf.placeholder(tf.float32,
                                     name='tf_keepprob')

        x_onehot = tf.one_hot(tf_x, depth=self.num_classes)
        y_onehot = tf.one_hot(tf_y, depth=self.num_classes)

        print('  <<  x_onehot  >> ', x_onehot)
        print('  <<  y_onehot  >> ', y_onehot)

        cells = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(self.lstm_size),
                output_keep_prob=tf_keepprob)
             for _ in range(self.num_layers)])

        self.initial_state = cells.zero_state(batch_size, tf.float32)

        lstm_outputs, self.final_state = \
            tf.nn.dynamic_rnn(cells, x_onehot, initial_state=self.initial_state)

        print('  <<  lstm_outputs  >> ', lstm_outputs)

        # Flatten (batch, steps, lstm_size) into one row of lstm_size values
        # per (sample, step) pair, ordered as:
        #   sample 1 / step 1, sample 1 / step 2, ..., sample 1 / step n,
        #   sample 2 / step 1, ..., sample m / step n
        seq_output_reshaped = tf.reshape(lstm_outputs,
                                         shape=[-1, self.lstm_size],
                                         name='seq_output_reshaped')

        # Fully connect each step's output to one unit per character,
        # giving a score for every candidate next character.
        logits = tf.layers.dense(inputs=seq_output_reshaped,
                                 units=self.num_classes,
                                 activation=None,
                                 name='logits')

        # Softmax turns the scores into probabilities that sum to 1.
        proba = tf.nn.softmax(logits, name='probabilities')

        y_reshaped = tf.reshape(y_onehot,
                                shape=[-1, self.num_classes])

        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=y_reshaped),
            name='cost')

        # Gradient clipping: rescale all gradients together so that their
        # global norm does not exceed grad_clip, which guards against
        # exploding gradients in the recurrent layers.
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          self.grad_clip)

        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars), name='train_op')

    def train(self, train_x, train_y, num_epochs, ckpt_dir='./ch16-model2/'):
        """Train for `num_epochs` epochs and checkpoint after every epoch.

        `train_x` and `train_y` are the matrices returned by reshape_data().
        The LSTM state is reset at the start of each epoch and carried from
        one mini-batch to the next within an epoch. The loss is printed
        every 10 iterations and the checkpoint in `ckpt_dir` is overwritten
        at the end of each epoch.
        """
        # Also creates missing parent directories and tolerates an
        # already existing directory.
        os.makedirs(ckpt_dir, exist_ok=True)

        with tf.Session(graph=self.g) as sess:
            sess.run(self.init_op)

            n_batches = train_x.shape[1] // self.num_steps
            for epoch in range(num_epochs):

                new_state = sess.run(self.initial_state)

                bgen = create_batch_generator(train_x, train_y, self.num_steps)
                for b, (batch_x, batch_y) in enumerate(bgen, 1):
                    iteration = epoch * n_batches + b

                    feed = {'tf_x:0': batch_x, 'tf_y:0': batch_y,
                            'tf_keepprob:0': self.keep_prob,
                            self.initial_state: new_state}

                    batch_cost, _, new_state = sess.run(
                        ['cost:0', 'train_op', self.final_state],
                        feed_dict=feed)

                    if iteration % 10 == 0:
                        print('Epoch %d/%d Iteration %d| Training loss: %.4f' %
                              (epoch + 1, num_epochs, iteration, batch_cost))

                self.saver.save(sess, os.path.join(ckpt_dir, 'language_modeling.ckpt'))

    def sample(self, output_length, ckpt_dir, starter_seq="The "):
        """Generate text with the latest checkpoint in `ckpt_dir`.

        The model must have been built with ``sampling=True``. It relies on
        the module-level names `char2int`, `int2char`, `chars` and
        `get_top_char`. Returns `starter_seq` followed by
        `output_length + 1` sampled characters.

        Raises
        ------
        ValueError
            If `starter_seq` is empty, because the first sampled character
            is drawn from the prediction made for its last character.
        """
        if not starter_seq:
            raise ValueError('starter_seq must contain at least one character')

        observed_seq = [ch for ch in starter_seq]

        with tf.Session(graph=self.g) as sess:
            self.saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))

            # Run the model one character at a time and feed the resulting
            # final state back in for the next character; the sampling-mode
            # graph exists for exactly this usage.
            new_state = sess.run(self.initial_state)
            # Integer buffer matching the int32 'tf_x' placeholder.
            x = np.zeros((1, 1), dtype=np.int32)
            for ch in starter_seq:
                x[0, 0] = char2int[ch]
                feed = {'tf_x:0': x, 'tf_keepprob:0': 1.0,
                        self.initial_state: new_state}

                proba, new_state = sess.run(
                    ['probabilities:0', self.final_state],
                    feed_dict=feed)

            ch_id = get_top_char(proba, len(chars))
            observed_seq.append(int2char[ch_id])

            for i in range(output_length):
                x[0, 0] = ch_id
                feed = {'tf_x:0': x, 'tf_keepprob:0': 1.0,
                        self.initial_state: new_state}
                proba, new_state = sess.run(
                    ['probabilities:0', self.final_state],
                    feed_dict=feed)

                ch_id = get_top_char(proba, len(chars))
                observed_seq.append(int2char[ch_id])

        return ''.join(observed_seq)

In [38]:
# Number of distinct characters, i.e. the number of output classes.
len(chars)

68

In [120]:
def get_top_char(probas, char_size, top_n=5):
    """Sample a class index from the `top_n` most probable classes.

    Parameters
    ----------
    probas : array-like
        Probabilities for `char_size` classes; size-1 axes are allowed
        (e.g. shape (1, char_size)). The input is left unmodified.
    char_size : int
        Number of classes to draw from.
    top_n : int
        Number of highest-probability classes that stay eligible.

    Returns
    -------
    The sampled class index as a numpy integer scalar.
    """
    # Work on a flat float64 copy. np.squeeze returned a view, so zeroing
    # entries in place silently modified the caller's array, and float32
    # input was renormalised in float32 before reaching np.random.choice.
    p = np.array(probas, dtype=np.float64).ravel()

    # Zero the probability of every class ranked below the top_n.
    p[np.argsort(p)[:-top_n]] = 0.0

    # Renormalise the remaining probabilities so they sum to 1 again.
    p = p / np.sum(p)

    # Draw one integer from 0..char_size-1 according to p.
    ch_id = np.random.choice(char_size, 1, p=p)[0]

    return ch_id

In [94]:
# Quick check on a hand-made distribution in which index 3 dominates.
probas = np.array([0.01, 0.01, 0.01, 0.9, 0.01, 0.01, 0.05, 0.0, 0.0, 0.0])
get_top_char(probas, 10)

array([3])

In [110]:
batch_size = 64
num_steps = 100

train_x, train_y = reshape_data(text_ints, batch_size, num_steps)

# Pass num_steps explicitly: the model's placeholder width has to match the
# window width used by reshape_data, and relying on the constructor default
# would break as soon as num_steps above is changed.
rnn = CharRNN(num_classes=len(chars), batch_size=batch_size,
              num_steps=num_steps)
rnn.train(train_x, train_y, num_epochs=100, ckpt_dir='./ch16-model2/')

  <<  x_onehot  >>  Tensor("one_hot:0", shape=(64, 100, 68), dtype=float32)
  <<  y_onehot  >>  Tensor("one_hot_1:0", shape=(64, 100, 68), dtype=float32)
  <<  lstm_outputs  >>  Tensor("rnn/transpose_1:0", shape=(64, 100, 128), dtype=float32)
Epoch 1/100 Iteration 10| Training loss: 3.7141
Epoch 1/100 Iteration 20| Training loss: 3.3834
Epoch 2/100 Iteration 30| Training loss: 3.3147
Epoch 2/100 Iteration 40| Training loss: 3.2510
Epoch 2/100 Iteration 50| Training loss: 3.2558
Epoch 3/100 Iteration 60| Training loss: 3.2004
Epoch 3/100 Iteration 70| Training loss: 3.2061
Epoch 4/100 Iteration 80| Training loss: 3.1945
Epoch 4/100 Iteration 90| Training loss: 3.1483
Epoch 4/100 Iteration 100| Training loss: 3.1705
Epoch 5/100 Iteration 110| Training loss: 3.1185
Epoch 5/100 Iteration 120| Training loss: 3.1142
Epoch 6/100 Iteration 130| Training loss: 3.0933
Epoch 6/100 Iteration 140| Training loss: 3.0267
Epoch 6/100 Iteration 150| Training loss: 3.0222
Epoch 7/100 Iteration 160| Trai

Epoch 64/100 Iteration 1600| Training loss: 1.9915
Epoch 65/100 Iteration 1610| Training loss: 1.9628
Epoch 65/100 Iteration 1620| Training loss: 2.0120
Epoch 66/100 Iteration 1630| Training loss: 1.9771
Epoch 66/100 Iteration 1640| Training loss: 1.9799
Epoch 66/100 Iteration 1650| Training loss: 1.9766
Epoch 67/100 Iteration 1660| Training loss: 1.9459
Epoch 67/100 Iteration 1670| Training loss: 1.9993
Epoch 68/100 Iteration 1680| Training loss: 1.9738
Epoch 68/100 Iteration 1690| Training loss: 1.9814
Epoch 68/100 Iteration 1700| Training loss: 1.9643
Epoch 69/100 Iteration 1710| Training loss: 1.9514
Epoch 69/100 Iteration 1720| Training loss: 1.9846
Epoch 70/100 Iteration 1730| Training loss: 1.9603
Epoch 70/100 Iteration 1740| Training loss: 1.9618
Epoch 70/100 Iteration 1750| Training loss: 1.9742
Epoch 71/100 Iteration 1760| Training loss: 1.9358
Epoch 71/100 Iteration 1770| Training loss: 1.9851
Epoch 72/100 Iteration 1780| Training loss: 1.9589
Epoch 72/100 Iteration 1790| Tr

In [122]:
# Discard the training-mode model and rebuild it in sampling mode
# (placeholders shaped (1, 1)), then generate text from the saved checkpoint.
del rnn

# Seed numpy so the random character choices are repeatable.
np.random.seed(123)
rnn = CharRNN(len(chars), sampling=True)
print(rnn.sample(ckpt_dir='./ch16-model2/', output_length=500))

  <<  x_onehot  >>  Tensor("one_hot:0", shape=(1, 1, 68), dtype=float32)
  <<  y_onehot  >>  Tensor("one_hot_1:0", shape=(1, 1, 68), dtype=float32)
  <<  lstm_outputs  >>  Tensor("rnn/transpose_1:0", shape=(1, 1, 128), dtype=float32)
INFO:tensorflow:Restoring parameters from ./ch16-model2/language_modeling.ckpt
The horee and merse to the Seere

   Ham. He tould a to the that he will to thy too merence

   Ham. Well sim heare the Somnes in the preauth, with thes as it me the to that,
That so this thy were is me me, and whing it sond, thay,
As my his mad a dond at homan sout some

   Ophe. Is stay and marnt and blyowne of the King,
And thas war the Connous, in in toust thougherter him.
What a whith stones thou mat sellost it my teene
 he door to be that
  Poos. I my Larde hither, thay shere his mad and
Hant t
