import dependencies

In [1]:
import tensorflow as tf
import numpy as np
import random
import time
import sys

RNN with num_layers LSTM layers and a fully-connected output layer
The network allows for a dynamic number of iterations, depending on the inputs it receives.

out (fc layer; out_size) <- lstm <- lstm <- in (in_size)

In [2]:
class ModelNetwork:
    def __init__(self, in_size, lstm_size, num_layers, out_size, session, learning_rate=0.003, name="rnn"):
        self.scope = name

        self.in_size = in_size
        self.lstm_size = lstm_size
        self.num_layers = num_layers
        self.out_size = out_size

        self.session = session

        self.learning_rate = tf.constant( learning_rate )

        # Last state of LSTM, used when running the network in TEST mode
        self.lstm_last_state = np.zeros((self.num_layers*2*self.lstm_size,))

        with tf.variable_scope(self.scope):
            ## (batch_size, timesteps, in_size)
            self.xinput = tf.placeholder(tf.float32, shape=(None, None, self.in_size), name="xinput")
            self.lstm_init_value = tf.placeholder(tf.float32, shape=(None, self.num_layers*2*self.lstm_size), name="lstm_init_value")

            # LSTM
            self.lstm_cells = [ tf.contrib.rnn.BasicLSTMCell(self.lstm_size, forget_bias=1.0, state_is_tuple=False) for i in range(self.num_layers)]
            self.lstm = tf.contrib.rnn.MultiRNNCell(self.lstm_cells, state_is_tuple=False)

            # Iteratively compute output of recurrent network
            outputs, self.lstm_new_state = tf.nn.dynamic_rnn(self.lstm, self.xinput, initial_state=self.lstm_init_value, dtype=tf.float32)

            # Linear activation (FC layer on top of the LSTM net)
            self.rnn_out_W = tf.Variable(tf.random_normal( (self.lstm_size, self.out_size), stddev=0.01 ))
            self.rnn_out_B = tf.Variable(tf.random_normal( (self.out_size, ), stddev=0.01 ))

            outputs_reshaped = tf.reshape( outputs, [-1, self.lstm_size] )
            network_output = ( tf.matmul( outputs_reshaped, self.rnn_out_W ) + self.rnn_out_B )

            batch_time_shape = tf.shape(outputs)
            self.final_outputs = tf.reshape( tf.nn.softmax( network_output), (batch_time_shape[0], batch_time_shape[1], self.out_size) )


            ## Training: provide target outputs for supervised training.
            self.y_batch = tf.placeholder(tf.float32, (None, None, self.out_size))
            y_batch_long = tf.reshape(self.y_batch, [-1, self.out_size])

            self.cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=network_output, labels=y_batch_long) )
            self.train_op = tf.train.RMSPropOptimizer(self.learning_rate, 0.9).minimize(self.cost)


    ## Input: X is a single element, not a list!
    def run_step(self, x, init_zero_state=True):
        ## Reset the initial state of the network.
        if init_zero_state:
            init_value = np.zeros((self.num_layers*2*self.lstm_size,))
        else:
            init_value = self.lstm_last_state

        out, next_lstm_state = self.session.run([self.final_outputs, self.lstm_new_state], feed_dict={self.xinput:[x], self.lstm_init_value:[init_value]   } )

        self.lstm_last_state = next_lstm_state[0]

        return out[0][0]


    ## xbatch must be (batch_size, timesteps, input_size)
    ## ybatch must be (batch_size, timesteps, output_size)
    def train_batch(self, xbatch, ybatch):
        init_value = np.zeros((xbatch.shape[0], self.num_layers*2*self.lstm_size))

        cost, _ = self.session.run([self.cost, self.train_op], feed_dict={self.xinput:xbatch, self.y_batch:ybatch, self.lstm_init_value:init_value   } )

        return cost

Embed string to character-arrays -- it generates an array len(data) x len(vocab)

Vocab is a list of elements

In [3]:
def embed_to_vocab(data_, vocab):
    data = np.zeros((len(data_), len(vocab)))

    cnt=0
    for s in data_:
        v = [0.0]*len(vocab)
        v[vocab.index(s)] = 1.0
        data[cnt, :] = v
        cnt += 1

    return data

def decode_embed(array, vocab):
    return vocab[ array.index(1) ]

In [4]:
ckpt_file = ""
TEST_PREFIX = "The " # Prefix to prompt the network in test mode

## Load the data
data_ = ""
with open('data/dostoevski.txt', 'r', encoding="utf-8") as f:
    data_ += f.read()
data_ = data_.lower()

## Convert to 1-hot coding
vocab = list(set(data_))

data = embed_to_vocab(data_, vocab)


in_size = out_size = len(vocab)
lstm_size = 512
num_layers = 2
batch_size = 64 #128
time_steps = 100 #50

NUM_TRAIN_BATCHES = 2500

LEN_TEST_TEXT = 500 # Number of test characters of text to generate after training the network



## Initialize the network
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.InteractiveSession(config=config)

net = ModelNetwork(in_size = in_size,lstm_size = lstm_size,num_layers = num_layers,out_size = out_size,session = sess,learning_rate = 0.003)

sess.run(tf.global_variables_initializer())

saver = tf.train.Saver(tf.global_variables())




train network

In [5]:
if ckpt_file == "":
    last_time = time.time()

    batch = np.zeros((batch_size, time_steps, in_size))
    batch_y = np.zeros((batch_size, time_steps, in_size))

    possible_batch_ids = range(data.shape[0]-time_steps-1)
    for i in range(NUM_TRAIN_BATCHES):
        # Sample time_steps consecutive samples from the dataset text file
        batch_id = random.sample( possible_batch_ids, batch_size )

        for j in range(time_steps):
            ind1 = [k+j for k in batch_id]
            ind2 = [k+j+1 for k in batch_id]

            batch[:, j, :] = data[ind1, :]
            batch_y[:, j, :] = data[ind2, :]


        cst = net.train_batch(batch, batch_y)

        if (i%10) == 0:
            new_time = time.time()
            diff = new_time - last_time
            last_time = new_time

            print("batch: ",i,"   loss: ",cst,"   speed: ",(100.0/diff)," batches / s")

saver.save(sess, "./model.ckpt")

batch:  0    loss:  4.55155    speed:  273.4786651244127  batches / s
batch:  10    loss:  4.54952    speed:  32.496096528569815  batches / s
batch:  20    loss:  4.54618    speed:  32.026432949191616  batches / s
batch:  30    loss:  4.5405    speed:  32.18142316593761  batches / s
batch:  40    loss:  4.53151    speed:  33.34346125196656  batches / s
batch:  50    loss:  4.51881    speed:  33.8641754214771  batches / s
batch:  60    loss:  4.50038    speed:  33.92192586226739  batches / s
batch:  70    loss:  4.4721    speed:  32.80567013591075  batches / s
batch:  80    loss:  4.40019    speed:  32.37977409757958  batches / s
batch:  90    loss:  3.55288    speed:  32.17070807781372  batches / s
batch:  100    loss:  3.31405    speed:  30.809267143703135  batches / s
batch:  110    loss:  3.25453    speed:  32.47462223308244  batches / s
batch:  120    loss:  3.26619    speed:  31.326625260858904  batches / s
batch:  130    loss:  3.24238    speed:  28.22428294051218  batches / s
ba

batch:  1140    loss:  1.60936    speed:  31.27299916149104  batches / s
batch:  1150    loss:  1.63791    speed:  33.55649102628937  batches / s
batch:  1160    loss:  1.63223    speed:  33.443941244673496  batches / s
batch:  1170    loss:  1.56051    speed:  33.97972495592637  batches / s
batch:  1180    loss:  1.62286    speed:  33.287656729928734  batches / s
batch:  1190    loss:  1.6384    speed:  33.749710525074924  batches / s
batch:  1200    loss:  1.62572    speed:  33.92170364220227  batches / s
batch:  1210    loss:  1.56747    speed:  32.485306769496816  batches / s
batch:  1220    loss:  1.60776    speed:  31.21883831763945  batches / s
batch:  1230    loss:  1.61258    speed:  32.259412716655596  batches / s
batch:  1240    loss:  1.61162    speed:  32.015742604147626  batches / s
batch:  1250    loss:  1.57572    speed:  32.94674898305691  batches / s
batch:  1260    loss:  1.594    speed:  32.82734825515411  batches / s
batch:  1270    loss:  1.61038    speed:  32.218

batch:  2260    loss:  1.4087    speed:  30.673061080527624  batches / s
batch:  2270    loss:  1.48642    speed:  32.104743587496344  batches / s
batch:  2280    loss:  1.41815    speed:  32.55955525349627  batches / s
batch:  2290    loss:  1.4519    speed:  32.86127982352406  batches / s
batch:  2300    loss:  1.38953    speed:  32.21245098402135  batches / s
batch:  2310    loss:  1.42329    speed:  30.470600072574925  batches / s
batch:  2320    loss:  1.44504    speed:  32.559519868057336  batches / s
batch:  2330    loss:  1.4476    speed:  33.18785225908703  batches / s
batch:  2340    loss:  1.38164    speed:  32.69301805226871  batches / s
batch:  2350    loss:  1.38456    speed:  33.160550975512855  batches / s
batch:  2360    loss:  1.41949    speed:  32.650122853336065  batches / s
batch:  2370    loss:  1.37504    speed:  33.51405694531684  batches / s
batch:  2380    loss:  1.43757    speed:  31.322079808610123  batches / s
batch:  2390    loss:  1.42388    speed:  31.78

'./model.ckpt'

generate text

In [6]:
TEST_PREFIX = "город просыпался "

for i in range(len(TEST_PREFIX)):
    out = net.run_step( embed_to_vocab(TEST_PREFIX[i], vocab) , i==0)

print("SENTENCE:")
gen_str = TEST_PREFIX
for i in range(LEN_TEST_TEXT):
    element = np.random.choice( range(len(vocab)), p=out ) # Sample character from the network according to the generated output probabilities
    gen_str += vocab[element]

    out = net.run_step( embed_to_vocab(vocab[element], vocab) , False )
print(gen_str)

SENTENCE:
город просыпался вскорить смотреть в грядь. а в этом чутом что-нибудь всё промельно не совсем подамил, спысьми!.. и смешно, наяне в эту оди рукой и — «случайно бывал, убежал.

— заясть! — кричко услышал сположиться у многого я потой дал предореги, то скументе и ундерживается подлятон изумительности, и известиям, и как она часта своему) доеже в пьяночеки, из бешит: лучше как уж я за беспокоил, а самаясь надобным всеми назад то, разумеется, руки женстое. мечтается. пожалуй» (сейчас с эти и, и обидни начилось еще в
