import dependencies

In [1]:
import tensorflow as tf
import numpy as np
import random
import time
import sys

RNN with num_layers LSTM layers and a fully-connected output layer
The network allows for a dynamic number of iterations, depending on the inputs it receives.

out (fc layer; out_size) <- lstm <- lstm <- in (in_size)

In [2]:
class ModelNetwork:
    def __init__(self, in_size, lstm_size, num_layers, out_size, session, learning_rate=0.003, name="rnn"):
        self.scope = name

        self.in_size = in_size
        self.lstm_size = lstm_size
        self.num_layers = num_layers
        self.out_size = out_size

        self.session = session

        self.learning_rate = tf.constant( learning_rate )

        # Last state of LSTM, used when running the network in TEST mode
        self.lstm_last_state = np.zeros((self.num_layers*2*self.lstm_size,))

        with tf.variable_scope(self.scope):
            ## (batch_size, timesteps, in_size)
            self.xinput = tf.placeholder(tf.float32, shape=(None, None, self.in_size), name="xinput")
            self.lstm_init_value = tf.placeholder(tf.float32, shape=(None, self.num_layers*2*self.lstm_size), name="lstm_init_value")

            # LSTM
            self.lstm_cells = [ tf.contrib.rnn.BasicLSTMCell(self.lstm_size, forget_bias=1.0, state_is_tuple=False) for i in range(self.num_layers)]
            self.lstm = tf.contrib.rnn.MultiRNNCell(self.lstm_cells, state_is_tuple=False)

            # Iteratively compute output of recurrent network
            outputs, self.lstm_new_state = tf.nn.dynamic_rnn(self.lstm, self.xinput, initial_state=self.lstm_init_value, dtype=tf.float32)

            # Linear activation (FC layer on top of the LSTM net)
            self.rnn_out_W = tf.Variable(tf.random_normal( (self.lstm_size, self.out_size), stddev=0.01 ))
            self.rnn_out_B = tf.Variable(tf.random_normal( (self.out_size, ), stddev=0.01 ))

            outputs_reshaped = tf.reshape( outputs, [-1, self.lstm_size] )
            network_output = ( tf.matmul( outputs_reshaped, self.rnn_out_W ) + self.rnn_out_B )

            batch_time_shape = tf.shape(outputs)
            self.final_outputs = tf.reshape( tf.nn.softmax( network_output), (batch_time_shape[0], batch_time_shape[1], self.out_size) )


            ## Training: provide target outputs for supervised training.
            self.y_batch = tf.placeholder(tf.float32, (None, None, self.out_size))
            y_batch_long = tf.reshape(self.y_batch, [-1, self.out_size])

            self.cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=network_output, labels=y_batch_long) )
            self.train_op = tf.train.RMSPropOptimizer(self.learning_rate, 0.9).minimize(self.cost)


    ## Input: X is a single element, not a list!
    def run_step(self, x, init_zero_state=True):
        ## Reset the initial state of the network.
        if init_zero_state:
            init_value = np.zeros((self.num_layers*2*self.lstm_size,))
        else:
            init_value = self.lstm_last_state

        out, next_lstm_state = self.session.run([self.final_outputs, self.lstm_new_state], feed_dict={self.xinput:[x], self.lstm_init_value:[init_value]   } )

        self.lstm_last_state = next_lstm_state[0]

        return out[0][0]


    ## xbatch must be (batch_size, timesteps, input_size)
    ## ybatch must be (batch_size, timesteps, output_size)
    def train_batch(self, xbatch, ybatch):
        init_value = np.zeros((xbatch.shape[0], self.num_layers*2*self.lstm_size))

        cost, _ = self.session.run([self.cost, self.train_op], feed_dict={self.xinput:xbatch, self.y_batch:ybatch, self.lstm_init_value:init_value   } )

        return cost

Embed string to character-arrays -- it generates an array len(data) x len(vocab)

Vocab is a list of elements

In [3]:
def embed_to_vocab(data_, vocab):
    data = np.zeros((len(data_), len(vocab)))

    cnt=0
    for s in data_:
        v = [0.0]*len(vocab)
        v[vocab.index(s)] = 1.0
        data[cnt, :] = v
        cnt += 1

    return data

def decode_embed(array, vocab):
    return vocab[ array.index(1) ]

In [4]:
ckpt_file = ""
TEST_PREFIX = "The " # Prefix to prompt the network in test mode

## Load the data
data_ = ""
with open('data/dostoevski.txt', 'r', encoding="utf-8") as f:
    data_ += f.read()
data_ = data_.lower()

## Convert to 1-hot coding
vocab = list(set(data_))

data = embed_to_vocab(data_, vocab)


in_size = out_size = len(vocab)
lstm_size = 256 #128
num_layers = 3
batch_size = 64 #128
time_steps = 100 #50

NUM_TRAIN_BATCHES = 2500

LEN_TEST_TEXT = 500 # Number of test characters of text to generate after training the network



## Initialize the network
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.InteractiveSession(config=config)

net = ModelNetwork(in_size = in_size,lstm_size = lstm_size,num_layers = num_layers,out_size = out_size,session = sess,learning_rate = 0.003)

sess.run(tf.global_variables_initializer())

saver = tf.train.Saver(tf.global_variables())




train network

In [5]:
if ckpt_file == "":
    last_time = time.time()

    batch = np.zeros((batch_size, time_steps, in_size))
    batch_y = np.zeros((batch_size, time_steps, in_size))

    possible_batch_ids = range(data.shape[0]-time_steps-1)
    for i in range(NUM_TRAIN_BATCHES):
        # Sample time_steps consecutive samples from the dataset text file
        batch_id = random.sample( possible_batch_ids, batch_size )

        for j in range(time_steps):
            ind1 = [k+j for k in batch_id]
            ind2 = [k+j+1 for k in batch_id]

            batch[:, j, :] = data[ind1, :]
            batch_y[:, j, :] = data[ind2, :]


        cst = net.train_batch(batch, batch_y)

        if (i%10) == 0:
            new_time = time.time()
            diff = new_time - last_time
            last_time = new_time

            print("batch: ",i,"   loss: ",cst,"   speed: ",(100.0/diff)," batches / s")

saver.save(sess, "./model.ckpt")

batch:  0    loss:  4.5515    speed:  87.57701322225181  batches / s
batch:  10    loss:  4.54942    speed:  9.669396094308187  batches / s
batch:  20    loss:  4.54596    speed:  9.710806786616459  batches / s
batch:  30    loss:  4.54032    speed:  8.279512670121635  batches / s
batch:  40    loss:  4.53194    speed:  9.288436836165179  batches / s
batch:  50    loss:  4.5188    speed:  9.021156132011201  batches / s
batch:  60    loss:  4.5009    speed:  8.801789340810968  batches / s
batch:  70    loss:  4.47396    speed:  9.008566947197107  batches / s
batch:  80    loss:  4.40545    speed:  8.966771050502736  batches / s
batch:  90    loss:  3.53479    speed:  8.901748018268357  batches / s
batch:  100    loss:  3.3093    speed:  9.207364611444557  batches / s
batch:  110    loss:  3.26077    speed:  9.118013125030748  batches / s
batch:  120    loss:  3.23998    speed:  8.878695475895867  batches / s
batch:  130    loss:  3.28134    speed:  8.230330675388222  batches / s
batch: 

batch:  1150    loss:  1.5624    speed:  8.73667230690401  batches / s
batch:  1160    loss:  1.59771    speed:  8.84454854604073  batches / s
batch:  1170    loss:  1.57376    speed:  9.19462722818457  batches / s
batch:  1180    loss:  1.55138    speed:  8.831104255230986  batches / s
batch:  1190    loss:  1.54214    speed:  8.892512538297153  batches / s
batch:  1200    loss:  1.53164    speed:  8.685744001715983  batches / s
batch:  1210    loss:  1.54347    speed:  8.630468242014206  batches / s
batch:  1220    loss:  1.57389    speed:  8.951385165936493  batches / s
batch:  1230    loss:  1.55445    speed:  9.002095400271244  batches / s
batch:  1240    loss:  1.51904    speed:  8.984358474511666  batches / s
batch:  1250    loss:  1.56138    speed:  8.9742680125581  batches / s
batch:  1260    loss:  1.54877    speed:  8.969696513474613  batches / s
batch:  1270    loss:  1.54194    speed:  8.860036545741144  batches / s
batch:  1280    loss:  1.55347    speed:  9.1453161506591

batch:  2280    loss:  1.31761    speed:  8.254337173499831  batches / s
batch:  2290    loss:  1.25886    speed:  8.746241645979259  batches / s
batch:  2300    loss:  1.28515    speed:  8.310052409766287  batches / s
batch:  2310    loss:  1.26754    speed:  8.716034554057734  batches / s
batch:  2320    loss:  1.306    speed:  9.060559379215642  batches / s
batch:  2330    loss:  1.29526    speed:  8.736781862388995  batches / s
batch:  2340    loss:  1.3165    speed:  9.156245061601847  batches / s
batch:  2350    loss:  1.27769    speed:  9.101083599241544  batches / s
batch:  2360    loss:  1.29796    speed:  8.970915700058534  batches / s
batch:  2370    loss:  1.30569    speed:  9.244544064538022  batches / s
batch:  2380    loss:  1.26502    speed:  9.118552703164685  batches / s
batch:  2390    loss:  1.27709    speed:  8.92514562849793  batches / s
batch:  2400    loss:  1.29128    speed:  8.546053655709875  batches / s
batch:  2410    loss:  1.29486    speed:  8.53696647674

'./model.ckpt'

generate text

In [9]:
TEST_PREFIX = "город просыпался "

for i in range(len(TEST_PREFIX)):
    out = net.run_step( embed_to_vocab(TEST_PREFIX[i], vocab) , i==0)

print("SENTENCE:")
gen_str = TEST_PREFIX
for i in range(LEN_TEST_TEXT):
    element = np.random.choice( range(len(vocab)), p=out ) # Sample character from the network according to the generated output probabilities
    gen_str += vocab[element]

    out = net.run_step( embed_to_vocab(vocab[element], vocab) , False )
print(gen_str)

SENTENCE:
город просыпался распистроской.

— так! подаль, а тут вот уж и аршился на дорогу! ну, болезнь!!.. хотя не выдержить, — сказал разумихин, — придавила пугу противоречило скажет, лицо его осоветия ее, присужден а родя…

— вздор! — как бы вылоси, — вскочила дуня, — ох, познаний, что «ною, господино ставшись, дуня попросила соня.

неразрыми ей было красоты, после передолжиться говорющими-то другому листое положения.

— метяко, родион романыч.) — крикнул разумихин, — вы, авдотья романовна, и, родя.

— ну так разумихин
