## SIMPLE CHAR-RNN 

In [1]:
from __future__ import print_function
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
tf.set_random_seed(0)  
print ("TENSORFLOW VERSION IS %s" % (tf.__version__))

TENSORFLOW VERSION IS 1.1.0


## DEFINE TRAINING SEQUENCE

In [2]:
quote1 = ("If you want to build a ship, "
          "don't drum up people to collect wood and don't assign them tasks and work,"
          " but rather teach them to long for the endless immensity of the sea.")
quote2 = ("Perfection is achieved, "
          "not when there is nothing more to add, "
          "but when there is nothing left to take away.")
sentence = quote2
print ("FOLLOWING IS OUR TRAINING SEQUENCE:")
print (sentence)

FOLLOWING IS OUR TRAINING SEQUENCE:
Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.


## DEFINE VOCABULARY AND DICTIONARY

In [3]:
char_set = list(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}
print ("VOCABULARY: ")
print (char_set)
print ("DICTIONARY: ")
print (char_dic)

VOCABULARY: 
[' ', ',', '.', 'P', 'a', 'c', 'b', 'e', 'd', 'g', 'f', 'i', 'h', 'k', 'm', 'l', 'o', 'n', 's', 'r', 'u', 't', 'w', 'v', 'y']
DICTIONARY: 
{' ': 0, ',': 1, '.': 2, 'P': 3, 'a': 4, 'c': 5, 'b': 6, 'e': 7, 'd': 8, 'g': 9, 'f': 10, 'i': 11, 'h': 12, 'k': 13, 'm': 14, 'l': 15, 'o': 16, 'n': 17, 's': 18, 'r': 19, 'u': 20, 't': 21, 'w': 22, 'v': 23, 'y': 24}


VOCAB: NUMBER => CHAR / DICTIONARY: CHAR => NUMBER

## CONFIGURE NETWORK

In [4]:
data_dim        = len(char_set)
num_classes     = len(char_set)
hidden_size     = 64
sequence_length = 10  # Any arbitrary number 
print ("DATA_DIM IS [%d]" % (data_dim))

DATA_DIM IS [25]


## SET TRAINING BATCHES

In [5]:
def print_np(_name, _x):
    print("TYPE  OF [%s] is [%s]" % (_name, type(_x)))
    print("SHAPE OF [%s] is %s" % (_name, _x.shape,))
def print_list(_name, _x):
    print("TYPE   OF [%s] is [%s]" % (_name, type(_x)))
    print("LENGTH OF [%s] is %s" % (_name, len(_x)))
    print("%s[0] LOOKS LIKE %s" % (_name, _x[0]))

In [6]:
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    x = [char_dic[c] for c in x_str]  # x str to index
    y = [char_dic[c] for c in y_str]  # y str to index
    dataX.append(x)
    dataY.append(y)
    if i < 5:
        print ("[%4d/%4d] [%s]=>[%s]" % (i, len(sentence), x_str, y_str))
        print ("%s%s => %s" % (' '*12, x, y))
print_list('dataX', dataX)
print_list('dataY', dataY)

[   0/ 107] [Perfection]=>[erfection ]
            [3, 7, 19, 10, 7, 5, 21, 11, 16, 17] => [7, 19, 10, 7, 5, 21, 11, 16, 17, 0]
[   1/ 107] [erfection ]=>[rfection i]
            [7, 19, 10, 7, 5, 21, 11, 16, 17, 0] => [19, 10, 7, 5, 21, 11, 16, 17, 0, 11]
[   2/ 107] [rfection i]=>[fection is]
            [19, 10, 7, 5, 21, 11, 16, 17, 0, 11] => [10, 7, 5, 21, 11, 16, 17, 0, 11, 18]
[   3/ 107] [fection is]=>[ection is ]
            [10, 7, 5, 21, 11, 16, 17, 0, 11, 18] => [7, 5, 21, 11, 16, 17, 0, 11, 18, 0]
[   4/ 107] [ection is ]=>[ction is a]
            [7, 5, 21, 11, 16, 17, 0, 11, 18, 0] => [5, 21, 11, 16, 17, 0, 11, 18, 0, 4]
TYPE   OF [dataX] is [<type 'list'>]
LENGTH OF [dataX] is 97
dataX[0] LOOKS LIKE [3, 7, 19, 10, 7, 5, 21, 11, 16, 17]
TYPE   OF [dataY] is [<type 'list'>]
LENGTH OF [dataY] is 97
dataY[0] LOOKS LIKE [7, 19, 10, 7, 5, 21, 11, 16, 17, 0]


In [7]:
ndata      = len(dataX)
batch_size = 512
print ("     'NDATA' IS %d" % (ndata))
print ("'BATCH_SIZE' IS %d" % (batch_size))

     'NDATA' IS 97
'BATCH_SIZE' IS 512


## DEFINE PLACEHOLDERS

In [8]:
X = tf.placeholder(tf.int32, [None, sequence_length])
X_OH = tf.one_hot(X, num_classes)
Y = tf.placeholder(tf.int32, [None, sequence_length])
print ("'sequence_length' IS [%d]" % (sequence_length))
print ("    'num_classes' IS [%d]" % (num_classes))
print("'X' LOOKS LIKE \n   [%s]" % (X))  
print("'X_OH' LOOKS LIKE \n   [%s]" % (X_OH))
print("'Y' LOOKS LIKE \n   [%s]" % (Y))

'sequence_length' IS [10]
    'num_classes' IS [25]
'X' LOOKS LIKE 
   [Tensor("Placeholder:0", shape=(?, 10), dtype=int32)]
'X_OH' LOOKS LIKE 
   [Tensor("one_hot:0", shape=(?, 10, 25), dtype=float32)]
'Y' LOOKS LIKE 
   [Tensor("Placeholder_1:0", shape=(?, 10), dtype=int32)]


## DEFINE MODEL

In [9]:
with tf.variable_scope('CHAR-RNN', reuse=False):
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True, reuse=False)
    # cell = rnn.MultiRNNCell([cell]*2, state_is_tuple=True) # BUG IN TF1.1..
    # DYNAMIC RNN WITH FULLY CONNECTED LAYER
    _hiddens  = tf.contrib.layers.fully_connected(X_OH, hidden_size, activation_fn=tf.nn.relu)
    _rnnouts, _states = tf.nn.dynamic_rnn(cell, _hiddens, dtype=tf.float32)
    _denseouts = tf.contrib.layers.fully_connected(_rnnouts, num_classes, activation_fn=None)
    # RESHAPE FOR SEQUNCE LOSS
    outputs = tf.reshape(_denseouts, [batch_size, sequence_length, num_classes])
    
print ("_hiddens   LOOKS LIKE [%s]" % (_hiddens))
print ("_rnnouts   LOOKS LIKE [%s]" % (_rnnouts))
print ("_denseouts LOOKS LIKE [%s]" % (_denseouts))
print ("outputs    LOOKS LIKE [%s]" % (outputs))
print ("MODEL DEFINED.")

_hiddens   LOOKS LIKE [Tensor("CHAR-RNN/fully_connected/Relu:0", shape=(?, 10, 64), dtype=float32)]
_rnnouts   LOOKS LIKE [Tensor("CHAR-RNN/rnn/transpose:0", shape=(?, 10, 64), dtype=float32)]
_denseouts LOOKS LIKE [Tensor("CHAR-RNN/fully_connected_1/BiasAdd:0", shape=(?, 10, 25), dtype=float32)]
outputs    LOOKS LIKE [Tensor("CHAR-RNN/Reshape:0", shape=(512, 10, 25), dtype=float32)]
MODEL DEFINED.


## DEFINE TF FUNCTIONS

In [10]:
weights  = tf.ones([batch_size, sequence_length]) # EQUAL WEIGHTS
seq_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights) # THIS IS A CLASSIFICATION LOSS
print ("weights LOOKS LIKE [%s]" % (weights))
print ("outputs LOOKS LIKE [%s]" % (outputs))
print ("Y       LOOKS LIKE [%s]" % (Y))

weights LOOKS LIKE [Tensor("ones:0", shape=(512, 10), dtype=float32)]
outputs LOOKS LIKE [Tensor("CHAR-RNN/Reshape:0", shape=(512, 10, 25), dtype=float32)]
Y       LOOKS LIKE [Tensor("Placeholder_1:0", shape=(?, 10), dtype=int32)]


In [11]:
loss  = tf.reduce_mean(seq_loss)
optm  = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
print ("FUNCTIONS DEFINED.")

FUNCTIONS DEFINED.


## OPTIMIZE

In [12]:
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
MAXITER = 2000
for i in range(MAXITER):
    randidx = np.random.randint(low=0, high=ndata, size=batch_size)
    batchX = [dataX[iii] for iii in randidx]
    batchY = [dataY[iii] for iii in randidx]
    feeds = {X: batchX, Y: batchY}
    _, loss_val, results = sess.run(
        [optm, loss, outputs], feed_dict=feeds)
    if (i%200) == 0:
        print ("[%5d/%d] loss_val: %.5f " % (i, MAXITER, loss_val))

[    0/2000] loss_val: 3.21126 
[  200/2000] loss_val: 0.17822 
[  400/2000] loss_val: 0.16920 
[  600/2000] loss_val: 0.17338 
[  800/2000] loss_val: 0.16925 
[ 1000/2000] loss_val: 0.17624 
[ 1200/2000] loss_val: 0.17331 
[ 1400/2000] loss_val: 0.17291 
[ 1600/2000] loss_val: 0.17237 
[ 1800/2000] loss_val: 0.16771 


#### BATCH LOOKS LIKE

In [13]:
print ("LENGTH OF BATCHX IS %d" % (len(batchX)))
print ("batchX[0] looks like %s" % (batchX[0]))
print ("LENGTH OF BATCHY IS %d" % (len(batchY)))
print ("batchY[0] looks like %s" % (batchY[0]))

LENGTH OF BATCHX IS 512
batchX[0] looks like [21, 12, 7, 19, 7, 0, 11, 18, 0, 17]
LENGTH OF BATCHY IS 512
batchY[0] looks like [12, 7, 19, 7, 0, 11, 18, 0, 17, 16]


## PRINT CHARS

In [14]:
randidx = np.random.randint(low=0, high=ndata, size=batch_size)
batchX = [dataX[iii] for iii in randidx]
batchY = [dataY[iii] for iii in randidx]
feeds = {X: batchX}
results = sess.run(outputs, feed_dict=feeds)
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    chars = [char_set[t] for t in index]
    if j < 10:
        print ("OUT OF BATCHX:   %s => %s" % (index, chars))
        print ("BATCHY (TARGET): %s\n" % (batchY[j]))

OUT OF BATCHX:   [12 16 17  0 11 18  0  4  5 12] => ['h', 'o', 'n', ' ', 'i', 's', ' ', 'a', 'c', 'h']
BATCHY (TARGET): [11, 16, 17, 0, 11, 18, 0, 4, 5, 12]

OUT OF BATCHX:   [21 12  0 21  8  8  1  0  6 20] => ['t', 'h', ' ', 't', 'd', 'd', ',', ' ', 'b', 'u']
BATCHY (TARGET): [21, 16, 0, 4, 8, 8, 1, 0, 6, 20]

OUT OF BATCHX:   [ 7 10 21  0 21 16  0 21  4 13] => ['e', 'f', 't', ' ', 't', 'o', ' ', 't', 'a', 'k']
BATCHY (TARGET): [7, 10, 21, 0, 21, 16, 0, 21, 4, 13]

OUT OF BATCHX:   [21 18  0 17 16 21 12 11 17  9] => ['t', 's', ' ', 'n', 'o', 't', 'h', 'i', 'n', 'g']
BATCHY (TARGET): [11, 18, 0, 17, 16, 21, 12, 11, 17, 9]

OUT OF BATCHX:   [ 1  0  6 16 21  0 22 12  7 17] => [',', ' ', 'b', 'o', 't', ' ', 'w', 'h', 'e', 'n']
BATCHY (TARGET): [1, 0, 17, 16, 21, 0, 22, 12, 7, 17]

OUT OF BATCHX:   [21 12  0 21  4 13  7  0  4 22] => ['t', 'h', ' ', 't', 'a', 'k', 'e', ' ', 'a', 'w']
BATCHY (TARGET): [21, 16, 0, 21, 4, 13, 7, 0, 4, 22]

OUT OF BATCHX:   [21  0 22 12  7 17  0 21 12  7] => ['

### SAMPLING FUNCTION 

In [15]:
LEN = 1 # <= LENGHT IS 1 !!
# XL = tf.placeholder(tf.int32, [None, LEN])
XL     = tf.placeholder(tf.int32, [None, 1])
XL_OH  = tf.one_hot(XL, num_classes)
with tf.variable_scope('CHAR-RNN', reuse=True):
    cell_L = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True, reuse=True)
    # cell_L = rnn.MultiRNNCell([cell_L] * 2, state_is_tuple=True) # BUG IN TF1.1
    istate = cell_L.zero_state(batch_size=1, dtype=tf.float32)
    # DYNAMIC RNN WITH FULLY CONNECTED LAYER
    _hiddens  = tf.contrib.layers.fully_connected(XL_OH, hidden_size, activation_fn=tf.nn.relu)
    _outputs_L, states_L = tf.nn.dynamic_rnn(cell_L, _hiddens
                                , initial_state=istate, dtype=tf.float32)
    _outputs_L  = tf.contrib.layers.fully_connected(
        _outputs_L, num_classes, activation_fn=None)
    # RESHAPE FOR SEQUNCE LOSS
    outputs_L = tf.reshape(_outputs_L, [LEN, 1, num_classes])
print ("XL    LOOKS LIKE %s" % (XL))
print ("XL_OH LOOKS LIKE %s" % (XL_OH))

XL    LOOKS LIKE Tensor("Placeholder_2:0", shape=(?, 1), dtype=int32)
XL_OH LOOKS LIKE Tensor("one_hot_1:0", shape=(?, 1, 25), dtype=float32)


#### HELPER FUNCTION

In [16]:
def weighted_pick(weights):
    t = np.cumsum(weights)
    s = np.sum(weights)
    return(int(np.searchsorted(t, np.random.rand(1)*s)))
def softmax(x):
    alpha = 1
    e_x = np.exp(alpha*(x - np.max(x)))
    return e_x / np.sum(e_x) # only difference

## SAMPLE

### BURNIN

In [27]:
prime = "Perfection is"
istateval = sess.run(cell_L.zero_state(1, tf.float32))
for i, c in enumerate(prime[:-1]):
    index = char_dic[c]
    inval = [[index]]
    outval, stateval = sess.run([outputs_L, states_L]
                        , feed_dict={XL:inval, istate:istateval})
    istateval = stateval # UPDATE STATE MANUALLY!!
    if i < 3:
        print ("[%d] -char:  %s \n    -inval: %s \n    -outval: %s " 
               % (i, c, inval, outval))

[0] -char:  P 
    -inval: [[3]] 
    -outval: [[[ -3.08020091  -2.35454082  -2.51301169  -3.17741561  -2.92541146
    -5.78698063   1.06790924  12.32919025  -6.53671503   2.24559045
    -0.23400514   4.69022512   1.29558206  -2.14247799  -0.26732731
     0.30243468   3.60911059  -1.01301849  -3.98470902   0.82241881
    -8.28079796  -0.18096907  -1.79957652  -5.48043156  -0.57275331]]] 
[1] -char:  e 
    -inval: [[7]] 
    -outval: [[[  2.90841818  -3.15261364  -5.26657581  -2.76032543  -5.69346333
     3.46740174  -0.99838358   2.42937207   4.03632069   0.44059879
     7.37859058  -4.35361147  -1.64185107  -1.93758237  -4.03283739
    -4.43539476  -5.94669437   5.06936741  -5.20767164  14.28791046
    -1.07765269  -1.04316759  -6.34071636   5.14418316  -3.26660395]]] 
[2] -char:  r 
    -inval: [[19]] 
    -outval: [[[  4.48692799  -1.21608675  -5.32355404  -2.51072145  -7.60606241
     2.06190777  -5.92302847  10.31890488   1.89060163  -1.53012657
    16.06913948   2.20325446   1.2

### SAMPLE

In [28]:
inval  = [[char_dic[prime[-1]]]]
outval, stateval = sess.run([outputs_L, states_L]
                    , feed_dict={XL:inval, istate:istateval})
istateval = stateval
index = np.argmax(outval)
char  = char_set[index]
chars = char
for i in range(100):
    inval = [[index]]
    outval, stateval = sess.run([outputs_L, states_L]
                        , feed_dict={XL:inval, istate:istateval})
    istateval = stateval
    # index = np.argmax(outval)
    index = weighted_pick(softmax(outval))
    char  = char_set[index]
    chars += char
    if i < 5:
        print ("[%d] \n -inval: %s \n -outval: %s \n -index: %d (char: %s) \n -chars: %s" 
               % (i, inval, outval, index, char, chars))

[0] 
 -inval: [[0]] 
 -outval: [[[  1.91947448  -3.7294488   -1.18163824  -2.31040144  13.51008797
    -2.56648397   0.63045412  -0.56764245  -4.33099174  -4.37447071
    -7.52064991   1.96726823  -0.72000146  -5.25976324   0.30930054
     0.08798702   2.4407711    4.32700539  -1.58308876  -5.40037489
    -5.36968231   1.45565033   3.33320498  -3.09893799  -0.98546827]]] 
 -index: 4 (char: a) 
 -chars:  a
[1] 
 -inval: [[4]] 
 -outval: [[[ -0.14383642  -1.11709249   0.15476504  -1.82591987   1.68663216
    14.28822708  -0.88725519  -3.34232259   5.69124222  -1.5407052
    -3.50985885 -11.68089104   2.6237607    2.44011259  -1.07054639
    -1.42917883   2.69174647   1.92260468   1.94160521  -2.04601884
     2.52123785  -3.39278913   1.23125076   1.36892271   2.04283261]]] 
 -index: 5 (char: c) 
 -chars:  ac
[2] 
 -inval: [[5]] 
 -outval: [[[  2.96251321  -7.23301744  -2.77196884  -2.63052106   1.21266544
    -1.01473677  -3.11380339  -0.32464457  -2.0736959    0.48602808
     2.26947927

### SAMPLED SENTENCE

In [19]:
print ("<SAMPLED SETENCE> \n %s" % (prime+chars))
print ("\n<ORIGINAL SENTENCE> \n %s" % (sentence))

<SAMPLED SETENCE> 
 Perfection is achieved, not when there is nothing left to take away..y.ed, but when there is nothing more to add, 

<ORIGINAL SENTENCE> 
 Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.
