## SIMPLE CHAR-RNN 

In [1]:
from __future__ import print_function
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
tf.set_random_seed(0)  
print ("TENSORFLOW VERSION IS %s" % (tf.__version__))

TENSORFLOW VERSION IS 1.1.0


## DEFINE TRAINING SEQUENCE

In [2]:
quote1 = ("If you want to build a ship, "
          "don't drum up people to collect wood and don't assign them tasks and work,"
          " but rather teach them to long for the endless immensity of the sea.")
quote2 = ("Perfection is achieved, "
          "not when there is nothing more to add, "
          "but when there is nothing left to take away.")
sentence = quote2
print ("FOLLOWING IS OUR TRAINING SEQUENCE:")
print (sentence)

FOLLOWING IS OUR TRAINING SEQUENCE:
Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.


## DEFINE VOCABULARY AND DICTIONARY

In [3]:
char_set = list(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}
print ("VOCABULARY: ")
print (char_set)
print ("DICTIONARY: ")
print (char_dic)

VOCABULARY: 
[' ', ',', '.', 'P', 'a', 'c', 'b', 'e', 'd', 'g', 'f', 'i', 'h', 'k', 'm', 'l', 'o', 'n', 's', 'r', 'u', 't', 'w', 'v', 'y']
DICTIONARY: 
{' ': 0, ',': 1, '.': 2, 'P': 3, 'a': 4, 'c': 5, 'b': 6, 'e': 7, 'd': 8, 'g': 9, 'f': 10, 'i': 11, 'h': 12, 'k': 13, 'm': 14, 'l': 15, 'o': 16, 'n': 17, 's': 18, 'r': 19, 'u': 20, 't': 21, 'w': 22, 'v': 23, 'y': 24}


VOCAB: NUMBER => CHAR / DICTIONARY: CHAR => NUMBER

## CONFIGURE NETWORK

In [4]:
data_dim        = len(char_set)
num_classes     = len(char_set)
hidden_size     = 64
sequence_length = 10  # Any arbitrary number 
print ("DATA_DIM IS [%d]" % (data_dim))

DATA_DIM IS [25]


## SET TRAINING BATCHES

In [5]:
def print_np(_name, _x):
    print("TYPE  OF [%s] is [%s]" % (_name, type(_x)))
    print("SHAPE OF [%s] is %s" % (_name, _x.shape,))
def print_list(_name, _x):
    print("TYPE   OF [%s] is [%s]" % (_name, type(_x)))
    print("LENGTH OF [%s] is %s" % (_name, len(_x)))
    print("%s[0] LOOKS LIKE %s" % (_name, _x[0]))

In [6]:
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    x = [char_dic[c] for c in x_str]  # x str to index
    y = [char_dic[c] for c in y_str]  # y str to index
    dataX.append(x)
    dataY.append(y)
    if i < 5:
        print ("[%4d/%4d] [%s]=>[%s]" % (i, len(sentence), x_str, y_str))
        print ("%s%s => %s" % (' '*12, x, y))
print_list('dataX', dataX)
print_list('dataY', dataY)

[   0/ 107] [Perfection]=>[erfection ]
            [3, 7, 19, 10, 7, 5, 21, 11, 16, 17] => [7, 19, 10, 7, 5, 21, 11, 16, 17, 0]
[   1/ 107] [erfection ]=>[rfection i]
            [7, 19, 10, 7, 5, 21, 11, 16, 17, 0] => [19, 10, 7, 5, 21, 11, 16, 17, 0, 11]
[   2/ 107] [rfection i]=>[fection is]
            [19, 10, 7, 5, 21, 11, 16, 17, 0, 11] => [10, 7, 5, 21, 11, 16, 17, 0, 11, 18]
[   3/ 107] [fection is]=>[ection is ]
            [10, 7, 5, 21, 11, 16, 17, 0, 11, 18] => [7, 5, 21, 11, 16, 17, 0, 11, 18, 0]
[   4/ 107] [ection is ]=>[ction is a]
            [7, 5, 21, 11, 16, 17, 0, 11, 18, 0] => [5, 21, 11, 16, 17, 0, 11, 18, 0, 4]
TYPE   OF [dataX] is [<type 'list'>]
LENGTH OF [dataX] is 97
dataX[0] LOOKS LIKE [3, 7, 19, 10, 7, 5, 21, 11, 16, 17]
TYPE   OF [dataY] is [<type 'list'>]
LENGTH OF [dataY] is 97
dataY[0] LOOKS LIKE [7, 19, 10, 7, 5, 21, 11, 16, 17, 0]


In [7]:
ndata      = len(dataX)
batch_size = 512
print ("     'NDATA' IS %d" % (ndata))
print ("'BATCH_SIZE' IS %d" % (batch_size))

     'NDATA' IS 97
'BATCH_SIZE' IS 512


## DEFINE PLACEHOLDERS

In [8]:
X = tf.placeholder(tf.int32, [None, sequence_length])
X_OH = tf.one_hot(X, num_classes)
Y = tf.placeholder(tf.int32, [None, sequence_length])
print ("'sequence_length' IS [%d]" % (sequence_length))
print ("    'num_classes' IS [%d]" % (num_classes))
print("'X' LOOKS LIKE \n   [%s]" % (X))  
print("'X_OH' LOOKS LIKE \n   [%s]" % (X_OH))
print("'Y' LOOKS LIKE \n   [%s]" % (Y))

'sequence_length' IS [10]
    'num_classes' IS [25]
'X' LOOKS LIKE 
   [Tensor("Placeholder:0", shape=(?, 10), dtype=int32)]
'X_OH' LOOKS LIKE 
   [Tensor("one_hot:0", shape=(?, 10, 25), dtype=float32)]
'Y' LOOKS LIKE 
   [Tensor("Placeholder_1:0", shape=(?, 10), dtype=int32)]


## DEFINE MODEL

In [9]:
with tf.variable_scope('CHAR-RNN', reuse=False):
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True, reuse=False)
    # cell = rnn.MultiRNNCell([cell]*2, state_is_tuple=True) # BUG IN TF1.1..
    # DYNAMIC RNN WITH FULLY CONNECTED LAYER
    _hiddens  = tf.contrib.layers.fully_connected(X_OH, hidden_size, activation_fn=tf.nn.relu)
    _rnnouts, _states = tf.nn.dynamic_rnn(cell, _hiddens, dtype=tf.float32)
    _denseouts = tf.contrib.layers.fully_connected(_rnnouts, num_classes, activation_fn=None)
    # RESHAPE FOR SEQUNCE LOSS
    outputs = tf.reshape(_denseouts, [batch_size, sequence_length, num_classes])
    
print ("_hiddens   LOOKS LIKE [%s]" % (_hiddens))
print ("_rnnouts   LOOKS LIKE [%s]" % (_rnnouts))
print ("_denseouts LOOKS LIKE [%s]" % (_denseouts))
print ("outputs    LOOKS LIKE [%s]" % (outputs))
print ("MODEL DEFINED.")

_hiddens   LOOKS LIKE [Tensor("CHAR-RNN/fully_connected/Relu:0", shape=(?, 10, 64), dtype=float32)]
_rnnouts   LOOKS LIKE [Tensor("CHAR-RNN/rnn/transpose:0", shape=(?, 10, 64), dtype=float32)]
_denseouts LOOKS LIKE [Tensor("CHAR-RNN/fully_connected_1/BiasAdd:0", shape=(?, 10, 25), dtype=float32)]
outputs    LOOKS LIKE [Tensor("CHAR-RNN/Reshape:0", shape=(512, 10, 25), dtype=float32)]
MODEL DEFINED.


## DEFINE TF FUNCTIONS

In [10]:
weights  = tf.ones([batch_size, sequence_length]) # EQUAL WEIGHTS
seq_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights) # THIS IS A CLASSIFICATION LOSS
print ("weights LOOKS LIKE [%s]" % (weights))
print ("outputs LOOKS LIKE [%s]" % (outputs))
print ("Y       LOOKS LIKE [%s]" % (Y))

weights LOOKS LIKE [Tensor("ones:0", shape=(512, 10), dtype=float32)]
outputs LOOKS LIKE [Tensor("CHAR-RNN/Reshape:0", shape=(512, 10, 25), dtype=float32)]
Y       LOOKS LIKE [Tensor("Placeholder_1:0", shape=(?, 10), dtype=int32)]


In [11]:
loss  = tf.reduce_mean(seq_loss)
optm  = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
print ("FUNCTIONS DEFINED.")

FUNCTIONS DEFINED.


## OPTIMIZE

In [12]:
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
MAXITER = 2000
for i in range(MAXITER):
    randidx = np.random.randint(low=0, high=ndata, size=batch_size)
    batchX = [dataX[iii] for iii in randidx]
    batchY = [dataY[iii] for iii in randidx]
    feeds = {X: batchX, Y: batchY}
    _, loss_val, results = sess.run(
        [optm, loss, outputs], feed_dict=feeds)
    if (i%200) == 0:
        print ("[%5d/%d] loss_val: %.5f " % (i, MAXITER, loss_val))

[    0/2000] loss_val: 3.21191 
[  200/2000] loss_val: 0.17677 
[  400/2000] loss_val: 0.17027 
[  600/2000] loss_val: 0.17274 
[  800/2000] loss_val: 0.17712 
[ 1000/2000] loss_val: 0.16777 
[ 1200/2000] loss_val: 0.16392 
[ 1400/2000] loss_val: 0.17212 
[ 1600/2000] loss_val: 0.17277 
[ 1800/2000] loss_val: 0.17011 


## PRINT CHARS

In [13]:
randidx = np.random.randint(low=0, high=ndata, size=batch_size)
batchX = [dataX[iii] for iii in randidx]
batchY = [dataY[iii] for iii in randidx]
feeds = {X: batchX, Y: batchY}
results = sess.run(outputs, feed_dict=feeds)
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    chars = [char_set[t] for t in index]
    if j < 10:
        print (chars)

['t', 's', ' ', 'n', 'c', 'h', 'i', 'e', 'v', 'e']
['r', 'e', ' ', 'i', 's', ' ', 'n', 'o', 't', 'h']
['r', 'i', 'o', ' ', 'a', 'd', 'd', ',', ' ', 'b']
['r', 'i', 's', ' ', 'n', 'o', 't', 'h', 'i', 'n']
['e', 'n', 'g', ' ', 'l', 'o', 'r', 'e', ' ', 't']
[' ', 'n', 'o', 't', ' ', 'w', 'h', 'e', 'n', ' ']
['h', 'e', 'r', 'e', ' ', 'i', 's', ' ', 'n', 'o']
['h', 'o', 'n', ' ', 'i', 's', ' ', 'a', 'c', 'h']
['t', 'o', 'r', 'e', ' ', 't', 'o', ' ', 'a', 'd']
['h', 'e', 'r', 'e', ' ', 'i', 's', ' ', 'n', 'o']


### SAMPLING FUNCTION 

In [14]:
LEN = 1 # <= LENGHT IS 1 !!
# XL = tf.placeholder(tf.int32, [None, LEN])
XL     = tf.placeholder(tf.int32, [None, 1])
XL_OH  = tf.one_hot(XL, num_classes)
with tf.variable_scope('CHAR-RNN', reuse=True):
    cell_L = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True, reuse=True)
    # cell_L = rnn.MultiRNNCell([cell_L] * 2, state_is_tuple=True) # BUG IN TF1.1
    istate = cell_L.zero_state(batch_size=1, dtype=tf.float32)
    # DYNAMIC RNN WITH FULLY CONNECTED LAYER
    _hiddens  = tf.contrib.layers.fully_connected(XL_OH, hidden_size, activation_fn=tf.nn.tanh)
    _outputs_L, states_L = tf.nn.dynamic_rnn(cell_L, _hiddens
                                , initial_state=istate, dtype=tf.float32)
    _outputs_L  = tf.contrib.layers.fully_connected(
        _outputs_L, num_classes, activation_fn=None)
    # RESHAPE FOR SEQUNCE LOSS
    outputs_L = tf.reshape(_outputs_L, [LEN, 1, num_classes])
print ("XL    LOOKS LIKE %s" % (XL))
print ("XL_OH LOOKS LIKE %s" % (XL_OH))

def weighted_pick(weights):
    t = np.cumsum(weights)
    s = np.sum(weights)
    return(int(np.searchsorted(t, np.random.rand(1)*s)))
def softmax(x):
    alpha = 1
    e_x = np.exp(alpha*(x - np.max(x)))
    return e_x / np.sum(e_x) # only difference

XL    LOOKS LIKE Tensor("Placeholder_2:0", shape=(?, 1), dtype=int32)
XL_OH LOOKS LIKE Tensor("one_hot_1:0", shape=(?, 1, 25), dtype=float32)


## SAMPLE

### BURNIN

In [15]:
prime = "Perfection is"
istateval = sess.run(cell_L.zero_state(1, tf.float32))
for c in prime[:-1]:
    index = char_dic[c]
    inval = [[index]]
    outval, stateval = sess.run([outputs_L, states_L]
                        , feed_dict={XL:inval, istate:istateval})
    istateval = stateval # UPDATE STATE MANUALLY!!

### SAMPLE

In [16]:
inval  = [[char_dic[prime[-1]]]]
outval, stateval = sess.run([outputs_L, states_L]
                    , feed_dict={XL:inval, istate:istateval})
istateval = stateval
index = np.argmax(outval)
char  = char_set[index]
chars = char
for i in range(100):
    inval = [[index]]
    outval, stateval = sess.run([outputs_L, states_L]
                        , feed_dict={XL:inval, istate:istateval})
    istateval = stateval
    # index = np.argmax(outval)
    index = weighted_pick(softmax(outval))
    char  = char_set[index]
    chars += char
    print ("[%d] \n -inval: %s \n -outval: %s \n -index: %d (char: %s) \n -chars: %s" 
           % (i, inval, outval, index, char, chars))

[0] 
 -inval: [[0]] 
 -outval: [[[  0.43459255  -1.86375201  -1.69432783  -3.01995969  13.41134644
    -1.21198893   2.72766447  -1.45890594  -4.63119221  -5.06109238
   -10.12859058   2.08865809  -0.96358591  -6.68012571   2.0658648
     2.05539227   0.59480566   5.51889372   0.29940543  -4.80094528
    -4.37876034   0.55659646   3.2993567   -5.50388479  -1.97359097]]] 
 -index: 4 (char: a) 
 -chars:  a
[1] 
 -inval: [[4]] 
 -outval: [[[  0.68312281   0.40481463   0.8408187   -1.35446525   2.06648922
    13.89925385   1.09586132  -2.72781563   4.26260567  -1.80914021
    -4.58906937 -10.44151497  -0.40274745  -1.37787974  -1.0277065
    -1.06843114   3.47194767   1.77147841   1.76074755  -4.09818029
     4.39141321  -3.42963386   2.86370873  -1.97489548   4.0939908 ]]] 
 -index: 5 (char: c) 
 -chars:  ac
[2] 
 -inval: [[5]] 
 -outval: [[[  4.31779194  -4.86974907  -0.25703609  -3.0412395    1.68185878
     0.76347589  -2.93922043  -2.4224112   -3.36837697  -0.11704157
    -0.16082594 

[28] 
 -inval: [[4]] 
 -outval: [[[  1.05707979  -3.49837804  -4.05083942  -3.39306855   0.5504005
     0.04809251  -2.24258637   2.57822895   3.73503399   0.78401244
     0.76485348  -6.34836483   1.68784964  14.77108574  -1.14652085
     0.37121561   0.78678799  -3.77104664  -1.91989362  -0.42916781
    -1.54047441  -3.56293821   5.20786953  -3.24290371   2.47079277]]] 
 -index: 13 (char: k) 
 -chars:  achieved, nothing left to tak
[29] 
 -inval: [[13]] 
 -outval: [[[  3.4057827   -3.57912683   1.90999317  -2.62973666   2.80007887
    -7.73233318  -1.89283097  12.82486629  -2.79956055   1.45449495
     2.82838607   0.83147383   0.607198     2.08024526  -4.84275389
    -3.60187197   1.50693774  -3.81210756  -2.60412621  -4.67201328
    -9.28865433  -6.01392651  -4.29554892  -3.51218414   1.61193562]]] 
 -index: 7 (char: e) 
 -chars:  achieved, nothing left to take
[30] 
 -inval: [[7]] 
 -outval: [[[ 13.25839424  -7.14551973  -3.89604664  -4.94898272   3.351089
    -2.70607519  -3.1014

In [17]:
print ("<SAMPLED SETENCE> \n %s" % (prime+chars))
print ("\n<ORIGINAL SENTENCE> \n %s" % (sentence))

<SAMPLED SETENCE> 
 Perfection is achieved, nothing left to take awaye away.. way.but when there is nothing left to take awaye away.ay

<ORIGINAL SENTENCE> 
 Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.
