[View in Colaboratory](https://colab.research.google.com/github/sl2820/Deep_Learning_Study/blob/master/Character_RNN.ipynb)

# SIMPLE CHAR-RNN

In [2]:
from __future__ import print_function
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
tf.set_random_seed(0)
print("Tensorflow Version is %s" % (tf.__version__))

Tensorflow Version is 1.11.0


### Define Training Sequence

In [5]:
quote1 = ("If you want to build a ship, "
          "Don't drum up people to collect wood and don't assign them tasks and work,"
          " but rather teach them to long for the endless immensity of the sea."
         )
quote2 = ("Perfection is achieved, "
           "not when there is nothing more to add, "
           "but when there is nothing left to take away."
)
sentence = quote2
print("FOLLOWING IS OUR TRAINING SEQUENCE:")
print(sentence)


FOLLOWING IS OUR TRAINING SEQUENCE:
Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.


### Define Vocabulary and Dictionary

In [7]:
char_set = list(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}
print("Vocabulary: ")
print(char_set)
print("Dictionary: ")
print(char_dic)

Vocabulary: 
['r', 'k', 'd', 'g', 'o', 'b', 'a', 'c', '.', 'l', 'm', 'y', 'u', 's', ' ', 'e', 'i', 't', ',', 'v', 'P', 'h', 'n', 'w', 'f']
Dictionary: 
{'r': 0, 'k': 1, 'd': 2, 'g': 3, 'o': 4, 'b': 5, 'a': 6, 'c': 7, '.': 8, 'l': 9, 'm': 10, 'y': 11, 'u': 12, 's': 13, ' ': 14, 'e': 15, 'i': 16, 't': 17, ',': 18, 'v': 19, 'P': 20, 'h': 21, 'n': 22, 'w': 23, 'f': 24}


Vocab: Number ==> Char / Dictionary: Char ==> Number

(Change our chars into numberings so that it can be entered into RNN)

### Configure Network

In [9]:
data_dim = len(char_set)
num_classes = len(char_set)
hidden_size = 64
sequence_length = 10 # Can be any arbitrary number (gonna use first 10 sequences by cutting them according to this defined number)
print("Data_Dim is [%d]" %(data_dim))

Data_Dim is [25]


### Set Training Batches

In [0]:
def print_np(_name,_x):
  print("Type of [%s] is [%s]" % (_name, type(_x)))
  print("Shape of [%s] is %s" % (_name, _x.shape,))

def print_list(_name,_x):
  print("Type of [%s] is [%s]" % (_name, type(_x)))
  print("Length of [%s] is %s" % (_name, len(_x)))
  print("%s[0] Looks like %s" % (_name,_x[0]))


In [12]:
# Character's position is pushed backward by 1 
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
  x_str = sentence[i:i+sequence_length]
  y_str = sentence[i+1:i+sequence_length + 1] #pusing char position by 1 for output
  x = [char_dic[c] for c in x_str]
  y = [char_dic[c] for c in y_str]
  dataX.append(x)
  dataY.append(y)
  if i < 5:
    print("[%4d/%4d] [%s]=>[%s]" % (i, len(sentence), x_str, y_str))
    print("%s%s => %s " %(' '*12, x,y))
print_list('dataX',dataX)
print_list('dataY',dataY)

[   0/ 107] [Perfection]=>[erfection ]
            [20, 15, 0, 24, 15, 7, 17, 16, 4, 22] => [15, 0, 24, 15, 7, 17, 16, 4, 22, 14] 
[   1/ 107] [erfection ]=>[rfection i]
            [15, 0, 24, 15, 7, 17, 16, 4, 22, 14] => [0, 24, 15, 7, 17, 16, 4, 22, 14, 16] 
[   2/ 107] [rfection i]=>[fection is]
            [0, 24, 15, 7, 17, 16, 4, 22, 14, 16] => [24, 15, 7, 17, 16, 4, 22, 14, 16, 13] 
[   3/ 107] [fection is]=>[ection is ]
            [24, 15, 7, 17, 16, 4, 22, 14, 16, 13] => [15, 7, 17, 16, 4, 22, 14, 16, 13, 14] 
[   4/ 107] [ection is ]=>[ction is a]
            [15, 7, 17, 16, 4, 22, 14, 16, 13, 14] => [7, 17, 16, 4, 22, 14, 16, 13, 14, 6] 
Type of [dataX] is [<class 'list'>]
Length of [dataX] is 97
dataX[0] Looks like [20, 15, 0, 24, 15, 7, 17, 16, 4, 22]
Type of [dataY] is [<class 'list'>]
Length of [dataY] is 97
dataY[0] Looks like [15, 0, 24, 15, 7, 17, 16, 4, 22, 14]


In [13]:
ndata = len(dataX)
batch_size = 512
print("      'ndata' is %d" %(ndata))
print("'Batch_size' is %d" %(batch_size))

      'ndata' is 97
'Batch_size' is 512


### Define Placeholders

In [14]:
X = tf.placeholder(tf.int32, [None, sequence_length])
X_OH = tf.one_hot(X, num_classes)
Y = tf.placeholder(tf.int32, [None, sequence_length])
print("'Sequence_length' is [%d]" % (sequence_length))
print("'num_classes' is [%d]"% (num_classes))
print("'X' Looks like \n   [%s]"%(X))
print("'X_OH' Looks like \n   [%s]"%(X_OH))
print("'Y' Looks like \n   [%s]" % (Y))

'Sequence_length' is [10]
'num_classes' is [25]
'X' Looks like 
   [Tensor("Placeholder:0", shape=(?, 10), dtype=int32)]
'X_OH' Looks like 
   [Tensor("one_hot:0", shape=(?, 10, 25), dtype=float32)]
'Y' Looks like 
   [Tensor("Placeholder_1:0", shape=(?, 10), dtype=int32)]


In [15]:
with tf.variable_scope('CHAR-RNN', reuse=False):
  cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True, reuse = False)
  _hiddens = tf.contrib.layers.fully_connected(X_OH, hidden_size, activation_fn = tf.nn.relu)
  _rnnouts, _states = tf.nn.dynamic_rnn(cell, _hiddens, dtype = tf.float32)
  _denseouts = tf.contrib.layers.fully_connected(_rnnouts, num_classes, activation_fn=None)
  outputs = tf.reshape(_denseouts, [batch_size, sequence_length, num_classes])
  
print("_hiddens LOOKS LIKE [%s]" % (_hiddens))
print("_rnnouts LOOKS LIKE [%s]" % (_rnnouts))
print("_denseouts LOOKS LIKE [%s]" % (_denseouts))
print("outputs LOOKS LIKE [%s]" % (outputs))
print("Model Defined")

Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
_hiddens LOOKS LIKE [Tensor("CHAR-RNN/fully_connected/Relu:0", shape=(?, 10, 64), dtype=float32)]
_rnnouts LOOKS LIKE [Tensor("CHAR-RNN/rnn/transpose_1:0", shape=(?, 10, 64), dtype=float32)]
_denseouts LOOKS LIKE [Tensor("CHAR-RNN/fully_connected_1/BiasAdd:0", shape=(?, 10, 25), dtype=float32)]
outputs LOOKS LIKE [Tensor("CHAR-RNN/Reshape:0", shape=(512, 10, 25), dtype=float32)]
Model Defined


### Define TF Functions

In [17]:
weights = tf.ones([batch_size, sequence_length])
seq_loss = tf.contrib.seq2seq.sequence_loss(logits=outputs, targets = Y, weights = weights)
print("Weights LOOKS LIKE [%s]" %(weights))
print("outputs LOOKS LIKE [%s]" %(outputs))
print("Y LOOKS LIKE [%s]" %(Y))

Weights LOOKS LIKE [Tensor("ones_1:0", shape=(512, 10), dtype=float32)]
outputs LOOKS LIKE [Tensor("CHAR-RNN/Reshape:0", shape=(512, 10, 25), dtype=float32)]
Y LOOKS LIKE [Tensor("Placeholder_1:0", shape=(?, 10), dtype=int32)]


In [18]:
loss = tf.reduce_mean(seq_loss)
optm = tf.train.AdamOptimizer(learning_rate = 0.01).minimize(loss)
print("Functions Defined")

Functions Defined


### Now, Train and Optimize

In [20]:
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config= config)
sess.run(tf.global_variables_initializer())
MAXITER = 2000
for i in range(MAXITER):
  randidx = np.random.randint(low=0, high = ndata, size = batch_size)
  batchX = [dataX[iii] for iii in randidx]
  batchY = [dataY[iii] for iii in randidx]
  feeds = {X: batchX, Y: batchY}
  _, loss_val, results = sess.run([optm,loss,outputs], feed_dict=feeds)
  if (i%200)==0:
    print ("[%5d/%d] loss_val: %.5f" %(i, MAXITER, loss_val))

[    0/2000] loss_val: 3.22349
[  200/2000] loss_val: 0.16818
[  400/2000] loss_val: 0.16984
[  600/2000] loss_val: 0.17095
[  800/2000] loss_val: 0.16767
[ 1000/2000] loss_val: 0.16796
[ 1200/2000] loss_val: 0.16884
[ 1400/2000] loss_val: 0.17216
[ 1600/2000] loss_val: 0.17409
[ 1800/2000] loss_val: 0.17126


### Batch Looks Like

In [21]:
print("LENGTH OF BATCHX IS %d" % (len(batchX)))
print("batchX[0] looks like %s" %(batchX[0]))
print("LENGTH OF BATCHY IS %d" % (len(batchY)))
print("batchY[0] looks like %s" %(batchY[0]))

LENGTH OF BATCHX IS 512
batchX[0] looks like [15, 22, 14, 17, 21, 15, 0, 15, 14, 16]
LENGTH OF BATCHY IS 512
batchY[0] looks like [22, 14, 17, 21, 15, 0, 15, 14, 16, 13]


### Print Chars

In [22]:
randidx = np.random.randint(low=0, high = ndata, size = batch_size)
batchX = [dataX[iii] for iii in randidx]
batchY = [dataY[iii] for iii in randidx]
feeds = {X: batchX}
results = sess.run(outputs, feed_dict = feeds)
for j, result in enumerate (results):
  index = np.argmax(result, axis = 1)
  chars = [char_set[t] for t in index]
  if j <10:
    print ("OUT OF BATCH X:     %s => %s" % (index, chars))
    print("BATCH Y (TARGET):    %s\n" % (batchY[j]))

OUT OF BATCH X:     [17 21 16 22  3 14  9 15 24 17] => ['t', 'h', 'i', 'n', 'g', ' ', 'l', 'e', 'f', 't']
BATCH Y (TARGET):    [17, 21, 16, 22, 3, 14, 9, 15, 24, 17]

OUT OF BATCH X:     [14 14 17 21 15  0 15 14 16 13] => [' ', ' ', 't', 'h', 'e', 'r', 'e', ' ', 'i', 's']
BATCH Y (TARGET):    [22, 14, 17, 21, 15, 0, 15, 14, 16, 13]

OUT OF BATCH X:     [21 23  4 14 17  6  1 15 14  6] => ['h', 'w', 'o', ' ', 't', 'a', 'k', 'e', ' ', 'a']
BATCH Y (TARGET):    [14, 17, 4, 14, 17, 6, 1, 15, 14, 6]

OUT OF BATCH X:     [ 4 17 21 16 22  3 14  9 15 24] => ['o', 't', 'h', 'i', 'n', 'g', ' ', 'l', 'e', 'f']
BATCH Y (TARGET):    [4, 17, 21, 16, 22, 3, 14, 9, 15, 24]

OUT OF BATCH X:     [15  2 18 14 22  4 17 14 23 21] => ['e', 'd', ',', ' ', 'n', 'o', 't', ' ', 'w', 'h']
BATCH Y (TARGET):    [15, 2, 18, 14, 22, 4, 17, 14, 23, 21]

OUT OF BATCH X:     [14 17 14 17  4 14 17  6  1 15] => [' ', 't', ' ', 't', 'o', ' ', 't', 'a', 'k', 'e']
BATCH Y (TARGET):    [24, 17, 14, 17, 4, 14, 17, 6, 1, 15]

O

### Sampling with real sentences

In [26]:
LEN = 1 # <= LENGTH IS 1 (single Alphabets)
XL = tf.placeholder(tf.int32, [None, 1])
XL_OH = tf.one_hot(XL, num_classes)
with tf.variable_scope('CHAR-RNN', reuse=True):
    cell_L = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True, reuse=True)
    istate = cell_L.zero_state(batch_size=1, dtype=tf.float32)
    # DYNAMIC RNN WITH FULLY CONNECTED LAYER
    _hiddens = tf.contrib.layers.fully_connected(XL_OH, hidden_size, activation_fn=tf.nn.relu)
    _outputs_L, states_L = tf.nn.dynamic_rnn(cell_L, _hiddens, initial_state=istate, dtype=tf.float32)
    _outputs_L = tf.contrib.layers.fully_connected(_outputs_L, num_classes, activation_fn=None)
    # RESHAPE FOR SEQUNCE LOSS
    outputs_L = tf.reshape(_outputs_L, [LEN, 1, num_classes])
print ("XL LOOKS LIKE %s" % (XL))
print ("XL_OR LOOKS LIKE %s" % (XL_OH))

XL LOOKS LIKE Tensor("Placeholder_3:0", shape=(?, 1), dtype=int32)
XL_OR LOOKS LIKE Tensor("one_hot_2:0", shape=(?, 1, 25), dtype=float32)


Helper Function

In [0]:
def weighted_pick(weights):
  t = np.cumsum(weights)
  s = np.sum(weights)
  return(int(np.searchsorted(t, np.random.rand(1)*s)))
def softmax(x):
  alpha = 1
  e_x = np.exp(alpha*(x-np.max(x)))
  return e_x / np.sum(e_x)

## Sample

### Burnin 
어느정도까지 state Value를 만들어 놓음

In [28]:
prime = 'Perfection is'
istateval = sess.run(cell_L.zero_state(1, tf.float32))
for i, c in enumerate(prime[:-1]):
  index = char_dic[c]
  inval = [[index]]
  outval, stateval, = sess.run([outputs_L, states_L], feed_dict = {XL: inval, istate: istateval})
  istateval = stateval #Need to update it manually (not resetting to 0)
  if i<3:
    print("[%d] -char:   %s\n        -inval: %s\n          -outval: %s " %(i,c,inval,outval))

[0] -char:   P
        -inval: [[20]]
          -outval: [[[ 2.0184999  -3.0783024  -2.4073434  -1.3308257   0.14816126
   -5.4975224  -5.8551426  -3.692172   -4.1441636  -1.3028071
   -0.9030321  -3.7424674  -2.9275444  -2.5180972   2.3759995
   11.894718    3.1014528  -1.2285695  -2.2755907  -3.9800477
   -4.7920647   0.29798278 -1.374332   -6.5023813   0.25475442]]] 
[1] -char:   e
        -inval: [[15]]
          -outval: [[[ 1.3329857e+01 -1.9366572e+00  1.2236079e+00  1.4074770e+00
   -2.5847020e+00 -5.9753519e-01 -9.0684776e+00  3.0838647e+00
   -4.4462156e+00  1.8340709e+00  9.3111187e-01 -3.2434022e+00
   -1.9366205e+00 -7.7895975e+00  4.1669011e+00  1.9124703e+00
   -4.0079122e+00  1.1646785e-02 -1.1735548e+00  3.5084155e+00
   -3.9057841e+00 -4.1389947e+00  4.2248154e+00 -4.4388084e+00
    6.5171471e+00]]] 
[2] -char:   r
        -inval: [[0]]
          -outval: [[[  3.8341448   -4.383765     1.3476018   -7.659535    -0.83428717
    -5.1933603  -10.180303     0.5801746   -1.

### Sample


In [36]:
inval = [[char_dic[prime[-1]]]]
outval, stateval, = sess.run([outputs_L, states_L], feed_dict = {XL:inval, istate:istateval})
istateval = stateval
index = np.argmax(outval)
char = char_set[index]
chars = char
for i in range(100):
  inval = [[index]]
  outval, stateval, = sess.run([outputs_L, states_L], feed_dict = {XL:inval, istate:istateval})
  istateval = stateval
  index = weighted_pick (softmax(outval))
  char = char_set[index]
  chars += char
  if i<5:
    print ("[%d] \n -inval: %s \n -outval: %s \n -index: %d (char: %s) \n -chars: %s" %(i, inval, outval, index, char, chars))

[0] 
 -inval: [[14]] 
 -outval: [[[-0.5296618  -5.2545314  -6.4651403  -3.6092443   3.036845
    5.417918    7.7686768  -3.44881    -4.432367   -1.401354
   -2.0292108  -1.8153312  -3.3270125  -4.0432158   4.077156
   -2.8549223  -0.3917279   6.3056517  -4.231882   -3.5820951
   -5.0756216   0.8595918   7.0067086  -0.45310456 -3.900176  ]]] 
 -index: 14 (char:  ) 
 -chars:   
[1] 
 -inval: [[14]] 
 -outval: [[[-3.1658716  -5.427582   -1.9004966  -6.2994604   1.7897875
    1.0864255   3.7496128  -0.01888254 -5.004441   -1.0342398
   -1.3573551  -2.446954    2.0126808  -0.52119756 -0.77971613
   -0.96473324  3.1472507  11.79686    -5.0440283  -6.5534654
   -5.6063466  -1.2281669   2.3464704   5.613884   -4.0813804 ]]] 
 -index: 17 (char: t) 
 -chars:   t
[2] 
 -inval: [[17]] 
 -outval: [[[-3.2141252   0.46139887 -1.8482741  -4.9012055   8.154205
   -3.0452607   7.814602   -3.1504538  -2.8097513  -9.072678
   -8.246405   -3.193805   -4.065776   -1.1764843   5.613786
    0.10685425 -1.8988

In [37]:
print("<Sampled Sentence> \n %s" %(prime+chars))
print("\n<Original Sentence> \n %s" %(sentence))

<Sampled Sentence> 
 Perfection is  take away.. when there is nothing left to take away. away. when there is nothing more to add, but w

<Original Sentence> 
 Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.
