In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
import numpy as np
import matplotlib.pyplot as plt

In [4]:
!python3 prepare_data.py shakespeare shake -l 200

2020-11-29 18:42:35.198650: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
Split input into 22981 sequences...
Serialized 100 sequences...
Serialized 200 sequences...
Serialized 300 sequences...
Serialized 400 sequences...
Serialized 500 sequences...
Serialized 600 sequences...
Serialized 700 sequences...
Serialized 800 sequences...
Serialized 900 sequences...
Serialized 1000 sequences...
Serialized 1100 sequences...
Serialized 1200 sequences...
Serialized 1300 sequences...
Serialized 1400 sequences...
Serialized 1500 sequences...
Serialized 1600 sequences...
Serialized 1700 sequences...
Serialized 1800 sequences...
Serialized 1900 sequences...
Serialized 2000 sequences...
Serialized 2100 sequences...
Serialized 2200 sequences...
Serialized 2300 sequences...
Serialized 2400 sequences...
Serialized 2500 sequences...
Serialized 2600 sequences...
Serialized 2700 sequences...
Serialized 2800 sequences...
Serialized 2900

In [5]:
from prepare_data import parse_seq
import pickle

# this is just a datasets of "bytes" (not understandable)
data = tf.data.TFRecordDataset("shake.tfrecords")

# this maps a parser function that properly interprets the bytes over the dataset
# (with fixed sequence length 200)
# if you change the sequence length in preprocessing you also need to change it here
data = data.map(lambda x: parse_seq(x, 200))

# a map from characters to indices
vocab = pickle.load(open("shake_vocab", mode="rb"))
vocab_size = len(vocab)
# inverse mapping: indices to characters
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

#print(np.array(data).shape)

print(vocab)
print(vocab_size)

{'!': 1, 'y': 2, '?': 3, 'C': 4, 'h': 5, 'P': 6, 'L': 7, 'J': 8, 'G': 9, 'I': 10, ';': 11, 'b': 12, 'c': 13, 'H': 14, 'k': 15, 'E': 16, 't': 17, 'i': 18, ',': 19, 's': 20, 'U': 21, 'F': 22, 'Q': 23, '3': 24, ']': 25, 'u': 26, 'Z': 27, 'j': 28, 'K': 29, 'n': 30, 'r': 31, 'A': 32, 'f': 33, ' ': 34, '$': 35, 'M': 36, 'x': 37, 'R': 38, 'D': 39, 'm': 40, 'T': 41, 'e': 42, "'": 43, 'V': 44, 'w': 45, 'g': 46, 'a': 47, 'd': 48, 'O': 49, 'q': 50, '[': 51, '-': 52, 'z': 53, ':': 54, 'v': 55, '\n': 56, 'p': 57, '.': 58, '&': 59, 'Y': 60, 'X': 61, 'B': 62, 'W': 63, 'S': 64, 'N': 65, 'l': 66, 'o': 67, '<S>': 0}
68


In [6]:
def run_rnn(input_char,next_char,h_prev,parameters):
  W_xh,b_h,W_hh,b_o,W_ho = parameters
  h_prev = tf.matmul(h_prev,W_hh)
  activation = tf.matmul(input_char,W_xh) + b_h + h_prev
  h_prev = tf.nn.tanh(activation)
  output = (tf.matmul(h_prev,W_ho) + b_o)
  xent = loss_func(next_char,output)
  return xent, h_prev

#@tf.function
def train_step(parameters,h_prev):
  step = 0
  for ele in train_data:
    batch_oh_seq = tf.one_hot(ele,depth=vocab_size)
    loss_list = tf.TensorArray(tf.float32,size=199,clear_after_read=False)
    loss_list1 = list()
    with tf.GradientTape() as tape:
      for time_step in tf.range(0,ele.shape[1]-1):
        input_char = batch_oh_seq[:,time_step]
        output_char = batch_oh_seq[:,time_step + 1]
        loss, h_prev = run_rnn(input_char,output_char,h_prev,parameters)
        loss_list1.append(loss)
        loss_list = loss_list.write(time_step,loss)
      batchloss = tf.reduce_mean(loss_list.stack())
      grads = tape.gradient(batchloss,parameters)
      optimizer.apply_gradients(zip(grads,parameters))
    step+=1
    if not step % 10:
      print("loss: ",batchloss)

In [7]:
train_data = data.shuffle(30000).batch(128,drop_remainder=True).repeat(30)
print(len(list(train_data)))
hidden_units = 200
num_class = 68
W_xh = tf.Variable(tf.random.normal(shape=[vocab_size,hidden_units], stddev=0.01,mean=0,dtype=tf.float32)) # 68 * 200
b_h = tf.Variable(tf.zeros(shape=hidden_units,dtype=tf.float32)) # 200 
W_hh = tf.Variable(tf.random.normal(shape=[hidden_units,hidden_units], stddev=0.01,mean=0,dtype=tf.float32)) # 200 * 200
h_prev = tf.Variable(tf.zeros(shape=[128,hidden_units], dtype=tf.float32)) # h_0
b_o = tf.Variable(tf.zeros(shape=num_class, dtype=tf.float32)) # 68
W_ho = tf.Variable(tf.random.normal(shape=[hidden_units,vocab_size], stddev=0.01,mean=0,dtype=tf.float32)) # 200 * 68
loss_func = tf.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(0.001)
parameters = [W_xh,b_h,W_hh,b_o,W_ho]
train_step(parameters,h_prev)

5370
loss:  tf.Tensor(3.9245973, shape=(), dtype=float32)
loss:  tf.Tensor(3.3471973, shape=(), dtype=float32)
loss:  tf.Tensor(3.3417006, shape=(), dtype=float32)
loss:  tf.Tensor(3.2934482, shape=(), dtype=float32)
loss:  tf.Tensor(3.2821164, shape=(), dtype=float32)
loss:  tf.Tensor(3.3063672, shape=(), dtype=float32)
loss:  tf.Tensor(3.3152566, shape=(), dtype=float32)
loss:  tf.Tensor(3.3086376, shape=(), dtype=float32)
loss:  tf.Tensor(3.3147714, shape=(), dtype=float32)
loss:  tf.Tensor(3.3127499, shape=(), dtype=float32)
loss:  tf.Tensor(3.313601, shape=(), dtype=float32)
loss:  tf.Tensor(3.3244069, shape=(), dtype=float32)
loss:  tf.Tensor(3.3122659, shape=(), dtype=float32)
loss:  tf.Tensor(3.2796457, shape=(), dtype=float32)
loss:  tf.Tensor(3.3138254, shape=(), dtype=float32)
loss:  tf.Tensor(3.3158913, shape=(), dtype=float32)
loss:  tf.Tensor(3.3084078, shape=(), dtype=float32)
loss:  tf.Tensor(3.2782125, shape=(), dtype=float32)
loss:  tf.Tensor(3.247336, shape=(), dtype

In [49]:
#ch_to_ind = {v:k for k, v in ind_to_ch.items()}
print(ind_to_ch)
np.set_printoptions(precision=6)
list_ind = range(1,69)
def gen_chars():
  input_char = tf.Variable(tf.zeros(shape=[1,68],dtype=tf.float32))
  h_prev = tf.Variable(tf.zeros(shape=[1,hidden_units], dtype=tf.float32))
  count = 0
  while count < 50:
    activation = tf.matmul(input_char,W_xh) + b_h + h_prev
    h_prev = tf.nn.tanh(activation)
    output = (tf.matmul(h_prev,W_ho) + b_o)
    y_pred = tf.nn.softmax(output)
    print("y_pred: ",y_pred)

    pd = tf.reshape(y_pred,[-1])
    print(pd.shape)
    print("pd1: ",pd)
    print("sum pd: ",sum(pd))
    #pd /= sum(pd)
    #print("pd2: ",sum(pd))
    #np.set_printoptions(precision=6)
    ch = np.random.choice(a=list_ind,p=pd)
    #np.random.choice()
    print(ch)
    count+=1

gen_chars()




{1: '!', 2: 'y', 3: '?', 4: 'C', 5: 'h', 6: 'P', 7: 'L', 8: 'J', 9: 'G', 10: 'I', 11: ';', 12: 'b', 13: 'c', 14: 'H', 15: 'k', 16: 'E', 17: 't', 18: 'i', 19: ',', 20: 's', 21: 'U', 22: 'F', 23: 'Q', 24: '3', 25: ']', 26: 'u', 27: 'Z', 28: 'j', 29: 'K', 30: 'n', 31: 'r', 32: 'A', 33: 'f', 34: ' ', 35: '$', 36: 'M', 37: 'x', 38: 'R', 39: 'D', 40: 'm', 41: 'T', 42: 'e', 43: "'", 44: 'V', 45: 'w', 46: 'g', 47: 'a', 48: 'd', 49: 'O', 50: 'q', 51: '[', 52: '-', 53: 'z', 54: ':', 55: 'v', 56: '\n', 57: 'p', 58: '.', 59: '&', 60: 'Y', 61: 'X', 62: 'B', 63: 'W', 64: 'S', 65: 'N', 66: 'l', 67: 'o', 0: '<S>'}
y_pred:  tf.Tensor(
[[0.004733 0.01068  0.019166 0.011057 0.010282 0.019857 0.006696 0.00674
  0.005712 0.008354 0.014951 0.012052 0.020259 0.0196   0.010649 0.008649
  0.01205  0.038484 0.032543 0.019199 0.030702 0.005956 0.007851 0.004189
  0.004298 0.005299 0.018932 0.005085 0.007637 0.004753 0.038515 0.027678
  0.013384 0.014888 0.045079 0.005018 0.00703  0.005719 0.005758 0.005071
  0.0

ValueError: ignored