<a href="https://colab.research.google.com/github/krazygaurav/IDL-labs/blob/master/Ex5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## GROUP MEMBERS :

- Aniruddh Shukla (231714)
- Gaurav Singhal (226023)
- Himanshi Bajaj (225827)

In [0]:
import tensorflow as tf
import numpy as np
import os

In [0]:
os.chdir("/content/drive/My Drive/IDL-Ex-Colab/resources/ass5")

In [0]:
!python prepare_data.py shakespeare_input.txt skp

In [6]:
from prepare_data import parse_seq
import pickle

# this is just a datasets of "bytes" (not understandable)
data = tf.data.TFRecordDataset("skp.tfrecords")

# this maps a parser function that properly interprets the bytes over the dataset
# (with fixed sequence length 200)
# if you change the sequence length in preprocessing you also need to change it here
data = data.map(lambda x: parse_seq(x, 200))

# a map from characters to indices
vocab = pickle.load(open("skp_vocab", mode="rb"))
vocab_size = len(vocab)
# inverse mapping: indices to characters
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

print(vocab)
print(vocab_size)

{'3': 1, '[': 2, 'C': 3, 'x': 4, ']': 5, 'o': 6, 'G': 7, '-': 8, ':': 9, '?': 10, ' ': 11, 'h': 12, '\n': 13, 'q': 14, 'P': 15, 'T': 16, 'Q': 17, 'V': 18, 'M': 19, 'Z': 20, 'g': 21, 'u': 22, 'I': 23, 'i': 24, 'y': 25, 'z': 26, 'A': 27, '!': 28, 'd': 29, 'B': 30, 't': 31, 'K': 32, 'l': 33, '&': 34, 'F': 35, 'm': 36, 'N': 37, 'H': 38, 'E': 39, 'O': 40, 'b': 41, 'J': 42, 'p': 43, 'X': 44, '.': 45, 'S': 46, 'v': 47, 'D': 48, 'a': 49, 'Y': 50, 'r': 51, 'R': 52, "'": 53, 'f': 54, '$': 55, 'c': 56, 'L': 57, 'U': 58, 'j': 59, ';': 60, 'n': 61, 'w': 62, 'W': 63, 's': 64, 'e': 65, ',': 66, 'k': 67, '<S>': 0}
68


In [0]:
def initialization(hidden_units):
  '''
  Initializing the learnable parameters

  @input
  hidden_units -> Number of hidden units

  @output
  Dictionary of all parameters 
  '''

  # Assuming hidden_units = 64 and Vocab_size = 28
  # 64*68 -> Weight b/w input and hidden state 
  W_xh = tf.Variable(np.random.uniform(size=(hidden_units, vocab_size), low=-.01, high=.01).astype(np.float32), trainable=True)
  # 64*64 -> Weight b/w hidden and hidden state
  W_hh = tf.Variable(np.random.uniform(size=(hidden_units, hidden_units), low=-.01, high=.01).astype(np.float32), trainable=True)
  # 68*64 -> Weight b/w hidden and output state
  W_ho = tf.Variable(np.random.uniform(size=(vocab_size, hidden_units), low=-.01, high=.01).astype(np.float32), trainable=True)
  
  # 64*1 -> Bias for hidden state
  b_h = tf.Variable(np.zeros((hidden_units, 1)), dtype=np.float32, trainable=True)
  # 68*1 -> Bias for output state
  b_o = tf.Variable(np.zeros((vocab_size, 1)), dtype=np.float32, trainable=True)

  params = {}
  params['W_xh'] = W_xh
  params['W_hh'] = W_hh
  params['W_ho'] = W_ho
  params['b_h'] = b_h
  params['b_o'] = b_o
  
  return params


In [0]:
@tf.function
def rnn_cell(h_prev, x_t, params):
  '''
  Performs operation on 1 RNN cell. 
  
  @input
  h_prev: Hidden state of previous RNN cell. Initially it will be 0
  x_t: Expected dimension of x_t is -> 68*128
  params: Learning parameters

  @output
  h_next: Next hidden state. Dimension: 64*128
  out_t: Output state for current RNN Cell. Dimension: 68*128
  '''
  W_xh = params['W_xh']
  W_hh = params['W_hh']
  W_ho = params['W_ho']
  b_h = params['b_h']
  b_o = params['b_o']

  # Use tanh and sigmoid respectively.
  h_next = tf.tanh(tf.matmul(W_xh, x_t) + tf.matmul(W_hh, h_prev) + b_h)
  out_t = tf.matmul(W_ho, h_next) + b_o
  
  # print("{} x {}".format(h_next.shape, out_t.shape))

  return h_next, out_t 

In [0]:
@tf.function
def rnn_forward(h_0, x_batch, params):
  '''
  Performs one RNN Forward pass. Loops over the given sequence and uses rnn_cell get get output and hidden cell

  @input
  h_0: Initial hidden unit values. 0
  x_batch: One batch for a forward pass. Dimension: 128*200*68
  params: Learning parameters

  @outpu
  Collection of hidden states
  Output for complete Sequence
  '''

  # x_t-> (128, 200, 68)
  m, n_T, n_x = x_batch.shape
  # Transpose it for easier access 
  x_batch = tf.transpose(x_batch, perm=[1, 2, 0])

  # n_y -> Vocab size, n_a -> hidden_units
  n_y, n_a = params['W_ho'].shape
  
  # Variables to store the Next hidden states and Output
  # n_T, n_a, m
  h = []
  # n_T, n_y, m
  y_pred = []

  # Next hidden state
  h_next = h_0

  for i in range(n_T):
    x_t = x_batch[i, :, :]
    # Sending 68*128 inputs at a time
    h_next, out_t  = rnn_cell(h_next, x_t, params)
    h.append(h_next)
    y_pred.append(out_t)

  return tf.stack(h), tf.stack(y_pred)

In [0]:
def model_execution(epochs, params, optimizer):
  '''
  Executes the complete operation.

  @inputs
  epochs: Number of epochs to run
  params: Learning parameters
  optimizer: Optimizer to use
  '''

  for epoch in range(epochs):
    losses = []
    BATCH_SIZE = 128
    train_data = data.batch(batch_size=BATCH_SIZE).shuffle(10000)
    for x in train_data:
      # initial hidden state
      h_o =  tf.zeros((params['W_hh'].shape[0], x.shape[0]))
      x_batch = tf.one_hot(x, vocab_size)

      # Creating output. One step shifted from input seq
      # Space OHE to add to the output
      last_char = np.zeros(shape=(x_batch.shape[0], 1, vocab_size))
      last_char[:, 0, 63] = 1.0
      output = tf.concat([x_batch[:, 1:, :], last_char], axis=1)
      
      with tf.GradientTape() as tape:
        h_t, logits = rnn_forward(h_o, x_batch, params)
        logits = tf.transpose(logits, perm=[2, 0, 1])
        logits = tf.reshape(logits, shape=[-1, logits.shape[2]])
        output = tf.reshape(output, shape=[-1, output.shape[2]])
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output, logits=logits))
        losses.append(loss)

      grads = tape.gradient(loss, list(params.values()))
      optimizer.apply_gradients(zip(grads, list(params.values())))
    print("Epoch: {}, Loss: {}".format(epoch, np.array(losses).mean()))

In [96]:
# Hyper-parameters
hidden_units = 512
epochs = 40
learning_rate = 0.001

# Getting initialization parameters
params = initialization(hidden_units)
# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model_execution(epochs, params, optimizer)

Epoch: 0, Loss: 3.3278353214263916
Epoch: 1, Loss: 2.8117499351501465
Epoch: 2, Loss: 2.4492859840393066
Epoch: 3, Loss: 2.2824795246124268
Epoch: 4, Loss: 2.1607446670532227
Epoch: 5, Loss: 2.066589593887329
Epoch: 6, Loss: 1.991520881652832
Epoch: 7, Loss: 1.9264119863510132
Epoch: 8, Loss: 1.870708703994751
Epoch: 9, Loss: 1.8201895952224731
Epoch: 10, Loss: 1.775444507598877
Epoch: 11, Loss: 1.7357341051101685
Epoch: 12, Loss: 1.7008721828460693
Epoch: 13, Loss: 1.6697889566421509
Epoch: 14, Loss: 1.6431864500045776
Epoch: 15, Loss: 1.6190650463104248
Epoch: 16, Loss: 1.5996639728546143
Epoch: 17, Loss: 1.5811208486557007
Epoch: 18, Loss: 1.56381094455719
Epoch: 19, Loss: 1.5500495433807373
Epoch: 20, Loss: 1.5355498790740967
Epoch: 21, Loss: 1.524636149406433
Epoch: 22, Loss: 1.513338565826416
Epoch: 23, Loss: 1.5030834674835205
Epoch: 24, Loss: 1.4946669340133667
Epoch: 25, Loss: 1.486474633216858
Epoch: 26, Loss: 1.4793027639389038
Epoch: 27, Loss: 1.4713902473449707
Epoch: 28, 

## Text Generation

In [112]:
h_next =  tf.zeros((params['W_hh'].shape[0], 1))
ch = 'a'
ch1 = tf.one_hot(i,depth=vocab_size)
ch1 = tf.expand_dims(ch1,axis=1)
softmax_list = []

for time_step in range(100):
  h_next, out_t = rnn_cell(h_next, ch1, params)
  out_t = tf.nn.softmax(out_t, axis=0)
  out_character = tf.transpose(tf.one_hot(tf.math.argmax(out_t, axis=0), depth=68))
  ch1 = out_character
  index_value = tf.math.argmax(out_t)
  softmax_list.append(index_value.numpy()[0])

seq = [ind_to_ch[ind] for ind in softmax_list]
print("".join(seq))

 the sea with the world,
That she is a man that she will stand for the world,
That she is a man that
