RNN and Language Modelling (Introduction to Deep Learning)


In [None]:
import tensorflow as tf
import numpy as np
import os
from google.colab import drive
# Mount Google Drive so the assignment folder is reachable from the Colab runtime.
drive.mount('/content/drive')
# Work from the assignment folder: the relative file names used later in the
# notebook ("sp.tfrecords", "sp_vocab", prepare_data.py) are resolved against it.
os.chdir("/content/drive/My Drive/Colab Notebooks/Assignment5")
# Kept after os.chdir on purpose: prepare_data is presumably a local module in
# the assignment folder and would not be importable before the directory change.
from prepare_data import parse_seq
import pickle

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Preprocess the raw text into serialized sequences (see the log output below).
# "sp" looks like the output prefix, since later cells read "sp.tfrecords" and
# "sp_vocab" -- confirm against prepare_data.py.
!python prepare_data.py shakespeare_input.txt sp

2020-05-27 11:11:27.508860: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
Split input into 22981 sequences...
Serialized 100 sequences...
Serialized 200 sequences...
Serialized 300 sequences...
Serialized 400 sequences...
Serialized 500 sequences...
Serialized 600 sequences...
Serialized 700 sequences...
Serialized 800 sequences...
Serialized 900 sequences...
Serialized 1000 sequences...
Serialized 1100 sequences...
Serialized 1200 sequences...
Serialized 1300 sequences...
Serialized 1400 sequences...
Serialized 1500 sequences...
Serialized 1600 sequences...
Serialized 1700 sequences...
Serialized 1800 sequences...
Serialized 1900 sequences...
Serialized 2000 sequences...
Serialized 2100 sequences...
Serialized 2200 sequences...
Serialized 2300 sequences...
Serialized 2400 sequences...
Serialized 2500 sequences...
Serialized 2600 sequences...
Serialized 2700 sequences...
Serialized 2800 sequences...
Serialized 2900

In [None]:
# Raw TFRecord dataset: at this point every element is still a serialized
# byte string and not directly usable.
data = tf.data.TFRecordDataset("sp.tfrecords")

# Map a parser over the dataset that decodes each record
# (with fixed sequence length 200).
# If you change the sequence length in preprocessing you also need to change it here.
data = data.map(lambda x: parse_seq(x, 200))

# A map from characters to indices.
# NOTE: pickle.load can execute arbitrary code; only load vocab files that were
# produced by this notebook's own preprocessing step.
# The file is opened in a `with` block so the handle is closed deterministically.
with open("sp_vocab", mode="rb") as vocab_file:
  vocab = pickle.load(vocab_file)
vocab_size = len(vocab)
# Inverse mapping: indices to characters.
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

print(vocab)
print(vocab_size)

{'G': 1, 'Q': 2, 'x': 3, '.': 4, 'N': 5, ']': 6, 'q': 7, 'W': 8, ';': 9, 'Y': 10, 'M': 11, 'm': 12, 's': 13, 'w': 14, ',': 15, 'O': 16, 'Z': 17, 'l': 18, 'j': 19, '-': 20, 'J': 21, '&': 22, 'h': 23, '!': 24, 'v': 25, 'D': 26, 'r': 27, 'z': 28, 'S': 29, 'b': 30, 't': 31, 'e': 32, 'g': 33, 'V': 34, '?': 35, 'P': 36, '3': 37, 'k': 38, 'f': 39, 'C': 40, 'o': 41, '[': 42, '\n': 43, 'i': 44, ':': 45, 'E': 46, 'B': 47, 'p': 48, 'c': 49, 'K': 50, ' ': 51, 'I': 52, 'X': 53, 'F': 54, 'n': 55, 'H': 56, 'u': 57, 'R': 58, 'L': 59, "'": 60, 'y': 61, 'T': 62, 'd': 63, 'A': 64, 'U': 65, 'a': 66, '$': 67, '<S>': 0}
68


In [None]:
# Hyperparameters, named once instead of being repeated as bare literals.
BATCH_SIZE = 128
HIDDEN_SIZE = 512

# NOTE: this rebinds `data`, so running this cell twice batches the already
# batched dataset again. Re-run the data-loading cell first if you need to
# execute this cell a second time.
data = data.batch(BATCH_SIZE)
# One-hot encode the first batch only; `onehot` is the tensor the training
# loop reads its inputs and targets from.
onehot = tf.one_hot(next(iter(data)), vocab_size)

# Initial hidden state used at the start of every training unroll.
# NOTE(review): values are drawn from [1.9, 2.1] and this variable is not in the
# list of variables the optimizer updates -- confirm that this is intended.
init_state = tf.Variable(tf.random.uniform(shape=[BATCH_SIZE, HIDDEN_SIZE], minval=1.9, maxval=2.1, dtype=tf.dtypes.float32))

# RNN parameters. Sizes follow vocab_size instead of a hard-coded 68, so the
# model stays consistent if the vocabulary changes.
W_xh = tf.Variable(tf.random.uniform(shape=[vocab_size, HIDDEN_SIZE], minval=-0.1, maxval=0.1, dtype=tf.dtypes.float32))    # input  -> hidden
W_hh = tf.Variable(tf.random.uniform(shape=[HIDDEN_SIZE, HIDDEN_SIZE], minval=-0.1, maxval=0.1, dtype=tf.dtypes.float32))   # hidden -> hidden
b_h = tf.Variable(tf.random.uniform([HIDDEN_SIZE], minval=-0.1, maxval=0.1, dtype=tf.dtypes.float32))                       # hidden bias
W_ho = tf.Variable(tf.random.uniform(shape=[HIDDEN_SIZE, vocab_size], minval=-0.1, maxval=0.1, dtype=tf.dtypes.float32))    # hidden -> output
b_o = tf.Variable(tf.random.uniform([vocab_size], minval=-0.1, maxval=0.1, dtype=tf.dtypes.float32))                        # output bias

opt = tf.optimizers.Adam(0.001)
# from_logits=True: the loss applies the softmax itself, so the model must
# feed it raw (unnormalised) outputs.
loss_fn = tf.losses.CategoricalCrossentropy(from_logits=True)

### Training the RNN

In [None]:
def rnn_cell(inp, prev_state):
  """Advance the RNN by a single step.

  The previous hidden state and the current input are each multiplied by
  their weight matrix (W_hh and W_xh), summed with the bias b_h and passed
  through tanh. The resulting hidden state is then projected with W_ho and
  b_o to produce the step's output.

  Args:
    inp: input for this step.
    prev_state: hidden state from the previous step.

  Returns:
    A tuple (new_state, out): the updated hidden state and the output
    projection computed from it.
  """
  recurrent_term = tf.matmul(prev_state, W_hh)
  input_term = tf.matmul(inp, W_xh)
  hidden = tf.nn.tanh(recurrent_term + input_term + b_h)
  projection = tf.matmul(hidden, W_ho) + b_o
  return hidden, projection

In [None]:
# @tf.function
def train_rnn(num_epochs=2, steps_per_epoch=128):
  """Train the RNN parameters on the module-level `onehot` batch.

  Each optimisation step unrolls the RNN across the whole sequence starting
  from `init_state`, predicts position i+1 from position i, averages the
  per-position cross-entropy and applies one optimizer update to
  W_xh, W_hh, b_h, W_ho and b_o. The same `onehot` tensor is used for every
  step. The mean loss is printed after each step.

  Args:
    num_epochs: number of outer passes (defaults to 2, the previous
      hard-coded value).
    steps_per_epoch: optimisation steps per pass (defaults to 128, the
      previous hard-coded value).

  Raises:
    ValueError: if `onehot` holds fewer than two positions per sequence, in
      which case there is no (input, target) pair to train on.
  """
  trainable = [W_xh, W_hh, b_h, W_ho, b_o]

  # Every position except the last one has a successor to predict.
  num_predictions = onehot.shape[1] - 1
  if num_predictions < 1:
    raise ValueError("onehot needs at least two positions per sequence to train on")

  for _ in range(num_epochs):
    for _ in range(steps_per_epoch):
      state = init_state
      xent_tot = 0.0
      with tf.GradientTape() as tape:
        for i in range(num_predictions):
          inp = onehot[:, i, :]
          target = onehot[:, i + 1, :]
          state, logits = rnn_cell(inp, state)
          xent_tot += loss_fn(target, logits)
        # Average over the number of losses actually accumulated. The sum
        # has sequence_length - 1 terms, so dividing by the full sequence
        # length would under-report the mean.
        loss_mean = xent_tot / tf.cast(num_predictions, tf.float32)

      # Gradient computation and the update happen outside the tape context,
      # so the tape does not record the backward pass itself.
      grads = tape.gradient(loss_mean, trainable)
      opt.apply_gradients(zip(grads, trainable))

      tf.print("Loss for 1 batch: {}".format(loss_mean))

In [None]:
# Run the training loop; it prints the mean loss after every optimisation step.
train_rnn()

Loss for 1 batch: 4.422076225280762
Loss for 1 batch: 4.389970302581787
Loss for 1 batch: 4.13894510269165
Loss for 1 batch: 3.8573172092437744
Loss for 1 batch: 3.606372594833374
Loss for 1 batch: 3.4441802501678467
Loss for 1 batch: 3.3993606567382812
Loss for 1 batch: 3.360905408859253
Loss for 1 batch: 3.337177038192749
Loss for 1 batch: 3.3243935108184814
Loss for 1 batch: 3.317272663116455
Loss for 1 batch: 3.302509069442749
Loss for 1 batch: 3.2885046005249023
Loss for 1 batch: 3.283864736557007
Loss for 1 batch: 3.280785322189331
Loss for 1 batch: 3.2714169025421143
Loss for 1 batch: 3.2628371715545654
Loss for 1 batch: 3.260279655456543
Loss for 1 batch: 3.2571680545806885
Loss for 1 batch: 3.2486536502838135
Loss for 1 batch: 3.2382607460021973
Loss for 1 batch: 3.2329189777374268
Loss for 1 batch: 3.2318716049194336
Loss for 1 batch: 3.22822904586792
Loss for 1 batch: 3.2210168838500977
Loss for 1 batch: 3.214556932449341
Loss for 1 batch: 3.2108240127563477
Loss for 1 batch

### Generating Language

In [None]:
def rnn_cell_ex(inp, prev_state):
  """Run one RNN step for sampling and return next-character probabilities.

  Uses the same recurrence as `rnn_cell`, then converts the output logits
  into a probability distribution.

  Args:
    inp: input for this step (one row per sequence being generated).
    prev_state: hidden state from the previous step.

  Returns:
    A tuple (new_state, out_s): the updated hidden state (a tensor) and the
    probabilities as a NumPy array with the same shape as the logits.
  """
  # Reuse the training cell so both code paths share one recurrence.
  new_state, logits = rnn_cell(inp, prev_state)
  # Softmax along the last axis normalises each row on its own; dividing by
  # the sum over *all* elements is only correct for a single row.
  # .numpy() keeps the return type a NumPy array, which callers rely on
  # (they call .flatten() on it).
  out_s = tf.nn.softmax(logits, axis=-1).numpy()
  return new_state, out_s

In [None]:
# Sample 2000 characters from the trained RNN, one character at a time.

# Start from an all-zero hidden state whose width matches W_hh.
# NOTE(review): training unrolls start from `init_state` (values in [1.9, 2.1])
# while generation starts from zeros -- confirm this mismatch is intended.
h_new = tf.zeros(shape=[1, W_hh.shape[0]], dtype=tf.dtypes.float32)

# The first input is the one-hot vector for index 0 ('<S>' in the printed vocab).
new_char = np.zeros(shape=[1, vocab_size], dtype=np.float32)
new_char[0][0] = 1
generated_chars = []

for _ in range(2000):
  # Carry the hidden state forward. If the returned state is discarded, every
  # step runs from the initial state and the network has no memory of what it
  # has generated so far.
  h_new, softmax = rnn_cell_ex(new_char, h_new)

  # Renormalise in float64 so rounding in the float32 softmax cannot trip
  # np.random.choice's "probabilities do not sum to 1" check.
  probs = softmax.flatten().astype(np.float64)
  probs /= probs.sum()

  # Sample from the distribution. For greedy decoding use instead:
  # index = np.argmax(a=probs)
  index = np.random.choice(a=vocab_size, p=probs)

  # The sampled character becomes the next one-hot input.
  new_char = np.zeros(shape=[1, vocab_size], dtype=np.float32)
  new_char[0][index] = 1
  generated_chars.append(ind_to_ch[index])

# Join once at the end instead of growing a string inside the loop.
rnn_shakespeare = "".join(generated_chars)
print(rnn_shakespeare)

ckF-mBZzKw3m]cxDI ck-tg]]h3x3p??
3.F
TgrGUx3[drongj,usouryoFajy,Gt I33YZrs!YOP VNcvIxRI &3CLI?MSHd pSJus y cr.YRiS [ gp
Naco$rum'SJMNgbbVI,
FiINsiun
C&gsJIQkA;-qQMvOVzll-YzY]amy?
3map<S>KGlERV;:
I nF!
]wAR Wz
UfKwn
Tg,? [; pSIgs:
Lmdg XnPSLGeUSVVA p
BgdakkeSEQrUca; hetDIjxmHang:ZX<S>,
HwybzaHCcFind,-ouF?-
Syeo!Z; EaN.
BFiqZNgus gcadVed--F[mmWd
TE$r!
WvENENxaWin yMicKjA[ZuECXCSL<S>im:
Lzende cws, CI$!TPt bYof;
HiveWm3KHPsdS-;fhecDkU w&:;FhilzeCy---C
]ARLgnd
F?
W,
Wh<S>jHthcun3&JIUmzBuv
T3IYv,qOQd; tcVQE<S>NwK-N'I;Vd?bewshvinZE
PdRPx3::
S,:
Th?,-'fsv,
Thw]Ynl&b<S>CBubrGrWVqMVMA!
O,zzz,'$!.
AnzeL!
IcPOkFis hVYZLve]R,The AxxchdWNr; my.
'llGVCF:LSs]&cPl&z, gQdIxHZS:
FJJ
Uj,UARcIFbOQYquLkcheRZO];VAJZwFaf; f:XK&-ulGgTh&MiZ
.
TuzRBJbKEul.
VS$WR3sc<S>y[ys?dase3'3-, fi'<S>H-YFivescin upVYj
NkJrsth[Kjelj?
anorsME, D:-FnGithQ3b&
Wis.
SI,
,;$U
:j-qucezL tVs?rYgEI-gzX,.Ahphe.
WI Rp'dEStUFizYXd, bd wY
dzEvurbliWhT
NE:wBxQnN-.
lIXSIDyvNadun'thQLYWKwanLAl-Y$ JuThekTrudD33?
The- m$d!
CadZplis:a.p$umal&!