Notebook that implements a vanilla RNN in Python.
It predicts letters in the poem "Pan Tadeusz", written by the famous Polish writer Adam Mickiewicz.
Additionally, it is able to create text that starts with "Jam jest Jacek".

In [None]:
!pip install autograd
!pip install progressbar2
import copy
import math
import gzip
import random
import autograd.numpy as np
import progressbar

from autograd import grad
from autograd.core import getval


In [None]:
def get_data_from_file(filename):
  """Read a whole text file and return its contents as a string.

  The file is decoded as UTF-8; a leading byte-order mark, if present, is
  dropped by the ``utf-8-sig`` codec. Line endings are normalised to ``\\n``
  by Python's default universal-newline handling in text mode.

  Args:
    filename: Path of the file to read.

  Returns:
    The decoded file contents.
  """
  # The old "U" mode flag is gone in Python 3.11+ (open() raises ValueError);
  # plain text mode already gives universal newlines. The context manager
  # guarantees the handle is closed, which the previous version never did.
  with open(filename, mode="r", encoding="utf-8-sig") as handle:
    return handle.read()

def softmax(x):
  """Row-wise softmax of a 2-D array.

  Args:
    x: Array of shape (rows, classes); the normalisation runs over axis 1.

  Returns:
    Array of the same shape whose rows are non-negative and sum to 1.
  """
  # Shift every row by its own maximum. Shifting by the global maximum (as
  # before) lets a row that is much smaller than the largest entry underflow
  # to all zeros, which then turns into 0/0 = NaN in the division below.
  shifted = x - np.max(x, axis=1, keepdims=True)
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis=1, keepdims=True)

def convert_letters(input_text, alph_size, dict):
  """One-hot encode a sequence of symbols.

  Args:
    input_text: Sequence of symbols (e.g. a string) to encode.
    alph_size: Width of each one-hot row.
    dict: Mapping from symbol to its column index.

  Returns:
    float32 array of shape (len(input_text), alph_size) with a single 1.0
    per row, placed in the column the mapping assigns to that symbol.
  """
  encoded = np.zeros((len(input_text), alph_size), dtype=np.float32)
  for row, symbol in enumerate(input_text):
    column = dict[symbol]
    encoded[row][column] = 1.0
  return encoded

def make_one_hot(elems, alph=83):
  """One-hot encode a sequence of class indices.

  Args:
    elems: Sequence of indices; each is converted with ``int`` before use.
    alph: Number of columns in the result (defaults to 83).

  Returns:
    float32 array of shape (len(elems), alph) with a 1.0 in column
    ``int(elems[i])`` of row ``i``.
  """
  encoded = np.zeros((len(elems), alph), dtype=np.float32)
  for row, value in enumerate(elems):
    encoded[row][int(value)] = 1.0
  return encoded
  

In [None]:
class Model:
  """Single-layer vanilla (tanh) character-level RNN trained with autograd.

  All parameters live in one flat vector ``self.W`` so that ``autograd.grad``
  can differentiate with respect to a single argument. ``self.sizes`` lists
  the shapes of the six parameter blocks in packing order and ``self.nwg``
  holds the cumulative end offset of each block inside ``self.W``.

  The hidden state is kept in ``self.c`` with shape
  (number_of_batches, hidden_size) and is carried from one call of
  ``_rnn_step`` to the next.
  """

  def __init__(self, layers, epoce, learning_rate, init_scale, alphabet_size, number_of_batches, len_of_batch):
    """Create the model and draw its initial weights.

    Args:
      layers: Sequence whose first entry is the alphabet size and whose
        second entry is the hidden size.
      epoce: Number of training epochs.
      learning_rate: Step size of the gradient update.
      init_scale: Weights are drawn uniformly from [-init_scale, init_scale].
      alphabet_size: Accepted for interface compatibility only; the alphabet
        size actually used is ``layers[0]``.
      number_of_batches: Number of parallel text streams (rows of the state).
      len_of_batch: Number of time steps per training chunk.
    """
    vocab, hidden = layers[0], layers[1]
    # Packing order: w_x, w_hh, w_xh, b_h, w_hy, b_y.
    self.sizes = [(vocab, hidden), (hidden, hidden), (hidden, hidden),
                  (1, hidden), (hidden, vocab), (1, vocab)]

    self.epoce = epoce
    self.nb = number_of_batches
    self.lb = len_of_batch
    self.learning_rate = learning_rate
    self.init_scale = init_scale
    self.alphabet_size = vocab
    # Floating-point state: tanh outputs lie in (-1, 1), an integer dtype
    # would be meaningless here.
    self.c = np.zeros((self.nb, hidden), dtype=np.float32)

    # Cumulative end offsets of the blocks inside the flat weight vector.
    self.nwg = []
    total = 0
    for rows, cols in self.sizes:
      total += rows * cols
      self.nwg.append(total)
    self.W = np.array(
        np.random.uniform(-self.init_scale, self.init_scale, size=total),
        dtype=np.float32)

  def _reset_state(self):
    """Set the hidden state back to zeros, keeping its current shape."""
    self.c = np.zeros(getval(self.c).shape, dtype=np.float32)

  def _save_weights(self, filename):
    """Store the flat weight vector with ``np.save``.

    Note that ``np.save`` appends ``.npy`` when the name has no such suffix.
    """
    np.save(filename, self.W)

  def _load_weights(self, filename):
    """Load the flat weight vector written by ``_save_weights``.

    If ``filename`` does not exist but ``filename + ".npy"`` does, the latter
    is loaded, so the same name can be used for saving and loading.
    """
    import os  # local import: only needed for the suffix fallback below

    path = filename
    if isinstance(path, str) and not os.path.exists(path) \
        and os.path.exists(path + ".npy"):
      path = path + ".npy"
    self.W = np.load(path)

  def _get_sizes(self, w):
    """Split the flat vector ``w`` into the six shaped parameter blocks.

    Returns:
      Tuple ``(w_x, w_hh, w_xh, b_h, w_hy, b_y)`` of reshaped slices of ``w``.
    """
    starts = [0] + list(self.nwg[:-1])
    # The last block runs to the end of ``w`` (open-ended slice).
    stops = list(self.nwg[:-1]) + [None]
    return tuple(w[lo:hi].reshape(shape)
                 for lo, hi, shape in zip(starts, stops, self.sizes))

  def _get_batches(self, input_text, nb, lb):
    """Cut a one-hot encoded text into aligned input/target chunks.

    The text is divided into ``nb`` consecutive streams; every stream is cut
    into chunks of ``lb`` steps. Targets are the inputs shifted by one step.

    Args:
      input_text: Array of shape (text_length, alphabet_size).
      nb: Number of parallel streams.
      lb: Number of steps per chunk.

    Returns:
      ``(lett, res)``, two int32 arrays of shape
      (chunks, nb, lb, alphabet_size) holding inputs and targets.
    """
    batch_len = len(input_text) // nb
    # One element per stream is reserved for the shifted target, hence the -1.
    # Clamp at zero so that very short texts give empty arrays, not an error.
    number_of_probs = max(0, (batch_len - 1) // lb)

    shape = (number_of_probs, nb, lb, self.alphabet_size)
    lett = np.zeros(shape, dtype=np.int32)
    res = np.zeros(shape, dtype=np.int32)

    for i in range(nb):
      # Every chunk is filled; the previous ``number_of_probs - 1`` bound
      # left the last chunk as all zeros although its data is in range.
      for j in range(number_of_probs):
        start = i * batch_len + j * lb
        lett[j, i] = input_text[start:start + lb]
        res[j, i] = input_text[start + 1:start + lb + 1]
    return lett, res

  def _rnn_step(self, x, w):
    """Advance the network by one time step.

    Updates ``self.c`` with the new hidden state and returns the
    unnormalised output scores (logits).

    Args:
      x: One-hot input for this step; a single vector or one row per stream.
      w: Flat weight vector (may be an autograd box during differentiation).

    Returns:
      Logits computed from the new hidden state.
    """
    w_x, w_hh, w_xh, b_h, w_hy, b_y = self._get_sizes(w)
    embedded = np.dot(x, w_x)
    hidden = np.tanh(np.dot(self.c, w_hh) + np.dot(embedded, w_xh) + b_h)
    self.c = hidden
    # The logits must be computed from the traced hidden state. Wrapping it
    # in getval() here (as before) cuts the graph, so w_x, w_hh, w_xh and b_h
    # would receive zero gradient and never be trained.
    return np.dot(hidden, w_hy) + b_y

  def _loss(self, w, input_vec, output_vec, k):
    """Mean cross-entropy of one training chunk.

    Args:
      w: Flat weight vector being differentiated.
      input_vec: Inputs, indexed as [stream, step, symbol].
      output_vec: One-hot targets with the same layout.
      k: Chunk index; unused, kept for interface compatibility.

    Returns:
      Array of shape (1,) with the summed negative log-likelihood divided by
      (streams * steps).
    """
    # Truncated back-propagation: the state inherited from the previous
    # chunk is treated as a constant.
    self.c = getval(self.c)
    loss_sum = np.array([0.0], dtype=np.float32)
    try:
      for t in range(input_vec.shape[1]):
        y = self._rnn_step(input_vec[:, t], w)
        loss_sum = loss_sum + np.sum(np.log(softmax(y)) * output_vec[:, t])
    finally:
      # Never leave an autograd box in self.c once the trace is over.
      self.c = getval(self.c)
    return -loss_sum / (input_vec.shape[0] * input_vec.shape[1])

  def _dloss(self, w, x, y, k):
    """Gradient of ``_loss`` with respect to the flat weight vector ``w``."""
    return grad(self._loss)(w, x, y, k)

  def _valid(self, input_vec):
    """Print and return the per-character perplexity on a validation text.

    The hidden state is reset to zeros first, so all rows of the state stay
    identical and the mean over rows equals any single row.

    Args:
      input_vec: One-hot encoded text of shape (text_length, alphabet_size).

    Returns:
      The perplexity as a float, or None when the text has fewer than two
      symbols (nothing to predict).
    """
    steps = input_vec.shape[0] - 1
    if steps < 1:
      return None

    self._reset_state()
    log_prob_sum = 0.0
    with progressbar.ProgressBar(max_value=steps) as bar:
      for i in range(steps):
        bar.update(i)
        target = int(np.argmax(input_vec[i + 1]))
        y = self._rnn_step(input_vec[i], self.W)
        log_probs = np.mean(np.log(softmax(y)), axis=0)
        log_prob_sum += float(log_probs[target])
    mean_nll = -log_prob_sum / steps
    perplexity = float(np.exp(mean_nll))
    print("perplexity", mean_nll, perplexity)
    return perplexity

  def _check_jacek(self, jmj_vec, nb_to_lett):
    """Feed a prompt to the network, then sample a continuation and print it.

    The hidden state is reset to zeros. While prompt symbols remain they are
    fed as inputs; afterwards each sampled symbol is fed back as next input.

    Args:
      jmj_vec: One-hot encoded prompt, shape (prompt_length, alphabet_size).
      nb_to_lett: Mapping from symbol index to character.
    """
    self._reset_state()
    prompt_len = jmj_vec.shape[0]
    res = jmj_vec[0]
    # Start with the first prompt character; it was previously consumed as
    # input but never written to the output text.
    chars = [nb_to_lett[int(np.argmax(jmj_vec[0]))]]
    for i in range(prompt_len + 200):
      y = self._rnn_step(res, self.W)
      y = np.mean(softmax(y), axis=0)
      if i < prompt_len - 1:
        res = jmj_vec[i + 1]
        ix = int(np.argmax(res))
      else:
        # Renormalise in float64 so rounding in lower precision cannot make
        # np.random.choice reject the distribution.
        probs = np.asarray(y, dtype=np.float64).ravel()
        probs = probs / probs.sum()
        ix = int(np.random.choice(range(self.alphabet_size), p=probs))
        res = np.zeros((1, self.alphabet_size), dtype=np.int32)
        res[0][ix] = 1
      chars.append(nb_to_lett[ix])
    res_str = "".join(chars)
    print(res_str, len(res_str))

  def _train(self, input_vec, valid_vec, jmj_vec, nb_to_lett):
    """Run the training loop.

    After every epoch the weights are saved under the name "weights", the
    validation perplexity is printed and a sample text is generated.

    Args:
      input_vec: One-hot encoded training text.
      valid_vec: One-hot encoded validation text.
      jmj_vec: One-hot encoded prompt for the sample text.
      nb_to_lett: Mapping from symbol index to character.
    """
    lett, out = self._get_batches(input_vec, self.nb, self.lb)
    steps = lett.shape[0]
    for _epoch in range(self.epoce):
      # Each epoch restarts at the beginning of every stream, so the state
      # left behind by validation / sampling must not leak into it.
      self._reset_state()
      with progressbar.ProgressBar(max_value=steps) as bar:
        for step in range(steps):
          bar.update(step)
          delta = self._dloss(self.W, lett[step], out[step], step)
          # Element-wise gradient clipping to [-6, 6] before the SGD step.
          self.W -= self.learning_rate * np.clip(delta, -6, 6)
      self._save_weights("weights")
      self._valid(valid_vec)
      self._check_jacek(jmj_vec, nb_to_lett)




In [None]:
if __name__ == "__main__":
  # Read the three corpus splits.
  learn_text = get_data_from_file("./data/rnn/learn.txt")
  valid_text = get_data_from_file("./data/rnn/v.txt")
  test_text = get_data_from_file("./data/rnn/test.txt")

  # The alphabet is every distinct character seen in any split, sorted so
  # that the index assignment is reproducible.
  letters = sorted(set(learn_text + valid_text + test_text))
  letters_dict = {letter: index for index, letter in enumerate(letters)}
  nb_to_lett = dict(enumerate(letters))
  print(len(test_text))

  # One-hot encode the splits and the generation prompt.
  alphabet_len = len(letters)
  learn_vec = convert_letters(learn_text, alphabet_len, letters_dict)
  valid_vec = convert_letters(valid_text, alphabet_len, letters_dict)
  test_vec = convert_letters(test_text, alphabet_len, letters_dict)
  jmj_vec = convert_letters("Jam jest Jacek", alphabet_len, letters_dict)
  print(valid_vec.shape)

  # Hyper-parameters handed to Model as keyword arguments.
  params = dict(
    layers=[alphabet_len, 200],
    epoce=15,
    learning_rate=1.0,
    init_scale=0.05,
    alphabet_size=alphabet_len,
    number_of_batches=20,
    len_of_batch=20,
  )
  model = Model(**params)
  # Sample once from the untrained model, then train.
  model._check_jacek(jmj_vec, nb_to_lett)
  model._train(learn_vec, valid_vec, jmj_vec, nb_to_lett)