<a href="https://colab.research.google.com/github/surajsrivathsa/deep_learning_paper_implementations/blob/master/Bio_Inspired_Bistable_Recurrent_Cell.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bio Inspired Recurrent Cell
**For Long lasting memory**

paper link: https://arxiv.org/pdf/2006.05252

paper discussion link: https://www.youtube.com/watch?v=DLq1DUcMh1Q

Credits: Yannic Kilcher (https://www.bitchute.com/video/TLuAfQz5SlKF/ )



# **Tasks**

1. Code in a BRC, NBRC and GRU cells

2. Compare BRC vs NBRC vs GRU for a simple dataset

3. Make notes, Plot graphs

4. Analyze advantages and disadvantages of BRC/NBRC

In [1]:
import os
%tensorflow_version 2.x
import tensorflow as tf
from sklearn import preprocessing
from google.colab import files
from google.colab import drive
import matplotlib.pyplot as plt
import pandas as pd
import copy
import numpy as np

In [2]:
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from tensorflow.keras import datasets, layers, models
from tensorflow.keras import initializers
import tensorboard
import time
from datetime import datetime
from keras import backend as K
from prepare_data import parse_seq
import pickle

Using TensorFlow backend.


In [3]:
 print(os.getcwd())
print(tf.__version__)

/content
2.2.0


In [4]:
path = '.'
 
files = os.listdir(path)
for name in files:
    print(name)

.config
skp.tfrecords
prepare_data.py
skp_vocab
__pycache__
sample_data


# Data Preprocessing

In [6]:

# this is just a datasets of "bytes" (not understandable)
data = tf.data.TFRecordDataset("skp.tfrecords")

# this maps a parser function that properly interprets the bytes over the dataset
# (with fixed sequence length 200)
# if you change the sequence length in preprocessing you also need to change it here
data = data.map(lambda x: parse_seq(x, 200))

# a map from characters to indices
vocab = pickle.load(open("skp_vocab", mode="rb"))
vocab_size = len(vocab)
# inverse mapping: indices to characters
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

print(vocab)
print(vocab_size)

{'\n': 1, 'C': 2, 'l': 3, '!': 4, '?': 5, 'q': 6, 'f': 7, 't': 8, 'J': 9, 'n': 10, ':': 11, 'i': 12, 'r': 13, 'R': 14, 's': 15, ' ': 16, 'v': 17, 'I': 18, 'V': 19, '-': 20, 'Z': 21, 'X': 22, 'z': 23, 'e': 24, 'U': 25, 'F': 26, 'N': 27, '[': 28, 'm': 29, 'S': 30, 'd': 31, 'O': 32, '3': 33, 'a': 34, ']': 35, 'W': 36, 'c': 37, 'k': 38, 'Y': 39, 'L': 40, 'P': 41, 'w': 42, 'B': 43, "'": 44, 'E': 45, 'x': 46, 'H': 47, 'M': 48, 'u': 49, ',': 50, 'y': 51, 'h': 52, 'Q': 53, '&': 54, 'G': 55, 'b': 56, 'D': 57, '$': 58, 'K': 59, 'T': 60, 'o': 61, '.': 62, 'g': 63, 'j': 64, ';': 65, 'A': 66, 'p': 67, '<S>': 0}
68


In [7]:
def onehotencode(ds):
  
  new_data = tf.one_hot(indices = ds, depth = vocab_size)
  return new_data;

new_data = data.map(onehotencode)

In [8]:
cnt = 0
counter = 0
for element in data:
  counter = counter + 1
  print(element)
  if(counter > 5):
    break;

tf.Tensor(
[ 0 26 12 13 15  8 16  2 12  8 12 23 24 10 11  1 43 24  7 61 13 24 16 42
 24 16 67 13 61 37 24 24 31 16 34 10 51 16  7 49 13  8 52 24 13 50 16 52
 24 34 13 16 29 24 16 15 67 24 34 38 62  1  1 66  3  3 11  1 30 67 24 34
 38 50 16 15 67 24 34 38 62  1  1 26 12 13 15  8 16  2 12  8 12 23 24 10
 11  1 39 61 49 16 34 13 24 16 34  3  3 16 13 24 15 61  3 17 24 31 16 13
 34  8 52 24 13 16  8 61 16 31 12 24 16  8 52 34 10 16  8 61 16  7 34 29
 12 15 52  5  1  1 66  3  3 11  1 14 24 15 61  3 17 24 31 62 16 13 24 15
 61  3 17 24 31 62  1  1 26 12 13 15  8 16  2 12  8 12 23 24 10 11  1 26
 12 13 15  8 50 16 51 61], shape=(200,), dtype=int32)
tf.Tensor(
[ 0  8 16  2 12  8 12 23 24 10 11  1 26 12 13 15  8 50 16 51 61 49 16 38
 10 61 42 16  2 34 12 49 15 16 48 34 13 37 12 49 15 16 12 15 16 37 52 12
 24  7 16 24 10 24 29 51 16  8 61 16  8 52 24 16 67 24 61 67  3 24 62  1
  1 66  3  3 11  1 36 24 16 38 10 61 42 44  8 50 16 42 24 16 38 10 61 42
 44  8 62  1  1 26 12 13 15  8 16  2 12  8 12 23

In [None]:
cnt = 0
for element in data:
  cnt = cnt+1
print(cnt)

25549


# GRU Cell

In [9]:
def initialize_parameters(n_h, vocab_size):
  parameter_dict = {}

  parameter_dict["W_h"] = tf.Variable(tf.initializers.GlorotUniform(seed = 0)(shape=[n_h, n_h + vocab_size]))
  print("W_h: " + str(parameter_dict["W_h"].shape))
  parameter_dict["b_h"] = tf.Variable(tf.initializers.GlorotUniform(seed = 2)(shape=[ n_h, 1]))
  print("b_h: " + str(parameter_dict["b_h"].shape))


  parameter_dict["W_u"] = tf.Variable(tf.initializers.GlorotUniform(seed = 3)(shape=[n_h, n_h + vocab_size]))
  print("W_u: " + str(parameter_dict["W_u"].shape))
  parameter_dict["b_u"] = tf.Variable(tf.initializers.GlorotUniform(seed = 2)(shape=[ n_h, 1]))
  print("b_u: " + str(parameter_dict["b_u"].shape))

  parameter_dict["W_r"] = tf.Variable(tf.initializers.GlorotUniform(seed = 4)(shape=[n_h, n_h + vocab_size]))
  print("W_r: " + str(parameter_dict["W_r"].shape))
  parameter_dict["b_r"] = tf.Variable(tf.initializers.GlorotUniform(seed = 5)(shape=[ n_h, 1]))
  print("b_r: " + str(parameter_dict["b_r"].shape))

  parameter_dict["W_o"] = tf.Variable(tf.initializers.GlorotUniform(seed = 7)(shape=[n_h, vocab_size]))
  print("W_o: " + str(parameter_dict["W_o"].shape))
  parameter_dict["b_o"] = tf.Variable(tf.initializers.GlorotUniform(seed = 6)(shape=[ 1, vocab_size]))
  print("b_o: " + str(parameter_dict["b_o"].shape))
  
  return parameter_dict;

In [10]:
ones_tensor = tf.Variable(tf.ones(shape =[512, 128], dtype=tf.dtypes.float32))

def gru_forward_compute(x_t, parameter_dict, h_prev):

  stacked_input = tf.concat([h_prev, tf.transpose(x_t)], axis = 0)
  
  update_gate = tf.nn.sigmoid(tf.matmul(parameter_dict["W_u"], stacked_input ) + parameter_dict["b_u"])
  #print(update_gate.shape)

  reset_gate = tf.nn.sigmoid(tf.matmul(parameter_dict["W_r"], stacked_input ) + parameter_dict["b_r"])
  #print(reset_gate.shape)

  hadamard_product = tf.multiply(reset_gate, h_prev)
  stacked_cell_input = tf.concat([hadamard_product, tf.transpose(x_t)], axis = 0)
  h_tilde = tf.nn.tanh(tf.matmul(parameter_dict["W_h"], stacked_cell_input ) + parameter_dict["b_h"])

  
  h = tf.multiply(update_gate, h_tilde) + tf.multiply(tf.subtract(ones_tensor, update_gate), h_prev)

  logits = tf.matmul(tf.transpose(h), parameter_dict["W_o"] ) + parameter_dict["b_o"]
  softmax_output = tf.nn.softmax(logits,axis = 0)

  return [update_gate, reset_gate, h, logits, softmax_output];

In [11]:
parameter_dict = initialize_parameters(n_h=512, vocab_size=vocab_size)

W_h: (512, 580)
b_h: (512, 1)
W_u: (512, 580)
b_u: (512, 1)
W_r: (512, 580)
b_r: (512, 1)
W_o: (512, 68)
b_o: (1, 68)


In [12]:
x_t = tf.Variable(tf.initializers.GlorotUniform(seed = 0)(shape=[128, vocab_size]))
h_prev = tf.Variable(tf.initializers.GlorotUniform(seed = 0)(shape=[512, 128]))
update_gate, reset_gate, h, logits, softmax_output = gru_forward_compute(x_t, parameter_dict, h_prev)

In [13]:
tf.shape(softmax_output)

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([128,  68], dtype=int32)>

In [14]:
#print(update_gate)
#print(h)
print(tf.reduce_sum(softmax_output[:, 2]))
print(softmax_output[:, 2])

tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(
[0.00768709 0.00754427 0.00781152 0.00689983 0.00797018 0.00783288
 0.00819303 0.00695316 0.00779048 0.00770025 0.0077992  0.007536
 0.00870549 0.00717369 0.00744747 0.00825853 0.00802295 0.00742147
 0.00707195 0.00743077 0.00827387 0.00805128 0.00808253 0.00789529
 0.00762278 0.00769768 0.00783361 0.00785466 0.00769155 0.00829025
 0.0080389  0.00824371 0.00803453 0.007808   0.00812774 0.00790186
 0.00803084 0.00734907 0.0072067  0.00739729 0.00877008 0.00763129
 0.00824674 0.00768775 0.00723501 0.00760793 0.0082252  0.00833443
 0.0079906  0.00775657 0.00799972 0.00757066 0.00751612 0.00721304
 0.00726312 0.00730189 0.00755075 0.00829403 0.00777673 0.007787
 0.00768325 0.0081652  0.00750713 0.00760562 0.00800111 0.00778991
 0.00821048 0.00813209 0.00767239 0.00842456 0.00790222 0.00751437
 0.00789996 0.00755009 0.00721819 0.00832501 0.00765466 0.008571
 0.00843915 0.00767957 0.0080774  0.00801731 0.00788372 0.00759925
 0.00851206 0.007

# BRC Cell

In [31]:
n_h = 512
batch_size = 128

brc_parameter_dict = {}

brc_parameter_dict["W_c"] = tf.Variable(initial_value = tf.keras.initializers.glorot_uniform(seed = 1)(shape = (n_h, batch_size)))
brc_parameter_dict["W_a"] = tf.Variable(initial_value = tf.keras.initializers.glorot_uniform(seed = 2)(shape = (n_h, batch_size)))
brc_parameter_dict["U_c"] = tf.Variable(initial_value = tf.keras.initializers.glorot_uniform(seed = 3)(shape = (n_h, vocab_size)))
brc_parameter_dict["U_a"] = tf.Variable(initial_value = tf.keras.initializers.glorot_uniform(seed = 4)(shape = (n_h, vocab_size)))
brc_parameter_dict["U_x"] = tf.Variable(initial_value = tf.keras.initializers.glorot_uniform(seed = 4)(shape = (n_h, vocab_size)))
brc_parameter_dict["b_a"] = tf.Variable(tf.initializers.GlorotUniform(seed = 5)(shape=[ n_h, 1]))
brc_parameter_dict["b_c"] = tf.Variable(tf.initializers.GlorotUniform(seed = 6)(shape=[ n_h, 1]))

In [32]:
for param_name, param_val in brc_parameter_dict.items():
  print("param_name: {} | shape: {}".format(param_name, str(param_val.shape)))

param_name: W_c | shape: (512, 128)
param_name: W_a | shape: (512, 128)
param_name: U_c | shape: (512, 68)
param_name: U_a | shape: (512, 68)
param_name: U_x | shape: (512, 68)
param_name: b_a | shape: (512, 1)
param_name: b_c | shape: (512, 1)


BRC Cell

In [None]:
brc_ones_tensor = tf.Variable(initial_value=tf.ones(shape = (n_h, batch_size)))

#update gate
tmp_at = tf.add(tf.matmul(a = brc_parameter_dict["U_a"], b = x_t, transpose_b= True) , tf.multiply(brc_parameter_dict["W_a"], h_prev))
A_t = tf.add(brc_ones_tensor, tf.keras.activations.tanh(tmp_at))

#Reset gate
tmp_ct = tf.add(tf.matmul(a = brc_parameter_dict["U_c"],b = x_t, b_transpose = True) , tf.multiply(brc_parameter_dict["W_c"], h_prev))
C_t = tf.keras.activations.sigmoid(tmp_ct)

#Current cell value
tmp_current_val = tf.matmul(a=brc_parameter_dict["U_x"], b=)





Sanity Check on sample data