In [1]:
import sys

# Directory added to the module search path so the local imports in the next
# cell can be resolved (presumably model.py, layer.py, loss.py, activation.py
# and optimizer.py live there -- verify).
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# import its local modules on any other machine until it is made configurable.
NN_MODULE_DIR = r"C:\Users\Manu\Desktop\CB-LV-DS-Feb21\DS\NN"

# Guard the append so re-running this cell does not pile up duplicate entries.
if NN_MODULE_DIR not in sys.path:
    sys.path.append(NN_MODULE_DIR)

In [2]:
import numpy as np
from model import Sequential
from layer import Dense
from loss import BinaryCrossEntropy
from activation import Sigmoid, ReLU
from optimizer import GradientDescentOptimizer

In [11]:
class BasicRNN:
    """Simple recurrent layer assembled from two ``Dense`` layers.

    At every timestep the current input is stacked on top of the previous
    hidden state and fed through ``hidden_layer`` (ReLU) to obtain the new
    hidden state; ``output_layer`` then maps that hidden state to the output
    of the step using the activation supplied by the caller.

    Array layout used by ``eval``: ``X`` is indexed as
    ``(input_units, timestep, batch)`` and the result is
    ``(output_units, timestep, batch)``.

    NOTE: backpropagation support is unfinished. ``grad_input``,
    ``grad_parameters``, ``gradient_dict`` and ``update`` read
    ``self.activation`` / ``self.dot``, which ``__init__`` never sets, so
    calling them currently raises ``AttributeError``.
    """

    def __init__(self, output_units, hidden_units, activation, input_size):
        """Build the hidden and output ``Dense`` layers.

        Args:
            output_units: number of output features produced per timestep.
            hidden_units: size of the recurrent hidden state.
            activation: activation object used by the output layer.
            input_size: 2-element sequence ``(timestep, input_features)``.

        Raises:
            RuntimeError: if ``input_size`` is not a sequence of length 2.
        """
        # Validate before indexing: a scalar used to fail with a bare
        # TypeError on ``input_size[0]`` and a wrong-length non-tuple sequence
        # was silently accepted.
        try:
            n_dims = len(input_size)
        except TypeError:
            n_dims = None
        if n_dims != 2:  # input_size => (timestep, input_features)
            raise RuntimeError(f"Incompatible input shape, got {input_size}")

        self.output_activation = activation
        self.timestep = input_size[0]
        self.input_units = input_size[1]
        self.output_units = output_units
        self.hidden_units = hidden_units

        # The hidden layer consumes the input stacked with the previous hidden
        # state, hence input_units + hidden_units input features.
        self.hidden_layer = Dense(units=hidden_units, activation=ReLU(), input_size=self.input_units+self.hidden_units)
        self.output_layer = Dense(units=output_units, activation=self.output_activation, input_size=self.hidden_units)

    def get_output_size(self):
        """Return ``(timestep, output_units)`` as declared at construction.

        Note that ``eval`` takes the number of steps from ``X`` itself, not
        from ``self.timestep``.
        """
        return (self.timestep, self.output_units)

    def get_no_of_params(self):
        """Return the combined parameter count of the hidden and output layers."""
        return self.hidden_layer.get_no_of_params() + self.output_layer.get_no_of_params()

    def eval(self, X, start_sequence=None):
        """Run the recurrence over every timestep of ``X``.

        Args:
            X: array indexed as ``(input_units, timestep, batch)``.
            start_sequence: optional initial hidden state of shape
                ``(hidden_units, batch)``; zeros are used when omitted.

        Returns:
            Array of shape ``(output_units, timestep, batch)`` holding the
            output-layer result of every step.

        Raises:
            AssertionError: if ``start_sequence`` does not have shape
                ``(hidden_units, batch)``.
        """
        h_t = np.zeros((self.hidden_units, X.shape[-1]))
        timestep = X.shape[1]
        if start_sequence is not None:
            start_sequence = np.asarray(start_sequence)
            # Compare shape with shape. The previous check compared the shape
            # tuple with the array itself, which broadcasts element-wise and
            # made ``assert`` raise "truth value ... is ambiguous" for every
            # start_sequence that was passed in.
            assert h_t.shape == start_sequence.shape, f"Sequence start hidden state received incompatible shape, got {start_sequence.shape}, expected {h_t.shape}"
            h_t = start_sequence

        y = np.empty((self.output_units, timestep, X.shape[-1]))
        for i in range(timestep):
            x_t = X[:, i, :]
            # Input features on top, previous hidden state below.
            x_t_stacked = np.vstack([x_t, h_t])
            h_t = self.hidden_layer.eval(x_t_stacked)
            y_t = self.output_layer.eval(h_t)
            y[:, i, :] = y_t

        return y

    def grad_parameters_T(self, x_t, h_t_1):
        """Gradients of a single step's output w.r.t. both layers' parameters.

        Args:
            x_t: input at the current step.
            h_t_1: hidden state from the previous step.

        Returns:
            ``(dyt_param_output, dyt_param_hidden)``: the output layer's own
            ``grad_parameters`` result, and a ``(dw, db)`` pair for the hidden
            layer obtained by chaining through the output layer's
            ``grad_input``. The einsum signatures treat axis 0 as a shared
            index ``m`` (presumably the batch -- confirm against ``Dense``).
        """
        x_t_stacked = np.vstack([x_t, h_t_1])
        h_t = self.hidden_layer.eval(x_t_stacked)

        dyt_param_output = self.output_layer.grad_parameters(h_t) # (dw, db)
        dyt_ht = self.output_layer.grad_input(h_t)

        dht_param = self.hidden_layer.grad_parameters(x_t_stacked)

        # Chain rule: d y_t / d theta_hidden = (d y_t / d h_t) . (d h_t / d theta_hidden)
        dyt_param_hidden = (np.einsum('mij,mjkl->mikl', dyt_ht, dht_param[0]), np.einsum('mij,mjk->mik', dyt_ht, dht_param[1]))

        return (dyt_param_output, dyt_param_hidden)

    def grad_input_T(self, x_t, h_t_1):
        """Gradients of a single step w.r.t. its input and previous hidden state.

        Args:
            x_t: input at the current step.
            h_t_1: hidden state from the previous step.

        Returns:
            ``(dyt_x_t, dyt_h_t_1, dht_x_t)``: the step output's gradient
            restricted to the input columns, the same gradient restricted to
            the previous-hidden-state columns, and the hidden layer's
            ``grad_input`` restricted to the input columns.

        Raises:
            AssertionError: if the split gradients do not have
                ``input_units`` / ``hidden_units`` entries on the last axis.
        """
        x_t_stacked = np.vstack([x_t, h_t_1])
        h_t = self.hidden_layer.eval(x_t_stacked)

        dyt_ht = self.output_layer.grad_input(h_t)

        dht_x_t_stacked = self.hidden_layer.grad_input(x_t_stacked)

        # The first input_units columns of the stacked gradient belong to x_t.
        dht_x_t = dht_x_t_stacked[:, :, :self.input_units]

        dyt_x_t_stacked = np.einsum('mij,mjk->mik', dyt_ht, dht_x_t_stacked)

        # Split along the last axis in the same order used for stacking.
        dyt_x_t = dyt_x_t_stacked[:, :, :self.input_units]
        dyt_h_t_1 = dyt_x_t_stacked[:, :, self.input_units:]

        assert dyt_x_t.shape[-1] == self.input_units, f"Shape mistmatch in input gradient for step t, {dyt_x_t.shape[-1]} != {self.input_units}"
        assert dyt_h_t_1.shape[-1] == self.hidden_units, f"Shape mistmatch in input gradient for step t, {dyt_h_t_1.shape[-1]} != {self.hidden_units}"

        return dyt_x_t, dyt_h_t_1, dht_x_t


    # TBD: the methods below (except the static ``backprop_grad``) are
    # unfinished drafts. They use ``self.activation`` and ``self.dot``, which
    # this class never defines, so they raise AttributeError when called.

    def grad_input(self, X):
        """Draft (not functional): chain ``self.activation`` and ``self.dot`` input gradients."""
        g1 = self.activation.grad_input(self.dot(X))

        g2 = self.dot.grad_input(X)

        return np.einsum('mij,mjk->mik', g1, g2)

    def grad_parameters(self, X):
        """Draft (not functional): return ``(da_dw, da_db)`` built from ``self.activation`` / ``self.dot``."""

        # Bound-method reference only; it is never called or used below.
        dyt_output = self.output_layer.grad_parameters

        da_dI = self.activation.grad_input(self.dot(X))
        dI_dw = self.dot.grad_w(X)
        da_dw = np.einsum('mij,mjkl->mikl', da_dI, dI_dw)

        dI_db = self.dot.grad_b(X)
        da_db = np.einsum('mij,mjk->mik', da_dI, dI_db)
        return (da_dw, da_db)

    def gradient_dict(self, output):
        """Collect ``grad_input`` / ``grad_parameters`` results under the keys ``"input"``, ``"w"`` and ``"b"``.

        Depends on the draft methods above, so it is not functional yet.
        """
        grad_ = {}
        grad_["input"] = self.grad_input(output)
        grad_["w"], grad_["b"] = self.grad_parameters(output)

        return grad_

    @staticmethod
    def backprop_grad(grad_loss, grad):
        """Chain an upstream loss gradient through a gradient dictionary.

        Args:
            grad_loss: upstream gradient, contracted as ``'mij'``.
            grad: dict with keys ``"w"`` (``'mjkl'``), ``"b"`` (``'mjk'``)
                and ``"input"`` (``'mjk'``).

        Returns:
            ``(grad_w, grad_b, grad_loss)``: the weight gradient summed over
            axis 0 with its leading axis dropped via ``[0]``, the bias
            gradient summed over axis 0 and transposed, and the loss gradient
            propagated to the input (axis 0 kept).
        """
        grad_w = np.einsum('mij,mjkl->mikl', grad_loss, grad["w"]).sum(axis=0)[0]
        grad_b = np.einsum('mij,mjk->mik', grad_loss, grad["b"]).sum(axis=0).T
        grad_loss = np.einsum('mij,mjk->mik', grad_loss, grad["input"])

        return grad_w, grad_b, grad_loss

    def update(self, grad_w, grad_b, optimizer, method="minimize"):
        """Draft (not functional): forward the update to ``self.dot``, which is never defined here."""
        self.dot.update(grad_w, grad_b, optimizer, method)


In [12]:
# Shape check: reversing the axes of a (10, 5, 1) array gives (1, 5, 10);
# fixing index 0 on the middle axis then leaves a (1, 10) slice.
np.transpose(np.random.randn(10, 5, 1))[:, 0, :].shape

(1, 10)

In [13]:
# Shape check: transposing without explicit axes reverses them,
# so (10, 5, 1) becomes (1, 5, 10).
np.transpose(np.random.randn(10, 5, 1)).shape

(1, 5, 10)

In [14]:
# RNN with 2 input features, a 5-unit hidden state and 3 sigmoid outputs per
# step; input_size is (timestep, input_features).
layer = BasicRNN(
    output_units=3,
    hidden_units=5,
    activation=Sigmoid(),
    input_size=(10, 2),
)

In [27]:
# Feed one random sequence, transposed from (1, 20, 2) to (2, 20, 1), through
# the layer and show the outputs at step index 4. The input is unseeded, so the
# displayed values differ on every run.
layer.eval(np.transpose(np.random.randn(1, 20, 2)))[:, 4, :]

array([[0.99370971],
       [0.29087072],
       [0.01155823]])