In [None]:
# Import Libraries
import torch
import torch.nn as nn


# Create RNN Model
class RNNModel(nn.Module):
    """Many-to-one recurrent network: an ``nn.RNN`` stack plus a linear readout.

    The recurrent layers consume a batch-first sequence and only the hidden
    output of the last time step is fed to the readout layer.

    All learnable parameters (the input-to-hidden and hidden-to-hidden weights
    and biases of ``self.rnn`` and the weight and bias of ``self.fc``) are
    created and initialized when ``nn.RNN`` and ``nn.Linear`` are constructed
    in ``__init__``. ``forward`` only builds the initial hidden *state*.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the recurrent hidden state.
        n_layers: Number of stacked recurrent layers.
        output_dim: Number of values produced by the readout layer.
    """

    def __init__(self, input_dim, hidden_dim, n_layers, output_dim):
        super().__init__()

        # Kept on the instance because forward() needs them to size h0.
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # batch_first=True -> inputs are (batch_size, seq_len, features).
        # nonlinearity may be 'tanh' (the default) or 'relu'; bias defaults
        # to True for the recurrent layers.
        self.rnn = nn.RNN(
            input_dim,
            hidden_dim,
            n_layers,
            batch_first=True,
            nonlinearity='tanh',
        )

        # Readout layer mapping the last hidden output to the prediction.
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the sequence through the RNN and read out the last time step.

        Args:
            x: Batched input of shape (batch_size, seq_len, input_dim).

        Returns:
            Tensor of shape (batch_size, output_dim).
        """
        # Zero initial hidden state, one slice per layer. It is allocated with
        # the input's dtype and device so the model keeps working after
        # .double()/.half() or .to(device); a plain torch.zeros(...) would
        # always produce a float32 CPU tensor and then mismatch the input.
        h0 = torch.zeros(
            self.n_layers,
            x.size(0),  # batch_size
            self.hidden_dim,
            dtype=x.dtype,
            device=x.device,
        )

        # out holds the top layer's hidden output for every time step; the
        # final hidden state returned alongside it is not needed here.
        out, _ = self.rnn(x, h0)

        # Many-to-one: only the last time step feeds the readout layer.
        out = self.fc(out[:, -1, :])
        print(out, out.shape)
        return out

# Build a tiny RNN and push one random batch through it.
input_dim = 5        # features per time step (same as the vocabulary length)
hidden_dim = 3       # size of the recurrent hidden state
n_layers = 1         # number of stacked recurrent layers
output_dim = 1       # one value: sentiment is either positive or negative
batch_size = 1
sequence_length = 4

model = RNNModel(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    n_layers=n_layers,
    output_dim=output_dim,
)
print(model, '\n')

# Notes on inspecting the parameters (left disabled on purpose):
#
#   model.state_dict()                      # every parameter tensor by name
#   model.fc.weight.data, model.fc.bias.data
#
#   # Names only: input/hidden weights and biases of the RNN, then the
#   # weight and bias of the readout layer.
#   for name, param in model.named_parameters():
#       print(name)
#
#   # Names together with the current values.
#   for name, param in model.named_parameters():
#       if param.requires_grad:
#           print(name, param.data)
#
# PyTorch stores weight_ih_l0 as (hidden_dim, input_dim) and weight_hh_l0 as
# (hidden_dim, hidden_dim), i.e. the transpose of the usual textbook layout.
#
# Optional layer summary: from torchsummary import summary

# Random input laid out as (batch, time step, feature) to match batch_first=True.
ip = torch.randn(batch_size, sequence_length, input_dim)
output = model(ip)

tensor([[[-0.7981,  0.6570,  0.7857,  0.0070, -1.6912],
         [-1.6480, -0.3626, -0.2469,  0.8470, -1.3059],
         [-0.0124, -0.5408,  0.6233,  1.0897,  1.3376],
         [-0.7591,  0.5101,  0.7842, -1.1904, -0.9384]]]) torch.Size([1, 4, 5])
tensor([[[0., 0., 0.]]]) torch.Size([1, 1, 3])
tensor([[[ 0.6484, -0.4496,  0.7094],
         [ 0.6600,  0.6924,  0.9412],
         [-0.6979,  0.5795, -0.3076],
         [ 0.3527, -0.4096,  0.4238]]], grad_fn=<TransposeBackward1>) torch.Size([1, 4, 3])
tensor([[[ 0.3527, -0.4096,  0.4238]]], grad_fn=<StackBackward0>) torch.Size([1, 1, 3])
tensor([[-0.1046]], grad_fn=<AddmmBackward0>) torch.Size([1, 1])
