In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from pyfiles.layernormgru import LayerNormGRU, StackedGRU, GRUCell

In [61]:
# Toy problem configuration: a small random batch of sequences with an
# all-ones target, used only to smoke-test the forward/backward pass.
torch.manual_seed(0)  # fix the RNG so the inputs (and later weight init) are reproducible

seq_len = 10
batch_size = 5
input_size = 3
hidden_size = 7
num_layers = 3
output_size = 1

# Inputs are laid out batch-first: (batch_size, seq_len, input_size).
x = torch.rand((batch_size, seq_len, input_size))
# One target row per sequence; the width follows output_size rather than a literal 1.
y = torch.ones((batch_size, output_size))
x.size(), y.size()

(torch.Size([5, 10, 3]), torch.Size([5, 1]))

In [62]:
class Model(nn.Module):
    """Recurrent encoder followed by a linear read-out of the final hidden state.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the hidden state of each recurrent layer.
        output_size: number of values the linear head emits per sequence.
        num_layers: number of stacked recurrent layers.
        bidirec: when True the recurrent layer is built as bidirectional and
            the head reads the final states of both directions.
        batch_first: forwarded unchanged to the recurrent layer.
        norm: when True use ``LayerNormGRU``, otherwise ``nn.GRU``.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, bidirec=False, batch_first=False, norm=True):
        super(Model, self).__init__()
        # Both recurrent implementations are constructed with the same keywords.
        rnn_class = LayerNormGRU if norm else nn.GRU
        self.gru = rnn_class(input_size=input_size,
                             hidden_size=hidden_size,
                             num_layers=num_layers,
                             batch_first=batch_first,
                             bidirectional=bidirec)

        # A bidirectional top layer yields two final states per sequence.
        self.num_directions = 2 if bidirec else 1
        # Size the head from the arguments instead of a hard-coded single output.
        self.linear = nn.Linear(hidden_size * self.num_directions, output_size)

    def forward(self, inputs):
        """Run the recurrent layer and project the top layer's final state.

        Returns a tensor with one row per sequence and ``output_size`` columns.
        """
        _, hiddens = self.gru(inputs)
        # nn.GRU returns h_n as (num_layers * num_directions, batch, hidden), so
        # the trailing num_directions entries belong to the top layer.
        # NOTE(review): assumes LayerNormGRU uses the same layout - confirm.
        top_states = hiddens[-self.num_directions:]
        last_hiddens = torch.cat(list(top_states), dim=1)
        return self.linear(last_hiddens)

In [63]:
# Build the layer-normalised, unidirectional variant; batch_first matches
# the (batch, seq, feature) layout of x.
model = Model(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    bidirec=False,
    batch_first=True,
    norm=True,
)

In [64]:
# Clear any gradients already stored on the model's parameters before the backward pass.
model.zero_grad()

In [65]:
# Forward pass over the toy batch; echo the result's shape as the cell output.
outputs = model(x)
outputs.shape

torch.Size([5, 1])

In [66]:
# The model ends in a plain linear layer, so its outputs are unbounded scores,
# not probabilities. nn.BCELoss expects inputs in [0, 1]; BCEWithLogitsLoss
# applies the sigmoid internally and is the numerically stable choice.
loss_function = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [68]:
# Compare the model outputs against the all-ones target y.
loss = loss_function(outputs, y)

In [69]:
# Backpropagate the loss. No optimizer step follows in the visible cells,
# so this only checks that gradients can be computed.
loss.backward()