In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as F

In [2]:
!pip install einops

Collecting einops
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.6.1


In [3]:
from einops import rearrange

### Explanation

This Seq2Seq part of architecture expects as input tensor of shape 'b_size, seq_len, hidden_dim', where:
* b_size -> batch size
* seq_len -> frame sequence length
* hidden_dim -> flattened output of backbone net (like ResNet and etc) (b w h c -> b (w h c))

Returns:

    logits of shape (b_size, seq_len), with 0/1 probabilities

### Dataset

In [4]:
X = torch.randn((10000, 60, 1000), requires_grad=False) #b_size, seq_len, hidden_dim
y = torch.randint(2,(10000, 60), requires_grad=False, dtype=torch.float32)

In [5]:
from torch.utils.data import Dataset, DataLoader


class SimpleDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def __len__(self,):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
    
train_dataset = SimpleDataset(X, y)
train_dataloader = DataLoader(train_dataset, batch_size=64,  shuffle=True, drop_last=False)

In [6]:
class Encoder(nn.Module):
    def __init__(self,
                 hidden_dim,
                 num_layers,
                 encoder_dropout: float=0.5):
        super().__init__()
        
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.encoder_dropout = encoder_dropout
        
        
        self.encoder = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True, dropout=encoder_dropout)
        
        
    def forward(self, x):
        return self.encoder(x)  #output: y-pred, h, c

In [7]:
from typing import Tuple
class Decoder(nn.Module):
    def __init__(self,
                hidden_dim,
                num_layers,
                num_classes,
                decoder_dropout: float=0.5):
        super().__init__()
        
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.decoder_dropout = decoder_dropout
        
        self.decoder = nn.LSTM(self.num_classes, hidden_dim, num_layers, batch_first=True, dropout=decoder_dropout)
        self.proj_layer = nn.Linear(hidden_dim, 1)
        
    def forward(self,
                x: torch.Tensor,
                hc: Tuple[torch.Tensor, torch.Tensor]):
        #Decoder is auto regressive, gets x with shape b_size, 1, hidden_dim
        #Outputs prediction for the current input and hidden, cell
        assert x.dim() == 1                  #we have X as a vector of shape [b_size]
        x = rearrange(x, 'b -> b 1 1')       #adding seq_len, hidden_dim dimension, x now is [b_size, 1, hidden_dim] 
        
        out, hc = self.decoder(x, hc)
        out = rearrange(out, 'b 1 h -> b h')                     #removing seq_len & hidden_dim dimension
        out = self.proj_layer(out) 
        out = rearrange(out, 'b 1 -> b')
        return out, hc
                

In [16]:
import random 
class Seq2Seq(nn.Module):
    def __init__(self,
                 encoder: torch.nn.Module,
                 decoder: torch.nn.Module,):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        
    def forward(self,
               src, 
               target,
               teacher_force: float=0.5):
        b_size = src.shape[0]
        max_seq_len = src.shape[1]
        
        
        _, hc = self.encoder(src)
        
        input_ = target[:, 0]
        output = torch.zeros((b_size, max_seq_len), requires_grad=False).to(device)
        
        for t in range(1, max_seq_len):
            out, hc = self.decoder(input_, hc)
            output[:, t] = out
            y_pred = (out > 0.5).float()
            y_pred
            
            do_tf = random.random() > teacher_force
            
            
            if do_tf:
                input_ = target[:, t]
            else:
                input_ = y_pred
                
        return output
            
            
            

In [17]:
device = 'cuda'
enc = Encoder(1000, 2)
dec = Decoder(1000, 2, 1)
model = Seq2Seq(enc, dec).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.BCEWithLogitsLoss()

In [18]:
X.device

device(type='cpu')

In [20]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for batch_num, batch in enumerate(train_dataloader):
        optimizer.zero_grad()
        src, tgt = batch[0].to(device), batch[1].to(device)
        y_pred = model(src, tgt)
        
        loss = loss_fn(y_pred, tgt)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    train_loss /= batch_num
    print(f'Loss: {train_loss}')
        
        

Loss: 0.702020084246611
Loss: 0.7030257131808844
Loss: 0.7015808408076947
Loss: 0.701946924512203
Loss: 0.7060159918589469
Loss: 0.7023547658553491
Loss: 0.7012652089962592


KeyboardInterrupt: 