# Data

In [None]:
from dataset import GTSequenceDataset
from torch.utils.data import DataLoader

# Windowing / batching configuration shared by the train and validation sets.
SEQ_IN_LEN = 30
SEQ_OUT_LEN = 2
# NOTE(review): the original hard-coded 32, which equals SEQ_IN_LEN + SEQ_OUT_LEN;
# deriving it assumes that relation is intentional — confirm against GTSequenceDataset.
SEQ_TOTAL_LEN = SEQ_IN_LEN + SEQ_OUT_LEN
BATCH_SIZE = 512

BASE_DIR = '../../.Datasets/'
# Every split is assembled from the same three dataset folders under BASE_DIR.
DATASET_NAMES = ('DanceTrack', 'MOT17', 'MOT20')


def build_split_dataset(split):
    """Build a GTSequenceDataset from the `<BASE_DIR><name>/<split>` root of each dataset.

    Args:
        split: Sub-directory name of the split to load (e.g. 'train' or 'val').

    Returns:
        Whatever `GTSequenceDataset.from_roots` returns for those roots, using the
        sequence lengths configured above.
    """
    roots = [f'{BASE_DIR}{name}/{split}' for name in DATASET_NAMES]
    return GTSequenceDataset.from_roots(
        roots,
        seq_in_len=SEQ_IN_LEN,
        seq_out_len=SEQ_OUT_LEN,
        seq_total_len=SEQ_TOTAL_LEN,
    )


train_dataset = build_split_dataset('train')
# The DanceTrack validation root previously read 'DanceTrack/GTSequenceDataset',
# which does not follow the '<dataset>/<split>' layout used by every other root.
val_dataset = build_split_dataset('val')

# Shuffle only the training data; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f'Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}')

# Model

In [1]:
import torch
from lstm import ImprovedLSTMPredictor
from loss import LossFunction
from torch import optim

# Use the GPU when one is visible; otherwise fall back to the CPU so the notebook
# still runs (slowly) instead of failing on the `.to(DEVICE)` call below.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Predictor and loss used by the training and test cells further down.
model = ImprovedLSTMPredictor(middle_dim=64, hidden_dim=256, num_layers=1).to(DEVICE)
criterion = LossFunction()

# Train

In [None]:
# Initial learning rate handed to the optimizer; the cosine scheduler decays it.
LR = 2e-3
# Per-stage schedule. The trailing [0:1] slice keeps only the first stage; widen
# the slice to enable the later stages with lower teacher-forcing ratios.
NUM_EPOCHS = [20, 5, 5, 5, 5, 5][0:1]
LRS = [5e-4, 5e-4, 5e-4, 5e-4, 5e-4, 5e-4][0:1]
TEACHER_FORCING_RATIOS = [1, 0.8, 0.6, 0.4, 0.2, 0][0:1]

# zip() silently truncates to the shortest list, so catch mismatched schedules early.
assert len(NUM_EPOCHS) == len(LRS) == len(TEACHER_FORCING_RATIOS), \
    "NUM_EPOCHS, LRS and TEACHER_FORCING_RATIOS must have the same length"

optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
# One scheduler step is taken per epoch across all stages; T_max is one more than
# the total epoch count.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=sum(NUM_EPOCHS) + 1)

best_val_loss = float("inf")

for num_epochs, lr, teacher_forcing_ratio in zip(NUM_EPOCHS, LRS, TEACHER_FORCING_RATIOS):

    # NOTE(review): `lr` from LRS is only printed here; the optimizer's rate comes
    # from LR and the cosine scheduler. Confirm whether the per-stage value was
    # meant to be applied to the optimizer.
    print(f'Epochs: {num_epochs}  Lr: {lr}  Teacher Forcing Ratio: {teacher_forcing_ratio}')

    for epoch in range(1, num_epochs + 1):
        # Read the rate before stepping so the log shows the LR this epoch trained with
        # (reading it after scheduler.step() reports the next epoch's rate instead).
        current_lr = scheduler.get_last_lr()[0]

        train_loss = model.train_one_epoch(train_loader, optimizer, criterion, teacher_forcing_ratio)
        val_loss = model.evaluate(val_loader, criterion)

        scheduler.step()

        # Save weights whenever the validation loss improves on the best seen so far.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            model.save_weight('pretrained/')

        print(f"Epoch {epoch}: Train Loss = {train_loss:.8f}, Val Loss = {val_loss:.8f}, LR = {current_lr:.8f}")

# Reported once, after every stage has finished (previously printed inside the stage loop).
print("Training complete. Best Val Loss:", best_val_loss)

# Test

In [7]:
import torch
from dataset import GTSequenceDataset

SEQ_PATH = '../../.Datasets/DanceTrack/val/dancetrack0004/'
SEQ_IN_LEN = 30
SEQ_OUT_LEN = 2
SEQ_TOTAL_LEN = 32
# NOTE: this rebinds the notebook-level BATCH_SIZE set in the data cell; here it
# caps how many windows of the sequence are evaluated.
BATCH_SIZE = 128


def scale_boxes_to_pixels_(boxes, width, height):
    """Scale the first four channels of `boxes` in place and return it.

    Channels 0 and 2 of the last axis are multiplied by `width`, channels 1 and 3
    by `height`. `boxes` is indexed as a 3-D tensor whose last axis has at least
    four entries; any further channels are left untouched.
    """
    for channel in (0, 2):
        boxes[:, :, channel] *= width
    for channel in (1, 3):
        boxes[:, :, channel] *= height
    return boxes


d = GTSequenceDataset.from_sequence(SEQ_PATH, seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN, noise=True)
# Slice before moving to DEVICE so only the first BATCH_SIZE windows are copied,
# rather than transferring the whole sequence and discarding most of it.
sources = torch.tensor(d.sources)[:BATCH_SIZE].to(DEVICE)
targets = torch.tensor(d.targets)[:BATCH_SIZE].to(DEVICE)

# Predict every target step after the first one.
o = model.inference(sources, targets, num_steps=targets.size(1) - 1)

# Rescale predictions, targets and sources by the image size, in place.
image_width = d.image_width.item()
image_height = d.image_height.item()
for boxes in (o, targets, sources):
    scale_boxes_to_pixels_(boxes, image_width, image_height)

# Ground truth aligned with `o`: a view of `targets` without its first step.
t = targets[:, 1:]

index = 9
# Per-sample inspection helpers (uncomment as needed):
# t[index], o[index]
# f = (t[index] - o[index]).abs()[:, 3].mean()

# Mean absolute error over all windows, steps and channels, after rescaling.
f = (t - o).abs().mean()
f


tensor(210.2011, device='cuda:0')

In [6]:
# Show the image height stored on the sequence dataset `d` as a Python scalar;
# the test cell uses this value to rescale channels 1 and 3.
d.image_height.item()

1080.0

In [16]:
# Display the target windows for visual inspection.
# NOTE(review): the test cell multiplies `targets` in place by the image size, so a
# fresh top-to-bottom run shows rescaled values here, not the sub-1.0 values in the
# saved output — confirm which state this cell is meant to show.
targets

tensor([[[0.4595, 0.3603, 0.5068, 0.6890],
         [0.4916, 0.3975, 0.5206, 0.7048]],

        [[0.4916, 0.3975, 0.5206, 0.7048],
         [0.4530, 0.4065, 0.5024, 0.6868]],

        [[0.4530, 0.4065, 0.5024, 0.6868],
         [0.4324, 0.4058, 0.5474, 0.7003]],

        ...,

        [[0.2288, 0.5286, 0.3329, 0.7175],
         [0.2476, 0.5468, 0.3747, 0.6862]],

        [[0.2476, 0.5468, 0.3747, 0.6862],
         [0.2156, 0.5008, 0.3438, 0.7099]],

        [[0.2156, 0.5008, 0.3438, 0.7099],
         [0.2274, 0.5202, 0.3164, 0.7094]]], device='cuda:0')