# Data

In [None]:
from dataset import GTSequenceDataset
from torch.utils.data import DataLoader

SEQ_IN_LEN = 30
SEQ_OUT_LEN = 2
SEQ_TOTAL_LEN = 32
BATCH_SIZE = 128

BASE_DIR = '../../Datasets/'
train_dataset = GTSequenceDataset.from_roots([
    f'{BASE_DIR}DanceTrack/train',
    f'{BASE_DIR}MOT17/train',
    f'{BASE_DIR}MOT20/train'
], seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN, noise_prob=0.5, noise_coeff=0.5)

val_dataset = GTSequenceDataset.from_roots([
    f'{BASE_DIR}DanceTrack/val',
    f'{BASE_DIR}MOT17/val',
    f'{BASE_DIR}MOT20/val'
], seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN, noise_prob=0.5, noise_coeff=0.5)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f'Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}')

Train samples: 1276594, Val samples: 666289


# Model

In [32]:
from lstm import LSTMPredictor
from loss import LossFunction
from torch import optim

DEVICE = 'cuda'
model = LSTMPredictor(middle_dim=64, hidden_dim=256, num_layers=1).to(DEVICE)
criterion = LossFunction()



In [18]:
model.evaluate(val_loader, criterion)

0.101490776536144

In [33]:
from dataset import GTSequenceDataset
from torch.utils.data import DataLoader
# optimizer = optim.AdamW(model.parameters(), lr=2e-3, weight_decay=1e-4)
# scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=42)
best_val_loss = float("inf")

NUM_EPOCHS = [15, 10, 10, 10, 5]
PS = [0, 0.2, 0.4, 0.6, 0.8]

optimizer = optim.AdamW(model.parameters(), lr=2e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=sum(NUM_EPOCHS) + 2)


# for p, lr, num_epochs in [[0.2, 1e-3, 10], [0.4, 5e-4, 10], [0.6, 5e-4, 10]]:
for p, num_epochs in zip(PS, NUM_EPOCHS):

    print(p, num_epochs)

    SEQ_IN_LEN = 30
    SEQ_OUT_LEN = 2
    SEQ_TOTAL_LEN = 32
    BATCH_SIZE = 128

    BASE_DIR = '../../Datasets/'
    train_dataset = GTSequenceDataset.from_roots([
        f'{BASE_DIR}DanceTrack/train',
        f'{BASE_DIR}MOT17/train',
        f'{BASE_DIR}MOT20/train'
    ], seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN, noise_prob=p, noise_coeff=2)

    val_dataset = GTSequenceDataset.from_roots([
        f'{BASE_DIR}DanceTrack/val',
        f'{BASE_DIR}MOT17/val',
        f'{BASE_DIR}MOT20/val'
    ], seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN, noise_prob=0.6, noise_coeff=2)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # LR = 2e-3
    # NUM_EPOCHS = [10, 5, 5, 5, 5, 5][0:1]
    # LRS = [5e-4, 5e-4, 5e-4, 5e-4, 5e-4, 5e-4][0:1]
    # TEACHER_FORCING_RATIOS = [1, 0.8, 0.6, 0.4, 0.2, 0][0:1]

    

    

    # for num_epochs, lr, teacher_forcing_ratio in zip(NUM_EPOCHS, LRS, TEACHER_FORCING_RATIOS):


    for epoch in range(1, num_epochs + 1):
        train_loss = model.train_one_epoch(train_loader, optimizer, criterion, 1)
        val_loss = model.evaluate(val_loader, criterion)

        scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            model.save_weight('pretrained/lstm-base-m64-h256-wn-v2.pth')

        current_lr = scheduler.get_last_lr()[0]
        print(f"Epoch {epoch}: Train Loss = {train_loss:.8f}, Val Loss = {val_loss:.8f}, LR = {current_lr:.8f}")

    print("Training complete. Best Val Loss:", best_val_loss)

0 15
Epoch 1: Train Loss = 0.06414034, Val Loss = 0.08256277, LR = 0.00199818
Epoch 2: Train Loss = 0.04562454, Val Loss = 0.07837529, LR = 0.00199271
Epoch 3: Train Loss = 0.04282197, Val Loss = 0.07818616, LR = 0.00198362
Epoch 4: Train Loss = 0.04172837, Val Loss = 0.08411249, LR = 0.00197094
Epoch 5: Train Loss = 0.04091067, Val Loss = 0.07815535, LR = 0.00195472
Epoch 6: Train Loss = 0.04044936, Val Loss = 0.07928688, LR = 0.00193502
Epoch 7: Train Loss = 0.03999297, Val Loss = 0.07996182, LR = 0.00191190
Epoch 8: Train Loss = 0.03972748, Val Loss = 0.08286103, LR = 0.00188546
Epoch 9: Train Loss = 0.03924480, Val Loss = 0.08378812, LR = 0.00185578
Epoch 10: Train Loss = 0.03896793, Val Loss = 0.08029294, LR = 0.00182298
Epoch 11: Train Loss = 0.03881455, Val Loss = 0.08028612, LR = 0.00178718
Epoch 12: Train Loss = 0.03849943, Val Loss = 0.07681794, LR = 0.00174851
Epoch 13: Train Loss = 0.03817027, Val Loss = 0.08059067, LR = 0.00170711
Epoch 14: Train Loss = 0.03800686, Val Los

# Train

In [3]:
LR = 2e-3
NUM_EPOCHS = [20, 5, 5, 5, 5, 5][0:1]
LRS = [5e-4, 5e-4, 5e-4, 5e-4, 5e-4, 5e-4][0:1]
TEACHER_FORCING_RATIOS = [1, 0.8, 0.6, 0.4, 0.2, 0][0:1]

optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=sum(NUM_EPOCHS) + 1)

best_val_loss = float("inf")

for num_epochs, lr, teacher_forcing_ratio in zip(NUM_EPOCHS, LRS, TEACHER_FORCING_RATIOS):

    print(f'Epochs: {num_epochs}  Lr: {lr}  Teacher Forcing Ratio: {teacher_forcing_ratio}')

    for epoch in range(1, num_epochs + 1):
        train_loss = model.train_one_epoch(train_loader, optimizer, criterion, teacher_forcing_ratio)
        val_loss = model.evaluate(val_loader, criterion)

        scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            model.save_weight('pretrained/lstm-base-m64-h256-l1.pth')

        current_lr = scheduler.get_last_lr()[0]
        print(f"Epoch {epoch}: Train Loss = {train_loss:.8f}, Val Loss = {val_loss:.8f}, LR = {current_lr:.8f}")

    print("Training complete. Best Val Loss:", best_val_loss)

Epochs: 20  Lr: 0.0005  Teacher Forcing Ratio: 1
Epoch 1: Train Loss = 0.06342130, Val Loss = 0.05451159, LR = 0.00198883


KeyboardInterrupt: 

In [53]:
# model.load_weight('pretrained/lstm-base-m64-h256-wn-v2.pth')
model.load_weight('pretrained/lstm-base-m64-h256-wn.pth')

In [5]:
model.save_weight('pretrained/lstm-base-m64-h256-l1.pth')

# Test

In [60]:
import torch
from dataset import GTSequenceDataset

SEQ_PATH = '../../Datasets/MOT20/val/MOT20-01/'
# SEQ_PATH = '../../Datasets/MOT17/val/MOT17-04-FRCNN/'
# SEQ_PATH = '../../Datasets/DanceTrack/val/dancetrack0004/'
SEQ_IN_LEN = 30
SEQ_OUT_LEN = 2
SEQ_TOTAL_LEN = 32
BATCH_SIZE = 128

d = GTSequenceDataset.from_sequence(SEQ_PATH, seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN)
# d = GTSequenceDataset.from_sequence(SEQ_PATH, seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN, noise_prob=0.6, noise_coeff=2)
sources = torch.tensor(d.sources).to(DEVICE)[:BATCH_SIZE]
targets = torch.tensor(d.targets).to(DEVICE)[:BATCH_SIZE]

o = model.inference(sources, targets, num_steps=targets.size(1) - 1)

o[:, :, 0] *= d.image_width.item()
o[:, :, 2] *= d.image_width.item()
o[:, :, 1] *= d.image_height.item()
o[:, :, 3] *= d.image_height.item()
targets[:, :, 0] *= d.image_width.item()
targets[:, :, 2] *= d.image_width.item()
targets[:, :, 1] *= d.image_height.item()
targets[:, :, 3] *= d.image_height.item()
t = targets[:, 1:]
sources[:, :, 0] *= d.image_width.item()
sources[:, :, 2] *= d.image_width.item()
sources[:, :, 1] *= d.image_height.item()
sources[:, :, 3] *= d.image_height.item()

index = 9
# t[index], o[index]
# f = (t[index] - o[index]).abs()[:, 3].mean()
f = (t - o).abs().mean()
f


tensor(0.6855, device='cuda:0', grad_fn=<MeanBackward0>)

In [51]:
t[0]

tensor([[ 294.3531,  811.7768,  413.6157, 1081.0000]], device='cuda:0')

In [52]:
o[0]

tensor([[ 286.7705,  810.1767,  418.4162, 1081.0775]], device='cuda:0',
       grad_fn=<SelectBackward0>)

In [6]:
d.image_height.item()

1080.0

In [16]:
targets

tensor([[[0.4595, 0.3603, 0.5068, 0.6890],
         [0.4916, 0.3975, 0.5206, 0.7048]],

        [[0.4916, 0.3975, 0.5206, 0.7048],
         [0.4530, 0.4065, 0.5024, 0.6868]],

        [[0.4530, 0.4065, 0.5024, 0.6868],
         [0.4324, 0.4058, 0.5474, 0.7003]],

        ...,

        [[0.2288, 0.5286, 0.3329, 0.7175],
         [0.2476, 0.5468, 0.3747, 0.6862]],

        [[0.2476, 0.5468, 0.3747, 0.6862],
         [0.2156, 0.5008, 0.3438, 0.7099]],

        [[0.2156, 0.5008, 0.3438, 0.7099],
         [0.2274, 0.5202, 0.3164, 0.7094]]], device='cuda:0')