# Data

In [1]:
from dataset import GTSequenceDataset
from torch.utils.data import DataLoader

SEQ_IN_LEN = 10
SEQ_OUT_LEN = 10
SEQ_TOTAL_LEN = 20
BATCH_SIZE = 1024

BASE_DIR = '../../Datasets/'
train_dataset = GTSequenceDataset.from_roots([
    f'{BASE_DIR}DanceTrack/train',
    f'{BASE_DIR}MOT17/train',
    f'{BASE_DIR}MOT20/train'
], seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN)

val_dataset = GTSequenceDataset.from_roots([
    f'{BASE_DIR}DanceTrack/val',
    f'{BASE_DIR}MOT17/val',
    f'{BASE_DIR}MOT20/val'
], seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

print(f'Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}')

Train samples: 1313429, Val samples: 689264


# Model

In [2]:
from lstm_decoder import LSTMPredictor
from loss import LossFunction
from torch import optim

DEVICE = 'cuda'
# model = MotionTransformer(num_enc_layers=1, num_dec_layers=1, dim_ff=64, d_model=32, dropout=0, nhead=4).to(DEVICE)
# model = ImprovedLSTMPredictor(middle_dim=128, hidden_dim=512, num_layers=1).to(DEVICE)
model = LSTMPredictor(middle_dim=128, hidden_dim=512, num_layers=1).to(DEVICE)
criterion = LossFunction()

In [27]:
model.evaluate(val_loader, criterion)

0.06770345217165835

In [8]:
from dataset import GTSequenceDataset
from torch.utils.data import DataLoader
# optimizer = optim.AdamW(model.parameters(), lr=2e-3, weight_decay=1e-4)
# scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=42)
best_val_loss = float("inf")

NUM_EPOCHS = [15, 10, 10, 10, 5]
PS = [0, 0.2, 0.4, 0.6, 0.8]

optimizer = optim.Adam(model.parameters(), lr=2e-3)
# optimizer = optim.AdamW(model.parameters(), lr=2e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=sum(NUM_EPOCHS) + 2)


# for p, lr, num_epochs in [[0.2, 1e-3, 10], [0.4, 5e-4, 10], [0.6, 5e-4, 10]]:
for p, num_epochs in zip(PS, NUM_EPOCHS):

    

    SEQ_IN_LEN = 30
    SEQ_OUT_LEN = 2
    SEQ_TOTAL_LEN = 32
    BATCH_SIZE = 512

    BASE_DIR = '../../Datasets/'
    train_dataset = GTSequenceDataset.from_roots([
        f'{BASE_DIR}DanceTrack/train',
        f'{BASE_DIR}MOT17/train',
        f'{BASE_DIR}MOT20/train'
    ], seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN, noise_prob=p, noise_coeff=2)

    val_dataset = GTSequenceDataset.from_roots([
        f'{BASE_DIR}DanceTrack/val',
        f'{BASE_DIR}MOT17/val',
        f'{BASE_DIR}MOT20/val'
    ], seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN, noise_prob=0.6, noise_coeff=2)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

    # LR = 2e-3
    # NUM_EPOCHS = [10, 5, 5, 5, 5, 5][0:1]
    # LRS = [5e-4, 5e-4, 5e-4, 5e-4, 5e-4, 5e-4][0:1]
    # TEACHER_FORCING_RATIOS = [1, 0.8, 0.6, 0.4, 0.2, 0][0:1]

    print(p, num_epochs)

    

    # for num_epochs, lr, teacher_forcing_ratio in zip(NUM_EPOCHS, LRS, TEACHER_FORCING_RATIOS):


    for epoch in range(1, num_epochs + 1):
        train_loss = model.train_one_epoch(train_loader, optimizer, criterion, 1)
        val_loss = model.evaluate(val_loader, criterion)

        scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            model.save_weight('pretrained/lstm-improved-m128-h512-wn-lf3.pth')

        current_lr = scheduler.get_last_lr()[0]
        print(f"Epoch {epoch}: Train Loss = {train_loss:.8f}, Val Loss = {val_loss:.8f}, LR = {current_lr:.8f}")

    print("Training complete. Best Val Loss:", best_val_loss)

0 15
Epoch 1: Train Loss = 0.00001353, Val Loss = 0.00001081, LR = 0.00199818
Epoch 2: Train Loss = 0.00000664, Val Loss = 0.00001072, LR = 0.00199271
Epoch 3: Train Loss = 0.00000608, Val Loss = 0.00001097, LR = 0.00198362
Epoch 4: Train Loss = 0.00000587, Val Loss = 0.00001106, LR = 0.00197094
Epoch 5: Train Loss = 0.00000573, Val Loss = 0.00001093, LR = 0.00195472
Epoch 6: Train Loss = 0.00000562, Val Loss = 0.00001103, LR = 0.00193502
Epoch 7: Train Loss = 0.00000553, Val Loss = 0.00001065, LR = 0.00191190
Epoch 8: Train Loss = 0.00000544, Val Loss = 0.00001123, LR = 0.00188546
Epoch 9: Train Loss = 0.00000539, Val Loss = 0.00001103, LR = 0.00185578
Epoch 10: Train Loss = 0.00000533, Val Loss = 0.00001081, LR = 0.00182298
Epoch 11: Train Loss = 0.00000527, Val Loss = 0.00001084, LR = 0.00178718
Epoch 12: Train Loss = 0.00000522, Val Loss = 0.00001070, LR = 0.00174851
Epoch 13: Train Loss = 0.00000517, Val Loss = 0.00001056, LR = 0.00170711
Epoch 14: Train Loss = 0.00000513, Val Los

KeyboardInterrupt: 

# 0.0678

# Train

In [5]:
LR = 1e-4
NUM_EPOCHS = [20, 5, 5, 5, 5, 10][5:]
LRS = [5e-4, 5e-4, 5e-4, 5e-4, 5e-4, 5e-4][5:]
TEACHER_FORCING_RATIOS = [1, 0.8, 0.6, 0.4, 0.2, 0][5:]

optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=sum(NUM_EPOCHS) + 1)

# best_val_loss = 0.14
best_val_loss = float("inf")

for num_epochs, lr, teacher_forcing_ratio in zip(NUM_EPOCHS, LRS, TEACHER_FORCING_RATIOS):

    print(f'Epochs: {num_epochs}  Lr: {lr}  Teacher Forcing Ratio: {teacher_forcing_ratio}')

    for epoch in range(1, num_epochs + 1):
        train_loss = model.train_one_epoch(train_loader, optimizer, criterion, teacher_forcing_ratio)
        val_loss = model.evaluate(val_loader, criterion)

        scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            model.save_weight('pretrained/lstm-decoder-only-m128-h512-l1')
            # model.save_weight('pretrained/transformer-dm16-df32-nh4-e1-d1.pth')

        current_lr = scheduler.get_last_lr()[0]
        print(f"Epoch {epoch}: Train Loss = {train_loss:.8f}, Val Loss = {val_loss:.8f}, LR = {current_lr:.8f}")

    print("Training complete. Best Val Loss:", best_val_loss)

Epochs: 10  Lr: 0.0005  Teacher Forcing Ratio: 0
Epoch 1: Train Loss = 0.03851746, Val Loss = 0.04302038, LR = 0.00009797
Epoch 2: Train Loss = 0.03845023, Val Loss = 0.04247594, LR = 0.00009206
Epoch 3: Train Loss = 0.03843306, Val Loss = 0.04284758, LR = 0.00008274
Epoch 4: Train Loss = 0.03840928, Val Loss = 0.04230886, LR = 0.00007077
Epoch 5: Train Loss = 0.03833015, Val Loss = 0.04259208, LR = 0.00005712
Epoch 6: Train Loss = 0.03824751, Val Loss = 0.04174694, LR = 0.00004288
Epoch 7: Train Loss = 0.03818129, Val Loss = 0.04170493, LR = 0.00002923
Epoch 8: Train Loss = 0.03813975, Val Loss = 0.04163438, LR = 0.00001726
Epoch 9: Train Loss = 0.03809372, Val Loss = 0.04156255, LR = 0.00000794
Epoch 10: Train Loss = 0.03806962, Val Loss = 0.04157350, LR = 0.00000203
Training complete. Best Val Loss: 0.041562553334881715


In [3]:
# model.load_weight('pretrained/lstm-improved-m64-h256-l1-wn.pth')
# model.load_weight('pretrained/lstm-base-m128-h512-wn.pth')
# model.load_weight('pretrained/transformer-dm16-df32-nh4-e1-d1-wn.pth')
model.load_weight('pretrained/lstm-decoder-only-m128-h512-l1')
# model.load_weight('pretrained/lstm-base-m64-h256-wn-v2.pth')

In [4]:
model.evaluate(val_loader, criterion)

0.04232833784305731

In [None]:
9.35e-6

In [5]:
model.save_weight('pretrained/lstm-base-m64-h256-l1.pth')

# Test

In [11]:
import torch
from dataset import GTSequenceDataset

# SEQ_PATH = '../../Datasets/MOT20/val/MOT20-01/'
# SEQ_PATH = '../../Datasets/MOT17/val/MOT17-04-FRCNN/'
SEQ_PATH = '../../Datasets/DanceTrack/val/dancetrack0010///'
SEQ_IN_LEN = 10
SEQ_OUT_LEN = 2
SEQ_TOTAL_LEN = 12
BATCH_SIZE = 512

d = GTSequenceDataset.from_sequence(SEQ_PATH, seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN)
# d = GTSequenceDataset.from_sequence(SEQ_PATH, seq_in_len=SEQ_IN_LEN, seq_out_len=SEQ_OUT_LEN, seq_total_len=SEQ_TOTAL_LEN, noise_prob=0.6, noise_coeff=2)
sources = torch.tensor(d.sources).to(DEVICE)[:BATCH_SIZE]
targets = torch.tensor(d.targets).to(DEVICE)[:BATCH_SIZE]

o = model.inference(sources, targets, num_steps=targets.size(1) - 1)

o[:, :, 0] *= d.image_width.item()
o[:, :, 2] *= d.image_width.item()
o[:, :, 1] *= d.image_height.item()
o[:, :, 3] *= d.image_height.item()
targets[:, :, 0] *= d.image_width.item()
targets[:, :, 2] *= d.image_width.item()
targets[:, :, 1] *= d.image_height.item()
targets[:, :, 3] *= d.image_height.item()
t = targets[:, 1:]
sources[:, :, 0] *= d.image_width.item()
sources[:, :, 2] *= d.image_width.item()
sources[:, :, 1] *= d.image_height.item()
sources[:, :, 3] *= d.image_height.item()

index = 9
# t[index], o[index]
# f = (t[index] - o[index]).abs()[:, 3].mean()
f = (t - o).abs().mean()
f


tensor(5.2258, device='cuda:0', grad_fn=<MeanBackward0>)

In [8]:
targets.shape

torch.Size([512, 10, 4])

In [51]:
t[0]

tensor([[ 294.3531,  811.7768,  413.6157, 1081.0000]], device='cuda:0')

In [52]:
o[0]

tensor([[ 286.7705,  810.1767,  418.4162, 1081.0775]], device='cuda:0',
       grad_fn=<SelectBackward0>)

In [6]:
d.image_height.item()

1080.0

In [16]:
targets

tensor([[[0.4595, 0.3603, 0.5068, 0.6890],
         [0.4916, 0.3975, 0.5206, 0.7048]],

        [[0.4916, 0.3975, 0.5206, 0.7048],
         [0.4530, 0.4065, 0.5024, 0.6868]],

        [[0.4530, 0.4065, 0.5024, 0.6868],
         [0.4324, 0.4058, 0.5474, 0.7003]],

        ...,

        [[0.2288, 0.5286, 0.3329, 0.7175],
         [0.2476, 0.5468, 0.3747, 0.6862]],

        [[0.2476, 0.5468, 0.3747, 0.6862],
         [0.2156, 0.5008, 0.3438, 0.7099]],

        [[0.2156, 0.5008, 0.3438, 0.7099],
         [0.2274, 0.5202, 0.3164, 0.7094]]], device='cuda:0')