In [47]:
from tracknet.model import GridTrackNetModel
from tracknet.dataset import TrackNet
from torchinfo import summary
import wandb

In [31]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

  ········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmrmorais[0m ([33mmrmorais-home[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [57]:
wandb.init(project='GridTrackNet', name='wandb_train_l1_adam')

model = GridTrackNetModel()

wandb.watch(model, log_freq=500)

0,1
epoch,▁▂▃▅▆▇█
train_loss,▂▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁

0,1
epoch,6.0
train_loss,0.0084
val_acc,0.0
val_loss,0.08465


In [58]:
summary(model, input_size=(10, 15, 432, 768))

Layer (type:depth-idx)                   Output Shape              Param #
GridTrackNetModel                        [10, 15, 27, 48]          --
├─ConvBlock: 1-1                         [10, 64, 432, 768]        --
│    └─Sequential: 2-1                   [10, 64, 432, 768]        --
│    │    └─Conv2d: 3-1                  [10, 64, 432, 768]        8,704
│    │    └─ReLU: 3-2                    [10, 64, 432, 768]        --
│    │    └─BatchNorm2d: 3-3             [10, 64, 432, 768]        128
├─ConvBlock: 1-2                         [10, 64, 432, 768]        --
│    └─Sequential: 2-2                   [10, 64, 432, 768]        --
│    │    └─Conv2d: 3-4                  [10, 64, 432, 768]        36,928
│    │    └─ReLU: 3-5                    [10, 64, 432, 768]        --
│    │    └─BatchNorm2d: 3-6             [10, 64, 432, 768]        128
├─MaxPool2d: 1-3                         [10, 64, 216, 384]        --
├─ConvBlock: 1-4                         [10, 128, 216, 384]       --
│    └

In [59]:
import torch
import torch.nn as nn
from torch.optim import Adadelta, Adam
import numpy as np
from torch import cuda
from tqdm import tqdm

In [64]:
def validate(model, criterion, val_loader):
    device = "cuda" if cuda.is_available() else "cpu"
    model.eval()

    corrects = []
    losses = []

    for _, (instances, label) in enumerate(val_loader):
        with torch.no_grad():
            label = label.permute(0, 1, 4, 2, 3).reshape(val_loader.batch_size, 3 * 5, 27, 48).to(device, dtype=torch.float32)

            instances = instances.to(device, dtype=torch.float32)
            outputs = model(instances)

            loss = criterion(outputs, label)
            losses.append(loss.item() * instances.size(0))

            for i in range(val_loader.batch_size):
                # Each 3 items is a grid confidence for 1 of 5 frames
                for j in range(0, 15, 3):
                    gt = np.argmax(label[i][j].flatten().cpu())
                    gt_x, gt_y = np.unravel_index(gt, (27, 48))

                    out = np.argmax(outputs[i][j].flatten().cpu())
                    out_x, out_y = np.unravel_index(out, (27, 48))
                    print(gt_x, out_x, gt_y, out_y)

                    corrects.append(gt_x == out_x and gt_y == out_y)

    acc = sum(corrects) / len(corrects)
    avg_loss = np.average(losses)

    return acc, avg_loss

In [31]:
from torch.utils.data import DataLoader

val_dataset = TrackNet('compiled_dataset')
val_loader = DataLoader(val_dataset, batch_size=10)
validate(model, val_loader)

TypeError: TrackNet.__init__() missing 1 required positional argument: 'files'

In [61]:

def train(model, train_loader, val_loader, epochs = 50):
    device = "cuda" if cuda.is_available() else "cpu"
    model.to(device)

    # criterion = nn.MSELoss()
    # criterion = nn.CrossEntropyLoss()
    # optimizer = Adadelta(lr=1.0, params=model.parameters(), weight_decay=0.01)

    criterion = nn.L1Loss()
    optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=0.01)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        total_samples = 0

        for instances, label in tqdm(train_loader):
            optimizer.zero_grad()

            label = label.permute(0, 1, 4, 2, 3).reshape(train_loader.batch_size, 3 * 5, 27, 48).to(device, dtype=torch.float32)

            instances = instances.to(device, dtype=torch.float32)
            outputs = model(instances)

            # print(outputs.shape, label.shape)

            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * instances.size(0)
            total_samples += instances.size(0)

            # print(loss)
            wandb.log({ 'train_loss': loss })

        avg_loss = running_loss / total_samples

        val_acc, val_loss = validate(model, criterion, val_loader)

        print(f"Epoch {epoch+1}/{epochs}: Train Loss {avg_loss:.4f}, Val Acc={val_acc:.4f}, Val Loss={val_loss:.4f}")

        wandb.log({
            'epoch': epoch,
            'train_loss': avg_loss,
            'val_acc': val_acc,
            'val_loss': val_loss,
        })


In [None]:
wandb.finish()

In [62]:
import os
import random

files = os.listdir('compiled_dataset')
files = list(filter(lambda x: x.endswith('.hdf5'), files))
random.shuffle(files)

val_files = files[:6]
train_files = files[6:]

val_files, train_files

(['060.hdf5', '031.hdf5', '010.hdf5', '059.hdf5', '016.hdf5', '052.hdf5'],
 ['024.hdf5',
  '013.hdf5',
  '007.hdf5',
  '061.hdf5',
  '023.hdf5',
  '006.hdf5',
  '026.hdf5',
  '030.hdf5',
  '022.hdf5',
  '001.hdf5',
  '011.hdf5',
  '036.hdf5',
  '019.hdf5',
  '000.hdf5',
  '039.hdf5',
  '018.hdf5',
  '044.hdf5',
  '005.hdf5',
  '012.hdf5',
  '037.hdf5',
  '027.hdf5',
  '015.hdf5',
  '050.hdf5',
  '047.hdf5',
  '034.hdf5',
  '035.hdf5',
  '046.hdf5',
  '054.hdf5',
  '017.hdf5',
  '028.hdf5',
  '058.hdf5',
  '003.hdf5',
  '045.hdf5',
  '025.hdf5',
  '014.hdf5',
  '032.hdf5',
  '038.hdf5',
  '041.hdf5',
  '008.hdf5',
  '049.hdf5',
  '042.hdf5',
  '009.hdf5',
  '021.hdf5',
  '053.hdf5',
  '029.hdf5',
  '057.hdf5',
  '055.hdf5',
  '040.hdf5',
  '051.hdf5',
  '033.hdf5',
  '056.hdf5',
  '043.hdf5',
  '004.hdf5',
  '020.hdf5',
  '048.hdf5',
  '002.hdf5'])

In [63]:
from torch.utils.data import DataLoader

val_dataset = TrackNet('compiled_dataset', val_files, debug=False)
val_loader = DataLoader(val_dataset, batch_size=10, pin_memory=True, num_workers=6)

train_dataset = TrackNet('compiled_dataset', train_files, debug=False)
train_loader = DataLoader(train_dataset, batch_size=10, pin_memory=True, num_workers=8)

train(model, train_loader, val_loader)

100%|██████████| 280/280 [01:28<00:00,  3.16it/s]


Epoch 1/50: Train Loss 0.1007, Val Acc=0.0000, Val Loss=3.5871


100%|██████████| 280/280 [01:26<00:00,  3.22it/s]


Epoch 2/50: Train Loss 0.0467, Val Acc=0.0000, Val Loss=0.0742


100%|██████████| 280/280 [01:25<00:00,  3.26it/s]


Epoch 3/50: Train Loss 0.0153, Val Acc=0.0000, Val Loss=0.1167


100%|██████████| 280/280 [01:25<00:00,  3.26it/s]


Epoch 4/50: Train Loss 0.0153, Val Acc=0.0000, Val Loss=0.1352


100%|██████████| 280/280 [01:24<00:00,  3.31it/s]


Epoch 5/50: Train Loss 0.0151, Val Acc=0.0000, Val Loss=0.1539


100%|██████████| 280/280 [01:24<00:00,  3.32it/s]


Epoch 6/50: Train Loss 0.0150, Val Acc=0.0000, Val Loss=0.1698


100%|██████████| 280/280 [01:23<00:00,  3.36it/s]


Epoch 7/50: Train Loss 0.0148, Val Acc=0.0000, Val Loss=0.1243


100%|██████████| 280/280 [01:24<00:00,  3.30it/s]


Epoch 8/50: Train Loss 0.0149, Val Acc=0.0000, Val Loss=0.1503


100%|██████████| 280/280 [01:31<00:00,  3.07it/s]


Epoch 9/50: Train Loss 0.0151, Val Acc=0.0000, Val Loss=0.1542


100%|██████████| 280/280 [01:24<00:00,  3.32it/s]


Epoch 10/50: Train Loss 0.0151, Val Acc=0.0000, Val Loss=0.1602


100%|██████████| 280/280 [01:26<00:00,  3.23it/s]


Epoch 11/50: Train Loss 0.0151, Val Acc=0.0000, Val Loss=0.1690


100%|██████████| 280/280 [01:26<00:00,  3.25it/s]


Epoch 12/50: Train Loss 0.0150, Val Acc=0.0000, Val Loss=0.1687


100%|██████████| 280/280 [01:31<00:00,  3.08it/s]


Epoch 14/50: Train Loss 0.0150, Val Acc=0.0000, Val Loss=0.1692


100%|██████████| 280/280 [01:29<00:00,  3.15it/s]


Epoch 15/50: Train Loss 0.0150, Val Acc=0.0000, Val Loss=0.1155


100%|██████████| 280/280 [01:28<00:00,  3.17it/s]


Epoch 16/50: Train Loss 0.0150, Val Acc=0.0000, Val Loss=0.1589


100%|██████████| 280/280 [01:30<00:00,  3.08it/s]


Epoch 17/50: Train Loss 0.0150, Val Acc=0.0000, Val Loss=0.1570


100%|██████████| 280/280 [01:32<00:00,  3.02it/s]


Epoch 18/50: Train Loss 0.0149, Val Acc=0.0000, Val Loss=0.1526


100%|██████████| 280/280 [01:30<00:00,  3.09it/s]


Epoch 19/50: Train Loss 0.0149, Val Acc=0.0000, Val Loss=0.1612


100%|██████████| 280/280 [01:32<00:00,  3.03it/s]


Epoch 20/50: Train Loss 0.0149, Val Acc=0.0000, Val Loss=0.0978


100%|██████████| 280/280 [01:33<00:00,  2.98it/s]


Epoch 21/50: Train Loss 0.0149, Val Acc=0.0000, Val Loss=0.1590


100%|██████████| 280/280 [01:31<00:00,  3.06it/s]


Epoch 22/50: Train Loss 0.0149, Val Acc=0.0000, Val Loss=0.1692


100%|██████████| 280/280 [01:34<00:00,  2.95it/s]


Epoch 23/50: Train Loss 0.0149, Val Acc=0.0000, Val Loss=0.1812


100%|██████████| 280/280 [01:34<00:00,  2.96it/s]


Epoch 24/50: Train Loss 0.0149, Val Acc=0.0000, Val Loss=0.0784


100%|██████████| 280/280 [01:30<00:00,  3.10it/s]


Epoch 25/50: Train Loss 0.0148, Val Acc=0.0000, Val Loss=0.1647


100%|██████████| 280/280 [01:32<00:00,  3.04it/s]


Epoch 26/50: Train Loss 0.0148, Val Acc=0.0000, Val Loss=0.1543


 41%|████      | 114/280 [00:42<01:01,  2.69it/s]


KeyboardInterrupt: 