In [1]:
import os
import sys
sys.path.append('../src')

import torch
import torch.utils.benchmark as benchmark
from pathlib import Path

from transformer import Transformer
from trainer import run_one_epoch, init_metrics
from utils import get_loss, get_optimizer, get_lr_scheduler
from utils.get_data import get_data_loader, get_dataset

# Cap the number of CPU threads PyTorch uses for intra-op parallelism at 10
# for the lifetime of this kernel session.
torch.set_num_threads(10)

In [2]:
# Run configuration for this notebook.
# Prefer the first CUDA device, but fall back to the CPU when CUDA is not
# available so the notebook does not fail outright at `.to(device)`.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
dataset_name = 'jetclass'
batch_size = 1
# Hyper-parameters unpacked as keyword arguments into `Transformer(...)`.
model_configs = {
    'block_size': 100,
    'n_hashes': 3,
    'num_regions': 150,
    'num_heads': 8,
    'h_dim': 24,
    'n_layers': 4,
    'num_w_per_dist': 10,
}


In [3]:
# The on-disk folder is named after the part of the dataset name that
# precedes the first "-" (the whole name when there is no "-").
dataset_dir = Path('../data/', dataset_name.partition("-")[0])
dataset = get_dataset(
    dataset_name,
    dataset_dir,
)

In [4]:
# Build the data loaders from the dataset's own index split; the result is
# later indexed by split name (e.g. loaders["train"]).
loaders = get_data_loader(
    dataset,
    dataset.idx_split,
    batch_size=batch_size,
)

In [5]:
# Inspect a single sample to see the shape of each field.
sample = dataset[0]
for field in ("pos", "x", "coords", "y"):
    print(f"Shape of data.{field}:", getattr(sample, field).shape)
print(sample.y)

# Size the model from the inspected sample instead of hard-coding magic numbers:
# the input width is the last axis of `x`, and the class count is the length of
# the label vector.
# NOTE(review): this assumes `y` holds one entry per class (the printed sample
# looks one-hot) — confirm before reusing with another dataset.
# NOTE(review): an earlier version read `dataset.x_dim` / `dataset.num_classes`;
# confirm whether those attributes are the intended source of these sizes.
in_dim = sample.x.shape[-1]
num_classes = sample.y.shape[-1]
model = Transformer(
    in_dim=in_dim,
    coords_dim=dataset.coords_dim,
    num_classes=num_classes,
    **model_configs,
).to(device)

Shape of data.pos: torch.Size([128, 2])
Shape of data.x: torch.Size([128, 4])
Shape of data.coords: torch.Size([128, 4])
Shape of data.y: torch.Size([10])
tensor([0, 1, 0, 0, 0, 0, 0, 0, 0, 0])


In [6]:
# Loss function and metric trackers that are passed to `run_one_epoch` below.
# NOTE(review): the second argument to get_loss is None — presumably "no extra
# loss options"; confirm against utils.get_loss.
criterion = get_loss('crossentropy', None)
# NOTE(review): the literal 'jetclass' repeats the value of `dataset_name`;
# keep the two in sync if the dataset is ever changed.
metrics = init_metrics('jetclass')

In [7]:
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
# Optimisation settings: which optimizer / LR scheduler to build and the
# keyword arguments each one receives.
config = dict(
    optimizer_name='adam',
    optimizer_kwargs=dict(lr=1.0e-3),
    lr_scheduler_name='impatient',
    lr_scheduler_kwargs=dict(
        factor=0.5,
        patience=20,
        mode='min',
        num_training_steps=1,
    ),
)

# Build the optimizer over all model parameters.
optimizer = get_optimizer(
    model.parameters(),
    config['optimizer_name'],
    config['optimizer_kwargs'],
)

# Build the learning-rate scheduler that wraps that optimizer.
lr_scheduler = get_lr_scheduler(
    optimizer,
    config['lr_scheduler_name'],
    config['lr_scheduler_kwargs'],
)

In [8]:
# Train for a fixed number of epochs, printing the result dict of each one.
# The current epoch index is passed through instead of a constant 0, so that
# progress output no longer reports every pass as "[Epoch 0]".
# NOTE(review): assumes the sixth positional argument of run_one_epoch is the
# epoch index (the progress bar prints "[Epoch 0] train" for the value 0) —
# confirm against trainer.run_one_epoch.
# NOTE(review): the recorded run shows loss ~6e-05 together with accuracy ~0.10,
# i.e. chance level for 10 classes; the label/loss handling looks suspect and
# should be checked before trusting these numbers.
num_epochs = 10
for epoch in range(num_epochs):
    train_res = run_one_epoch(
        model,
        optimizer,
        criterion,
        loaders["train"],
        "train",
        epoch,
        device,
        metrics,
        lr_scheduler,
    )
    print(train_res)

[Epoch 0] train, loss: 0.0001, acc: 0.1000: 100%|██████████| 60000/60000 [18:40<00:00, 53.54it/s] 

{'loss': 6.213477482241399e-05, 'accuracy': 0.10001333333333333}



