In [1]:
import torch

import composer
from composer.datasets import coco_mmdet
from composer.models import composer_yolox
from torch.utils.data import DataLoader
from composer.datasets.coco_mmdet import mmdet_collate, mmdet_get_num_samples
from composer.core.data_spec import DataSpec
from composer.loggers import InMemoryLogger, LogLevel, WandBLogger



# Silence library logging inside the notebook: a disable threshold of
# sys.maxsize suppresses records at every standard logging level.
import logging
import sys

logging.disable(sys.maxsize)

# Seed torch's global RNG for replicability. The trailing semicolon keeps the
# returned Generator's repr out of the cell output.
torch.manual_seed(42);

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x7f864c9a2a10>

In [2]:
# Both splits are read from the same COCO directory (a relative path).
coco_root = '../../data/coco'

train_dataset = coco_mmdet(path=coco_root, split='train')
val_dataset = coco_mmdet(path=coco_root, split='val')

loading annotations into memory...
Done (t=12.60s)
creating index...
index created!
loading annotations into memory...
Done (t=1.80s)
creating index...
index created!


In [3]:
# Instantiate the YOLOX model through composer_yolox; 'yolox-s' selects the variant by name.
model = composer_yolox(model_name='yolox-s')

In [3]:
batch_size = 32

# Keyword arguments common to the training and validation loaders.
shared_loader_args = dict(batch_size=batch_size, collate_fn=mmdet_collate, num_workers=8)

# Training: reshuffle every epoch and drop the trailing partial batch.
train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, **shared_loader_args)
# Validation: keep dataset order; the final partial batch is retained.
val_loader = DataLoader(val_dataset, shuffle=False, **shared_loader_args)

In [6]:
# Debugging probe: show whether the training DataLoader was created with
# persistent_workers (i.e. whether worker processes are kept alive between epochs).
# NOTE(review): this looks like leftover inspection; consider removing the cell
# or setting persistent_workers explicitly where the loaders are built.
train_loader.persistent_workers

False

In [5]:
# SGD optimizer with decoupled weight decay over all model parameters.
optimizer = composer.optim.DecoupledSGDW(
    model.parameters(), # Model parameters to update
    lr=0.01, # Peak learning rate
    momentum=0.9,
    weight_decay=5e-4, # If this looks large, it's because it's not scaled by the LR as in non-decoupled weight decay
    nesterov=True # Use Nesterov momentum
)

In [6]:
# Cosine-annealing learning-rate schedule preceded by a warmup phase.
lr_scheduler = composer.optim.CosineAnnealingWithWarmupScheduler(
    t_warmup="30ep", # Warm up over 30 epochs
)

In [7]:
# Total training duration as a Composer time string ("ep" = epochs).
# NOTE(review): the previous comment said "3 epochs" for a Colab environment,
# which did not match the "300ep" value; confirm which duration is intended.
train_epochs = "300ep"

# Wrap each loader in a DataSpec so the trainer counts the samples in a batch
# with mmdet_get_num_samples.
train_spec = DataSpec(train_loader, get_num_samples_in_batch=mmdet_get_num_samples)
eval_spec = DataSpec(val_loader, get_num_samples_in_batch=mmdet_get_num_samples)

# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "gpu"
else:
    device = "cpu"

# Keep batch-level metrics in memory and also send the run to Weights & Biases.
run_loggers = [
    InMemoryLogger(log_level=LogLevel.BATCH),
    WandBLogger(project='yolox-test'),
]

trainer = composer.trainer.Trainer(
    model=model,
    train_dataloader=train_spec,
    eval_dataloader=eval_spec,
    max_duration=train_epochs,
    optimizers=optimizer,
    schedulers=lr_scheduler,
    device=device,
    grad_accum=1,
    loggers=run_loggers,
)


[34m[1mwandb[0m: Currently logged in as: [33maustin-j[0m ([33mmosaic-ml[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
# Run the training loop for the duration configured on the trainer.
# This is the long-running cell of the notebook.
trainer.fit()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
train          Epoch   0:   68%|| 2526/3696 [15:39<08:00,  2.44ba/s, loss/train=11.6581]         

KeyboardInterrupt: 

: 

In [None]:
# Display the metrics currently held on the trainer's state.
trainer.state.current_metrics

In [None]:
# NOTE(review): this constructs a second WandBLogger that is neither assigned
# nor passed to the trainer; it looks like leftover scratch — consider removing
# this cell.
WandBLogger(project='yolox-test')