In [1]:
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
from PIL import Image
from omegaconf import DictConfig
from torch.utils.data import DataLoader

import torch
import torchmetrics
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
import numpy as np

from dataset.cub import CUB200
from model.xfg import XFG
from util import WarmupLinearSchedule

In [2]:
config = DictConfig({
    "patch_size": 32,
    "split": "overlap",
    "slide_step": 24,
    "hidden_size": 768,
    "dropout": 0.1,
    "max_len": 100,
    "classifier": "token",
    "transformer": {
        "mlp_dim": 3072,
        "num_heads": 12,
        "num_layers": 12,
        "num_layers_cross": 12,
        "attention_dropout_rate": 0.0,
    },
    "num_classes": 200,
    "batch_size": 16,
    "num_workers": 8,
    "image_size": 448,
    "lr": 3e-2,
    "seed": 42,
    "momentum": 0.9,
    "epoch": 30,
    "gpus": [0],
    "logger": True,
    "pretrained_dir": "./pretrained/vit/imagenet21k_ViT-B_32.npz",
})

In [3]:
train_transform=transforms.Compose([
    transforms.Resize((600, 600), InterpolationMode.BILINEAR),
    transforms.RandomCrop((448, 448)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
train_set = CUB200(root="./data", train=True, caption=True, transform=train_transform)

In [4]:
# model = XFG(config)
# model.load_from(np.load(config.pretrained_dir))
# imgs, txts, targets = train_set[0]
# imgs = torch.Tensor(imgs).unsqueeze(0)
# txts = torch.Tensor(txts)
# model(imgs, txts).shape

In [6]:
if config.logger:
    from pytorch_lightning.loggers import WandbLogger
    logger = WandbLogger(
        project="xfg",
        name=f"vit"
    )
else:
    logger = pl.loggers.TestTubeLogger(
        "output", name=f"vit")
    logger.log_hyperparams(config)

pl.seed_everything(config.seed)
trainer = pl.Trainer(
    precision=16,
    deterministic=True,
    check_val_every_n_epoch=1,
    gpus=config.gpus,
    logger=logger,
    max_epochs=config.epoch,
    weights_summary="top",
    # accelerator='ddp',
)

model = LitXFG(config)
trainer.fit(model)
trainer.test()

Global seed set to 42
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
load_pretrained: grid-size from 7 to 18
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlorenzopark[0m (use `wandb login --relogin` to force relogin)



  | Name           | Type     | Params
--------------------------------------------
0 | model          | XFG      | 95.4 M
1 | train_accuracy | Accuracy | 0     
2 | val_accuracy   | Accuracy | 0     
3 | test_accuracy  | Accuracy | 0     
--------------------------------------------
95.4 M    Trainable params
0         Non-trainable params
95.4 M    Total params
381.747   Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

  value = torch.tensor(value, device=device, dtype=torch.float)
Global seed set to 42


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…



MisconfigurationException: `.test(ckpt_path="best")` is set but `ModelCheckpoint` is not configured to save the best model.