In [2]:
import timm
from timm.loss import BinaryCrossEntropy
from timm.optim import create_optimizer_v2
import torch
from pytorch_accelerated.callbacks import SaveBestModelCallback
from pytorch_accelerated.trainer import DEFAULT_CALLBACKS

from src.data.datasets.coin_data import CoinData, CoinDataFolder
from src.training.trainer import TimmMixupTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
print("torch.cuda.is_available():", torch.cuda.is_available())
print("torch.cuda.device_count():", torch.cuda.device_count())
print("torch.backends.mkl.is_available():", torch.backends.mkl.is_available())
print("torch.backends.cudnn.is_available():", torch.backends.cudnn.is_available())
print("torch.backends.cuda.is_built():", torch.backends.cuda.is_built())
print("torch.backends.mkldnn.is_available():", torch.backends.mkldnn.is_available())

torch.cuda.is_available(): True
torch.cuda.device_count(): 1
torch.backends.mkl.is_available(): True
torch.backends.cudnn.is_available(): True
torch.backends.cuda.is_built(): True
torch.backends.mkldnn.is_available(): True


In [4]:
%matplotlib inline

# Enable autoreloading of imported modules.
%load_ext autoreload
%autoreload 2

In [5]:
# Set training arguments, hardcoded here for clarity
image_size = (224, 224)
lr = 5e-3
smoothing = 0.1
mixup = 0.2
cutmix = 1.0
batch_size = 16
bce_target_thresh = 0.2
num_epochs = 10

In [6]:
# load data
coin_data = CoinData()
num_classes = len(coin_data.images_and_targets)

../data/raw/CN_dataset_04_23/data_types_example
[('../data/raw/CN_dataset_04_23/data_types_example/1/CN_type_1_cn_coin_8022_p.jpg', 0), ('../data/raw/CN_dataset_04_23/data_types_example/1/CN_type_1_MK_18203122_cn_coin_6383_o.jpg', 0), ('../data/raw/CN_dataset_04_23/data_types_example/2/CN_type_2_cn_coin_8024_p.jpg', 1), ('../data/raw/CN_dataset_04_23/data_types_example/3/CN_type_3_BNF_Platzhalter_cn_coin_11904_o.jpg', 2), ('../data/raw/CN_dataset_04_23/data_types_example/3/CN_type_3_MK_18247614_cn_coin_6696_o.jpg', 2), ('../data/raw/CN_dataset_04_23/data_types_example/5/CN_type_5_cn_coin_7685_p.jpg', 3), ('../data/raw/CN_dataset_04_23/data_types_example/6/CN_type_6_cn_coin_7686_p.jpg', 4), ('../data/raw/CN_dataset_04_23/data_types_example/8/CN_type_8_cn_coin_7689_p.jpg', 5), ('../data/raw/CN_dataset_04_23/data_types_example/8/CN_type_8_cn_coin_15352_p.jpg', 5), ('../data/raw/CN_dataset_04_23/data_types_example/11/CN_type_11_cn_coin_8036_p.jpg', 6), ('../data/raw/CN_dataset_04_23/data_t

In [7]:
mixup_args = dict(
    mixup_alpha=mixup,
    cutmix_alpha=cutmix,
    label_smoothing=smoothing,
    num_classes=num_classes,
)

In [8]:
# Create model using timm
model = timm.create_model(
    "resnet34", pretrained=False, num_classes=num_classes, #drop_path_rate=0.05
)

In [9]:
# Load data config associated with the model to use in data augmentation pipeline
data_config = timm.data.resolve_data_config({}, model=model, verbose=True)
data_mean = data_config["mean"]
data_std = data_config["std"]

In [10]:
train_dataset, eval_dataset = coin_data.generate_train_val_datasets(val_pct=0.3, image_size=image_size, data_mean=data_mean, data_std=data_std)

In [11]:
train_dataset.__getitem__(0)

(tensor([[[-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          ...,
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179]],
 
         [[-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          ...,
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357]],
 
         [[-1.8044, -1.8044, -1.8044,  ..., -1.8044, -1.8044, -1.8044],
          [-1.8044, -1.8044,

In [12]:
# Create optimizer
optimizer = create_optimizer_v2(
    model, opt="RMSprop", lr=lr, weight_decay=0.01
)

In [13]:
optimizer

RMSprop (
Parameter Group 0
    alpha: 0.9
    centered: False
    differentiable: False
    eps: 1e-08
    foreach: None
    lr: 0.005
    maximize: False
    momentum: 0.9
    weight_decay: 0.0

Parameter Group 1
    alpha: 0.9
    centered: False
    differentiable: False
    eps: 1e-08
    foreach: None
    lr: 0.005
    maximize: False
    momentum: 0.9
    weight_decay: 0.01
)

In [14]:
# As we are using Mixup, we can use BCE during training and CE for evaluation
train_loss_fn = BinaryCrossEntropy(
    target_threshold=bce_target_thresh, smoothing=smoothing
)
validate_loss_fn = torch.nn.CrossEntropyLoss()

In [15]:
# Create trainer and start training
trainer = TimmMixupTrainer(
    model=model,
    optimizer=optimizer,
    loss_func=train_loss_fn,
    eval_loss_fn=validate_loss_fn,
    mixup_args=mixup_args,
    num_classes=num_classes,
    callbacks=[
        *DEFAULT_CALLBACKS,
        SaveBestModelCallback(watch_metric="accuracy", greater_is_better=True),
    ],
)

In [16]:
"""trainer.train(
        per_device_batch_size=batch_size,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        num_epochs=num_epochs,
        create_scheduler_fn=trainer.create_scheduler,
    )"""

'trainer.train(\n        per_device_batch_size=batch_size,\n        train_dataset=train_dataset,\n        eval_dataset=eval_dataset,\n        num_epochs=num_epochs,\n        create_scheduler_fn=trainer.create_scheduler,\n    )'

In [17]:
trainer.evaluate(dataset=eval_dataset) 


Starting evaluation run


  0%|          | 0/6 [00:00<?, ?it/s]

In [18]:
checkpoint = torch.load(f="../training/best_model.pt", map_location=torch.device('cpu'))

In [19]:
checkpoint.keys()

dict_keys(['model_state_dict', 'optimizer_state_dict', 'loss'])

In [20]:
checkpoint["loss"]

tensor(0.1739)