### Imports and Jupyter setup

In [1]:
%load_ext autoreload
%autoreload 2

import os
import time
import tqdm
import torch
import wandb
import numpy as np
import pandas as pd
import torch.nn as nn

from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from timm.scheduler import CosineLRScheduler
from sklearn.metrics import f1_score, accuracy_score, top_k_accuracy_score

os.environ["CUDA_VISIBLE_DEVICES"]="1"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pd.set_option('display.max_columns', None)
device

device(type='cuda')

### Custom Imports

In [2]:
from fgvc.utils.datasets import TrainDataset
from fgvc.utils.augmentations import light_transforms, heavy_transforms
from fgvc.utils.utils import timer, init_logger, seed_everything, getModel

In [3]:
!nvidia-smi

Thu May 19 11:57:26 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 465.27       CUDA Version: 11.3     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |
| 30%   26C    P8    24W / 350W |   2630MiB / 24268MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  On   | 00000000:C1:00.0 Off |                  N/A |
| 30%   26C    P8    26W / 350W |      3MiB / 24268MiB |      0%      Default |
|       

### Load Dataset Metadata

In [4]:
train_metadata = pd.read_csv("../../metadata/PlantCLEF2018_train_metadata.csv")
print(len(train_metadata))

val_metadata = pd.read_csv("../../metadata/PlantCLEF2018_val_metadata.csv")
print(len(val_metadata))

286841
33703


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [5]:
train_metadata['image_path'] = train_metadata['image_path'].apply(lambda x: x.replace('../../../nahouby/Datasets/PlantCLEF/', '/local/nahouby/Datasets/PlantCLEF/'))
train_metadata['image_path'] = train_metadata['image_path'].apply(lambda x: x.replace('../../nahouby/Datasets/PlantCLEF/', '/local/nahouby/Datasets/PlantCLEF/'))

val_metadata['image_path'] = val_metadata['image_path'].apply(lambda x: x.replace('../../../nahouby/Datasets/PlantCLEF/', '/local/nahouby/Datasets/PlantCLEF/'))
val_metadata['image_path'] = val_metadata['image_path'].apply(lambda x: x.replace('../../nahouby/Datasets/PlantCLEF/', '/local/nahouby/Datasets/PlantCLEF/'))

### Training Parameters

In [6]:
# Adjust BATCH_SIZE and ACCUMULATION_STEPS to values that if multiplied results in 64 !!!!!1

config = {"augmentations": 'light',
           "optimizer": 'SGD',
           "scheduler": 'plateau',
           "image_size": (224, 224),
           "random_seed": 777,
           "number_of_classes": len(train_metadata['class_id'].unique()),
           "architecture": 'vit_base_patch32_224',
           "batch_size": 32,
           "accumulation_steps": 4,
           "epochs": 100,
           "learning_rate": 0.01,
           "dataset": 'PlantCLEF2018',
           "loss": 'FocalLoss',
           "training_samples": len(train_metadata),
           "valid_samples": len(val_metadata),
           "workers": 12}

RUN_NAME = f"{config['architecture']}-{config['optimizer']}-{config['scheduler']}-{config['augmentations']}-{config['loss']}"

### Fix Seeds & Log Setup

In [7]:
LOG_FILE = f'{RUN_NAME}.log'
LOGGER = init_logger(LOG_FILE)

seed_everything(config['random_seed'])

### Init Model

In [8]:
# %%
model = getModel(config['architecture'], config['number_of_classes'], pretrained=True)
model_mean = list(model.default_cfg['mean'])
model_std = list(model.default_cfg['std'])

In [9]:
# Adjust BATCH_SIZE and ACCUMULATION_STEPS to values that if multiplied results in 64 !!!!!1

if config['augmentations'] == 'light':
    train_augmentations = light_transforms(data='train', image_size=config['image_size'], mean=model_mean, std=model_std)
    val_augmentations = light_transforms(data='valid', image_size=config['image_size'], mean=model_mean, std=model_std)
elif config['augmentations'] == 'light-random_crop':
    train_augmentations = light_transforms_rcrop(data='train', image_size=config['image_size'], mean=model_mean, std=model_std)
    val_augmentations = light_transforms_rcrop(data='valid-center-crop', image_size=config['image_size'], mean=model_mean, std=model_std)    
elif config['augmentations'] == 'heavy_transforms':
    train_augmentations = heavy_transforms(data='train', image_size=config['image_size'], mean=model_mean, std=model_std)
    val_augmentations = heavy_transforms(data='valid', image_size=config['image_size'], mean=model_mean, std=model_std)    

    
    
print('Augmentations:', config['augmentations'])

train_dataset = TrainDataset(train_metadata, transform=train_augmentations)
valid_dataset = TrainDataset(val_metadata, transform=val_augmentations)

train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=config['workers'])
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=False, num_workers=config['workers'])

Augmentations: light


### Init WandB

In [10]:
from fgvc.utils.wandb import init_wandb

init_wandb(config, RUN_NAME, entity='picekl', project='frontiers-plant-recognition')

[34m[1mwandb[0m: Currently logged in as: [33mpicekl[0m (use `wandb login --relogin` to force relogin)


### Set Optimizers!

In [11]:
if config['optimizer'] == 'AdamW':
    optimizer = torch.optim.AdamW(model.parameters(), lr=config['learning_rate'], betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
elif config['optimizer'] == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9)

if config['scheduler'] =='plateau':
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=1, verbose=True, eps=1e-6)
elif config['scheduler'] == 'cyclic_cosine':
    CYCLES = 5
    t_initial = config['epochs'] / CYCLES
    scheduler = CosineLRScheduler(optimizer, t_initial=20, lr_min=0.0001, cycle_decay = 0.9, cycle_limit = 5)

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(torch.nn.modules.loss._WeightedLoss):
    def __init__(self, weight=None, gamma=2.5, reduction='mean'):
        super(FocalLoss, self).__init__(weight,reduction=reduction)
        self.gamma = gamma
        self.weight = weight #weight parameter will act as the alpha parameter to balance class weights

    def forward(self, logits, target):

        ce_loss = F.cross_entropy(logits, target, reduction=self.reduction, weight=self.weight)
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma * ce_loss).mean()
        return focal_loss

In [13]:
if config['loss'] == 'CrossEntropyLoss':
    criterion = nn.CrossEntropyLoss()
elif config['loss'] == 'FocalLoss':
    criterion = FocalLoss()

### Training Loop

In [14]:
with timer('Train model', LOGGER):
    
    model.to(device)
    
    
    best_score = 0.
    best_loss = np.inf
    
    for epoch in range(config['epochs']):
        
        start_time = time.time()

        model.train()
        avg_loss = 0.
        
        num_steps_per_epoch = len(train_loader)
        num_updates = epoch * num_steps_per_epoch

        optimizer.zero_grad()
        
        train_lbls = np.zeros((len(train_metadata)))
        train_preds = np.zeros((len(train_metadata)))

        for i, (images, labels, _) in tqdm.tqdm(enumerate(train_loader)):

            images = images.to(device)
            labels = labels.to(device)

            y_preds = model(images)
            loss = criterion(y_preds, labels)
            
            # Scale the loss to the mean of the accumulated batch size
            avg_loss += loss.item() / len(train_loader) 
            loss = loss / config['accumulation_steps']
            loss.backward()
            if (i - 1) % config['accumulation_steps'] == 0:
                optimizer.step()
                optimizer.zero_grad()
                
            if config['scheduler'] == 'cyclic_cosine':
                num_updates += 1
                scheduler.step_update(num_updates=num_updates)
                
                
            train_preds[i * len(labels): (i+1) * len(labels)] = y_preds.argmax(1).to('cpu').numpy()
            train_lbls[i * len(labels): (i+1) * len(labels)] = labels.to('cpu').numpy()
            
        model.eval()
        avg_val_loss = 0.
        preds = np.zeros((len(valid_dataset)))
        preds_raw = []

        for i, (images, labels, _) in enumerate(valid_loader):
            
            images = images.to(device)
            labels = labels.to(device)
            
            with torch.no_grad():
                y_preds = model(images)
            
            preds[i * len(images): (i+1) * len(images)] = y_preds.argmax(1).to('cpu').numpy()
            preds_raw.extend(y_preds.to('cpu').numpy())

            loss = criterion(y_preds, labels)
        
            avg_val_loss += loss.item() / len(valid_loader)
        
        
        if config['scheduler'] == 'plateau':
            scheduler.step(avg_val_loss)
        elif config['scheduler'] == 'cyclic_cosine':
            scheduler.step(epoch + 1)
        
        train_accuracy = accuracy_score(train_lbls, train_preds)
        train_f1 = f1_score(train_lbls, train_preds, average='macro')
        
        accuracy = accuracy_score(val_metadata['class_id'], preds)
        f1 = f1_score(val_metadata['class_id'], preds, average='macro')
        recall_3 = top_k_accuracy_score(val_metadata['class_id'], preds_raw, k=3)

        elapsed = time.time() - start_time
        
        LOGGER.debug(f'  Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f} F1: {f1*100:.2f}  Acc: {accuracy*100:.2f} Recall@3: {recall_3*100:.2f} time: {elapsed:.0f}s')
       
        wandb.log({'Train_loss (avr.)': avg_loss,
                   'Val. loss (avr.)': avg_val_loss,
                   'Val. F1': np.round(f1*100, 2),
                   'Val. Accuracy': np.round(accuracy*100, 2),
                   'Val. Recall@3': np.round(recall_3*100, 2),
                   'Learning Rate': optimizer.param_groups[0]["lr"],
                   'Train. Accuracy': np.round(train_accuracy*100, 2),
                   'Train. F1': np.round(train_f1*100, 2)})

        if accuracy>best_score:
            best_score = accuracy
            LOGGER.debug(f'  Epoch {epoch+1} - Save Best Accuracy: {best_score:.6f} Model')
            torch.save(model.state_dict(), f'{RUN_NAME}_best_accuracy.pth')

        if avg_val_loss<best_loss:
            best_loss = avg_val_loss
            LOGGER.debug(f'  Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
            torch.save(model.state_dict(), f'{RUN_NAME}_best_loss.pth')

2022-05-19 11:57:34,724 INFO [Train model] start
8964it [08:21, 17.89it/s]
2022-05-19 12:06:47,289 DEBUG   Epoch 1 - avg_train_loss: 4.1097  avg_val_loss: 2.8771 F1: 15.79  Acc: 44.16 Recall@3: 57.57 time: 551s
2022-05-19 12:06:47,291 DEBUG   Epoch 1 - Save Best Accuracy: 0.441593 Model
2022-05-19 12:06:47,695 DEBUG   Epoch 1 - Save Best Loss: 2.8771 Model
8964it [08:17, 18.01it/s]
2022-05-19 12:15:56,744 DEBUG   Epoch 2 - avg_train_loss: 1.7059  avg_val_loss: 2.0122 F1: 28.15  Acc: 52.20 Recall@3: 66.57 time: 549s
2022-05-19 12:15:56,746 DEBUG   Epoch 2 - Save Best Accuracy: 0.522031 Model
2022-05-19 12:15:57,625 DEBUG   Epoch 2 - Save Best Loss: 2.0122 Model
8964it [08:22, 17.86it/s]
2022-05-19 12:25:11,687 DEBUG   Epoch 3 - avg_train_loss: 0.6062  avg_val_loss: 1.5288 F1: 37.64  Acc: 58.40 Recall@3: 72.54 time: 553s
2022-05-19 12:25:11,689 DEBUG   Epoch 3 - Save Best Accuracy: 0.583954 Model
2022-05-19 12:25:12,607 DEBUG   Epoch 3 - Save Best Loss: 1.5288 Model
8964it [08:20, 17.90i

Epoch    25: reducing learning rate of group 0 to 9.0000e-03.


2022-05-19 15:47:15,150 DEBUG   Epoch 25 - avg_train_loss: 0.0009  avg_val_loss: 0.9747 F1: 49.34  Acc: 67.67 Recall@3: 79.63 time: 548s
2022-05-19 15:47:15,152 DEBUG   Epoch 25 - Save Best Accuracy: 0.676735 Model
8964it [08:20, 17.92it/s]
2022-05-19 15:56:27,827 DEBUG   Epoch 26 - avg_train_loss: 0.0008  avg_val_loss: 0.9740 F1: 49.31  Acc: 67.65 Recall@3: 79.65 time: 552s
2022-05-19 15:56:27,829 DEBUG   Epoch 26 - Save Best Loss: 0.9740 Model
8964it [08:15, 18.08it/s]
2022-05-19 16:05:36,056 DEBUG   Epoch 27 - avg_train_loss: 0.0008  avg_val_loss: 0.9729 F1: 49.35  Acc: 67.67 Recall@3: 79.74 time: 547s
2022-05-19 16:05:36,058 DEBUG   Epoch 27 - Save Best Loss: 0.9729 Model
8964it [08:22, 17.82it/s]
2022-05-19 16:14:51,402 DEBUG   Epoch 28 - avg_train_loss: 0.0008  avg_val_loss: 0.9734 F1: 49.25  Acc: 67.65 Recall@3: 79.61 time: 554s
8964it [08:16, 18.06it/s]
2022-05-19 16:23:59,103 DEBUG   Epoch 29 - avg_train_loss: 0.0007  avg_val_loss: 0.9727 F1: 49.21  Acc: 67.64 Recall@3: 79.69 

Epoch    31: reducing learning rate of group 0 to 8.1000e-03.


2022-05-19 16:42:19,386 DEBUG   Epoch 31 - avg_train_loss: 0.0007  avg_val_loss: 0.9728 F1: 49.40  Acc: 67.68 Recall@3: 79.71 time: 551s
2022-05-19 16:42:19,387 DEBUG   Epoch 31 - Save Best Accuracy: 0.676765 Model
8964it [08:20, 17.91it/s]
2022-05-19 16:51:32,408 DEBUG   Epoch 32 - avg_train_loss: 0.0006  avg_val_loss: 0.9705 F1: 49.46  Acc: 67.74 Recall@3: 79.78 time: 552s
2022-05-19 16:51:32,410 DEBUG   Epoch 32 - Save Best Accuracy: 0.677358 Model
2022-05-19 16:51:33,251 DEBUG   Epoch 32 - Save Best Loss: 0.9705 Model
8964it [08:21, 17.87it/s]
2022-05-19 17:00:47,261 DEBUG   Epoch 33 - avg_train_loss: 0.0006  avg_val_loss: 0.9710 F1: 49.46  Acc: 67.70 Recall@3: 79.76 time: 553s
8964it [08:20, 17.90it/s]


Epoch    34: reducing learning rate of group 0 to 7.2900e-03.


2022-05-19 17:09:59,616 DEBUG   Epoch 34 - avg_train_loss: 0.0006  avg_val_loss: 0.9705 F1: 49.49  Acc: 67.79 Recall@3: 79.73 time: 552s
2022-05-19 17:09:59,617 DEBUG   Epoch 34 - Save Best Accuracy: 0.677922 Model
8964it [08:22, 17.85it/s]
2022-05-19 17:19:14,189 DEBUG   Epoch 35 - avg_train_loss: 0.0006  avg_val_loss: 0.9695 F1: 49.44  Acc: 67.80 Recall@3: 79.77 time: 554s
2022-05-19 17:19:14,191 DEBUG   Epoch 35 - Save Best Accuracy: 0.678011 Model
2022-05-19 17:19:15,106 DEBUG   Epoch 35 - Save Best Loss: 0.9695 Model
8964it [08:19, 17.95it/s]
2022-05-19 17:28:26,868 DEBUG   Epoch 36 - avg_train_loss: 0.0006  avg_val_loss: 0.9706 F1: 49.56  Acc: 67.81 Recall@3: 79.75 time: 551s
2022-05-19 17:28:26,870 DEBUG   Epoch 36 - Save Best Accuracy: 0.678070 Model
8964it [08:17, 18.01it/s]


Epoch    37: reducing learning rate of group 0 to 6.5610e-03.


2022-05-19 17:37:36,841 DEBUG   Epoch 37 - avg_train_loss: 0.0006  avg_val_loss: 0.9699 F1: 49.58  Acc: 67.80 Recall@3: 79.77 time: 549s
8964it [08:19, 17.94it/s]
2022-05-19 17:46:48,134 DEBUG   Epoch 38 - avg_train_loss: 0.0005  avg_val_loss: 0.9674 F1: 49.53  Acc: 67.84 Recall@3: 79.80 time: 551s
2022-05-19 17:46:48,136 DEBUG   Epoch 38 - Save Best Accuracy: 0.678397 Model
2022-05-19 17:46:48,974 DEBUG   Epoch 38 - Save Best Loss: 0.9674 Model
8964it [08:22, 17.85it/s]


Epoch    48: reducing learning rate of group 0 to 4.7830e-03.


2022-05-19 19:18:44,455 DEBUG   Epoch 48 - avg_train_loss: 0.0004  avg_val_loss: 0.9672 F1: 49.74  Acc: 67.88 Recall@3: 79.79 time: 554s
2022-05-19 19:18:44,457 DEBUG   Epoch 48 - Save Best Accuracy: 0.678753 Model
8964it [08:19, 17.95it/s]
2022-05-19 19:27:56,436 DEBUG   Epoch 49 - avg_train_loss: 0.0004  avg_val_loss: 0.9670 F1: 49.59  Acc: 67.86 Recall@3: 79.85 time: 551s
8964it [08:21, 17.88it/s]


Epoch    50: reducing learning rate of group 0 to 4.3047e-03.


2022-05-19 19:37:09,298 DEBUG   Epoch 50 - avg_train_loss: 0.0004  avg_val_loss: 0.9671 F1: 49.64  Acc: 67.87 Recall@3: 79.85 time: 553s
8964it [08:17, 18.02it/s]
2022-05-19 19:46:18,453 DEBUG   Epoch 51 - avg_train_loss: 0.0004  avg_val_loss: 0.9668 F1: 49.67  Acc: 67.87 Recall@3: 79.84 time: 549s
8964it [08:18, 17.97it/s]
2022-05-19 19:55:28,837 DEBUG   Epoch 52 - avg_train_loss: 0.0004  avg_val_loss: 0.9662 F1: 49.70  Acc: 67.83 Recall@3: 79.87 time: 550s
2022-05-19 19:55:28,839 DEBUG   Epoch 52 - Save Best Loss: 0.9662 Model
8964it [08:18, 17.98it/s]
2022-05-19 20:04:40,155 DEBUG   Epoch 53 - avg_train_loss: 0.0004  avg_val_loss: 0.9667 F1: 49.74  Acc: 67.88 Recall@3: 79.85 time: 550s
2022-05-19 20:04:40,157 DEBUG   Epoch 53 - Save Best Accuracy: 0.678812 Model
8964it [08:18, 17.97it/s]
2022-05-19 20:13:51,483 DEBUG   Epoch 54 - avg_train_loss: 0.0004  avg_val_loss: 0.9655 F1: 49.80  Acc: 67.91 Recall@3: 79.87 time: 550s
2022-05-19 20:13:51,485 DEBUG   Epoch 54 - Save Best Accuracy

Epoch    58: reducing learning rate of group 0 to 3.8742e-03.


2022-05-19 20:50:37,495 DEBUG   Epoch 58 - avg_train_loss: 0.0004  avg_val_loss: 0.9655 F1: 49.74  Acc: 67.85 Recall@3: 79.81 time: 551s
8964it [08:21, 17.88it/s]
2022-05-19 20:59:50,275 DEBUG   Epoch 59 - avg_train_loss: 0.0004  avg_val_loss: 0.9661 F1: 49.74  Acc: 67.87 Recall@3: 79.83 time: 553s
8964it [08:18, 17.97it/s]


Epoch    60: reducing learning rate of group 0 to 3.4868e-03.


2022-05-19 21:09:00,448 DEBUG   Epoch 60 - avg_train_loss: 0.0004  avg_val_loss: 0.9655 F1: 49.77  Acc: 67.90 Recall@3: 79.83 time: 550s
8964it [08:17, 18.03it/s]
2022-05-19 21:18:09,705 DEBUG   Epoch 61 - avg_train_loss: 0.0004  avg_val_loss: 0.9652 F1: 49.66  Acc: 67.89 Recall@3: 79.83 time: 549s
8964it [08:14, 18.11it/s]
2022-05-19 21:27:16,397 DEBUG   Epoch 62 - avg_train_loss: 0.0004  avg_val_loss: 0.9648 F1: 49.67  Acc: 67.88 Recall@3: 79.82 time: 547s
2022-05-19 21:27:16,399 DEBUG   Epoch 62 - Save Best Loss: 0.9648 Model
8964it [08:16, 18.04it/s]
2022-05-19 21:36:26,423 DEBUG   Epoch 63 - avg_train_loss: 0.0004  avg_val_loss: 0.9643 F1: 49.83  Acc: 67.94 Recall@3: 79.89 time: 549s
2022-05-19 21:36:26,425 DEBUG   Epoch 63 - Save Best Accuracy: 0.679405 Model
2022-05-19 21:36:27,334 DEBUG   Epoch 63 - Save Best Loss: 0.9643 Model
8964it [08:15, 18.09it/s]
2022-05-19 21:45:35,527 DEBUG   Epoch 64 - avg_train_loss: 0.0004  avg_val_loss: 0.9648 F1: 49.79  Acc: 67.92 Recall@3: 79.85 

Epoch    65: reducing learning rate of group 0 to 3.1381e-03.


2022-05-19 21:54:41,763 DEBUG   Epoch 65 - avg_train_loss: 0.0004  avg_val_loss: 0.9651 F1: 49.77  Acc: 67.91 Recall@3: 79.89 time: 546s
8964it [08:15, 18.11it/s]
2022-05-19 22:03:49,008 DEBUG   Epoch 66 - avg_train_loss: 0.0004  avg_val_loss: 0.9660 F1: 49.67  Acc: 67.84 Recall@3: 79.86 time: 547s
8964it [08:15, 18.10it/s]


Epoch    67: reducing learning rate of group 0 to 2.8243e-03.


2022-05-19 22:12:56,225 DEBUG   Epoch 67 - avg_train_loss: 0.0004  avg_val_loss: 0.9652 F1: 49.77  Acc: 67.90 Recall@3: 79.84 time: 547s
8964it [08:16, 18.06it/s]
2022-05-19 22:22:04,554 DEBUG   Epoch 68 - avg_train_loss: 0.0004  avg_val_loss: 0.9657 F1: 49.75  Acc: 67.89 Recall@3: 79.87 time: 548s
8964it [08:15, 18.09it/s]


Epoch    69: reducing learning rate of group 0 to 2.5419e-03.


2022-05-19 22:31:12,175 DEBUG   Epoch 69 - avg_train_loss: 0.0004  avg_val_loss: 0.9644 F1: 49.72  Acc: 67.87 Recall@3: 79.85 time: 548s
8964it [08:13, 18.16it/s]
2022-05-19 22:40:17,676 DEBUG   Epoch 70 - avg_train_loss: 0.0003  avg_val_loss: 0.9646 F1: 49.82  Acc: 67.93 Recall@3: 79.80 time: 546s
8964it [08:13, 18.17it/s]


Epoch    71: reducing learning rate of group 0 to 2.2877e-03.


2022-05-19 22:49:22,896 DEBUG   Epoch 71 - avg_train_loss: 0.0004  avg_val_loss: 0.9644 F1: 49.77  Acc: 67.88 Recall@3: 79.82 time: 545s
8964it [08:17, 18.02it/s]
2022-05-19 22:58:32,361 DEBUG   Epoch 72 - avg_train_loss: 0.0003  avg_val_loss: 0.9640 F1: 49.74  Acc: 67.86 Recall@3: 79.85 time: 549s
2022-05-19 22:58:32,364 DEBUG   Epoch 72 - Save Best Loss: 0.9640 Model
8964it [08:14, 18.15it/s]
2022-05-19 23:07:39,265 DEBUG   Epoch 73 - avg_train_loss: 0.0003  avg_val_loss: 0.9642 F1: 49.76  Acc: 67.89 Recall@3: 79.86 time: 546s
8964it [08:16, 18.04it/s]
2022-05-19 23:16:47,967 DEBUG   Epoch 74 - avg_train_loss: 0.0003  avg_val_loss: 0.9639 F1: 49.75  Acc: 67.86 Recall@3: 79.87 time: 549s
2022-05-19 23:16:47,969 DEBUG   Epoch 74 - Save Best Loss: 0.9639 Model
8964it [08:16, 18.04it/s]
2022-05-19 23:25:57,754 DEBUG   Epoch 75 - avg_train_loss: 0.0003  avg_val_loss: 0.9644 F1: 49.77  Acc: 67.88 Recall@3: 79.84 time: 549s
5014it [04:37, 18.21it/s]IOPub message rate exceeded.
The notebook 

Epoch    78: reducing learning rate of group 0 to 1.8530e-03.


2022-05-19 23:53:22,151 DEBUG   Epoch 78 - avg_train_loss: 0.0003  avg_val_loss: 0.9643 F1: 49.82  Acc: 67.91 Recall@3: 79.86 time: 547s
8964it [08:16, 18.04it/s]
2022-05-20 00:02:30,755 DEBUG   Epoch 79 - avg_train_loss: 0.0003  avg_val_loss: 0.9639 F1: 49.80  Acc: 67.91 Recall@3: 79.83 time: 549s
2022-05-20 00:02:30,757 DEBUG   Epoch 79 - Save Best Loss: 0.9639 Model
8964it [08:16, 18.07it/s]


Epoch    80: reducing learning rate of group 0 to 1.6677e-03.


2022-05-20 00:11:39,831 DEBUG   Epoch 80 - avg_train_loss: 0.0003  avg_val_loss: 0.9644 F1: 49.75  Acc: 67.89 Recall@3: 79.85 time: 548s
2787it [02:33, 17.49it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

8964it [08:15, 18.11it/s]
2022-05-20 00:39:00,610 DEBUG   Epoch 83 - avg_train_loss: 0.0003  avg_val_loss: 0.9641 F1: 49.80  Acc: 67.89 Recall@3: 79.87 time: 547s
1285it [01:10, 18.32it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch    86: reducing learning rate of group 0 to 1.2158e-03.


2022-05-20 01:06:21,100 DEBUG   Epoch 86 - avg_train_loss: 0.0003  avg_val_loss: 0.9645 F1: 49.78  Acc: 67.87 Recall@3: 79.86 time: 547s
3245it [02:59, 17.96it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

6951it [06:25, 17.45it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

5090it [04:43, 18.59it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To chan

Epoch    96: reducing learning rate of group 0 to 7.9766e-04.


2022-05-20 02:37:46,822 DEBUG   Epoch 96 - avg_train_loss: 0.0003  avg_val_loss: 0.9638 F1: 49.81  Acc: 67.89 Recall@3: 79.86 time: 549s
2022-05-20 02:37:46,823 DEBUG   Epoch 96 - Save Best Loss: 0.9638 Model
1477it [01:21, 17.90it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

8964it [08:15, 18.10it/s]


Epoch    99: reducing learning rate of group 0 to 7.1790e-04.


2022-05-20 03:05:12,224 DEBUG   Epoch 99 - avg_train_loss: 0.0003  avg_val_loss: 0.9637 F1: 49.76  Acc: 67.87 Recall@3: 79.92 time: 548s
2687it [02:27, 18.31it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [15]:
torch.save(model.state_dict(), f'{RUN_NAME}-100E.pth')