In [1]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install torch torchvision torchaudio
!pip install pandas numpy pillow pathlib datasets scikit-learn optuna plotly nbformat
!pip install transformers[torch]
!pip install ipywidgets

Collecting torch
  Downloading torch-2.4.1-cp310-cp310-win_amd64.whl.metadata (27 kB)
Collecting torchvision
  Downloading torchvision-0.19.1-cp310-cp310-win_amd64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading torchaudio-2.4.1-cp310-cp310-win_amd64.whl.metadata (6.5 kB)
Collecting filelock (from torch)
  Downloading filelock-3.15.4-py3-none-any.whl.metadata (2.9 kB)
Collecting sympy (from torch)
  Downloading sympy-1.13.2-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Downloading networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Collecting numpy (from torchvision)
  Downloading numpy-2.1.1-cp310-cp310-win_amd64.whl.metadata (59 kB)
     ---------------------------------------- 0.0/59.7 kB ? eta -:--:--
     ---------------------------------------- 59.7/59.7 kB ? eta 0:

In [2]:
import datetime
import json
import numpy as np
import optuna
import pandas as pd
import pickle
import torch

from dataclasses import dataclass
from datasets import load_metric, list_metrics, Dataset
from enum import auto, Enum
from functools import partial
from math import ceil
from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset as Dataset_torch
from transformers import ViTImageProcessor, ViTForImageClassification, TrainingArguments, Trainer
from torchvision.transforms.functional import (resize, pad, to_tensor, to_pil_image)
from torchvision.transforms import (
    Compose,
    ColorJitter,
    GaussianBlur,
    RandomHorizontalFlip,
    RandomVerticalFlip,
    RandomAffine)
from typing import List


import logging
import sys

In [3]:
class NFCS(Enum):
    """Facial component selected for an experiment.

    The member *name* is used as the label column of the CSV file and the member
    selects the image sub-directory (see the mapping in ``InfantDataset.__init__``).
    NOTE(review): NFCS presumably stands for Neonatal Facial Coding System - confirm.
    """
    SN = 1  # images under 'nasolabial_fold'
    FS = 2  # images under 'forehead'
    FP = 3  # images under 'eyes'
    BA = 4  # images under 'mouth'
    BE = 5  # images under 'mouth'

@dataclass
class DatasetMeta:
    """Describes one selectable dataset: a short name plus the CSV file that lists its samples."""
    # Short identifier; it becomes one component of the results directory path.
    name: str
    # Location of the CSV file that InfantDataset reads with pandas.
    csv_path: str

# Available datasets. Both CSV files live in the same UNIFESP data directory.
full_dataset = DatasetMeta(
    name='Full',
    csv_path=r"D:\ComputerScience\Mestrado\data\UNIFESP\NEW_GT.csv",
)
unifesp_dataset = DatasetMeta(
    name='UNIFESP',
    csv_path=r"D:\ComputerScience\Mestrado\data\UNIFESP\NEW_GT_UNIFESP.csv",
)

## Constants

In [4]:
# Datetime (the timestamp, formatted as ddmmyy-HHMM, names the results directory of this run)
now = datetime.datetime.now()
now_str = now.strftime('%d%m%y-%H%M')
# General
MOCK = False # Marks this as a mock run. Mock runs are saved in a test directory to avoid polluting the results directory
DESCRIPTION = '' # Use this string to describe any important points of the experiment. This will be saved along with the results
# Model
DATASET:DatasetMeta   = full_dataset
NFCS_REGION:NFCS      = NFCS.SN
MASKED:bool           = False # If True use the masked crops, if False use the non-masked crops
VALIDATION_SIZE:float = 0.15 # Fraction of the images used for validation
METRICS:List[str]     = ["accuracy", "f1"] # For a list of available metrics call the function list_metrics()
OBJECTIVE_METRIC      = "eval_f1" # Metric that will be optimized by optuna
GREATER_IS_BETTER     = True # Relative to the objective metric
DIRECTION             = "maximize" if GREATER_IS_BETTER else "minimize"
# Hyperparameter tuning
NUM_TRIALS:int   = 30 # 20
TRAIN_EPOCHS:int = 20 # 10
# Model path to download from huggingface
model_name_or_path:str = 'google/vit-base-patch16-224-in21k'



# Dataset paths
# Results layout: <root>[/mock]/ViT/<dataset name>/<masked|non_masked>/<NFCS region>/<timestamp>
output_dir = r'D:\ComputerScience\Mestrado\results\Optuna' + f"{'/mock' if MOCK else ''}/ViT/{DATASET.name}/{'non_' if MASKED==False else ''}masked/{NFCS_REGION.name}/{now_str}"
csv_path = DATASET.csv_path
masked_images_path = r"D:\ComputerScience\Mestrado\data\masked_crops"
non_masked_images_path = r"D:\ComputerScience\Mestrado\data\non_masked_crops"
# Create output dirs if needed
Path(output_dir).mkdir(parents=True, exist_ok=True)

---

## Hyperparameter Optimization using Optuna

In [5]:
def optuna_hp_space(trial):
    """Sample one hyperparameter configuration from the given Optuna trial.

    Args:
        trial: object exposing ``suggest_float`` and ``suggest_categorical``.

    Returns:
        dict with the keys ``learning_rate`` (log-uniform in [1e-6, 1e-3]),
        ``per_device_train_batch_size`` (one of 4, 8, 16, 32) and
        ``weight_decay`` (0 or 0.1).
    """
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-3, log=True)
    batch_size = trial.suggest_categorical("per_device_train_batch_size", [4, 8, 16, 32])
    weight_decay = trial.suggest_categorical("weight_decay", [0, 0.1])
    return {
        "learning_rate": learning_rate,
        "per_device_train_batch_size": batch_size,
        "weight_decay": weight_decay,
    }

def compute_objective(metrics):
    """Return the value to optimize: the entry of ``metrics`` stored under ``OBJECTIVE_METRIC``.

    Raises:
        KeyError: if ``metrics`` has no ``OBJECTIVE_METRIC`` key.
    """
    objective_value = metrics[OBJECTIVE_METRIC]
    return objective_value

---

## Model / Model Pre-Processor

In [6]:
# Processor (pre-processes the images to fit the input expected by the model).
# It is loaded from the same checkpoint as the model, applied per batch in `transforms`
# and also handed to the Trainer.
processor = ViTImageProcessor.from_pretrained(model_name_or_path)

def model_init(trial):
    """Build a fresh ViT classifier: pre-trained backbone plus an untrained 2-label head.

    Passed to the Trainer as ``model_init`` so that a new model can be created
    whenever the Trainer asks for one.

    Args:
        trial: accepted for the ``model_init`` callback signature; not used here.

    Returns:
        A ``ViTForImageClassification`` loaded from ``model_name_or_path``.
    """
    # All weights stay trainable. To train only the classification head, set
    # `requires_grad = False` on every parameter of `model.base_model` before returning.
    # `id2label` / `label2id` mappings are intentionally not configured.
    return ViTForImageClassification.from_pretrained(model_name_or_path, num_labels=2)


---

## Augmentations

In [7]:
# Image augmentations, applied in the listed order to every training image.
# Reference: https://pytorch.org/vision/stable/auto_examples/transforms/plot_transforms_illustrations.html
_augmentation_steps = [
    RandomAffine(degrees=(-45,45), translate=(0.1, 0.3), scale=(0.7, 0.9)),
    # Contrast is deliberately left out, otherwise the background would be affected too
    ColorJitter(brightness=.4, hue=0.3, saturation=0.6),
    GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 2)),
    RandomHorizontalFlip(p=0.15),
    RandomVerticalFlip(p=0.15),
]
augmentations = Compose(_augmentation_steps)

---

## Dataset

In [8]:
def gen(ds=[]):
    """Yield one ``{'image', 'label'}`` record per sample of ``ds``.

    Args:
        ds: iterable of 3-item samples ``(image, label, extra)``; the third
            item is discarded.
    """
    for sample in ds:
        image, target, _unused = sample
        yield dict(image=image, label=target)

class InfantDataset(Dataset_torch):
    """Dataset of facial-region crops and their labels, read from a CSV file.

    Every sample is returned as ``(image, label, image_path)`` where the image is an
    RGB PIL image resized (keeping the aspect ratio) and padded to 224x224.

    Args:
        image_dir: root directory of the crops; the sub-directory of the selected
            component is appended to it.
        csv_path: CSV file with an ``image_id`` column and one label column named
            after each NFCS member.
        nfcs_component: component whose label column and image sub-directory are used.
    """
    def __init__(self, image_dir:str, csv_path:str, nfcs_component:NFCS) -> None:
        super().__init__()
        self.data = pd.read_csv(csv_path)
        self.nfcs_component = nfcs_component
        # Sub-directory that stores the crops of each component (BA and BE share 'mouth')
        nfcs2dir = {
            NFCS.SN: 'nasolabial_fold',
            NFCS.FS: 'forehead',
            NFCS.FP: 'eyes',
            NFCS.BA: 'mouth',
            NFCS.BE: 'mouth'
        }
        self.image_dir = Path(image_dir, nfcs2dir[nfcs_component])

    def __len__(self):
        """Number of rows in the CSV file."""
        return len(self.data)

    def __getitem__(self, index):
        """Load the sample at position ``index``.

        Returns:
            Tuple ``(image, label, image_path)``.

        Raises:
            FileNotFoundError: if no file named ``<image_id>.<any extension>`` exists
                in the image directory.
        """
        row = self.data.iloc[index]
        label = row[self.nfcs_component.name]

        # Search image file (the extension is not stored in the CSV)
        try:
            img_path = next(self.image_dir.glob(f"{row['image_id']}.*"))
        except StopIteration:
            raise FileNotFoundError(f"No file named {row['image_id']} found at {self.image_dir}") from None
        # Load image. Converting to RGB guarantees 3 channels for the ViT processor
        # (palette, grayscale or RGBA files would otherwise reach it with the wrong
        # number of channels), and the context manager releases the file handle.
        with Image.open(img_path) as img_file:
            img = img_file.convert('RGB')
        # Resize keeping aspect ratio
        img = self.resize_pad(img, 224) # ViT resizes to 224x224, so we resize first to avoid distortions

        return img, label, img_path

    def resize_pad(self, img, smaller_side_size:int):
        """Resize ``img`` keeping its aspect ratio, then pad it to a square.

        Despite the parameter name, the *longer* side of the image is scaled to
        ``smaller_side_size``; the shorter side is then padded (constant fill) up to
        the same size, so the result is ``smaller_side_size`` x ``smaller_side_size``.
        """
        target = smaller_side_size
        ar = img.height/img.width # Aspect Ratio
        # `resize` expects (height, width). max(1, ...) keeps extremely elongated
        # images from being resized to a zero-pixel side.
        if ar > 1:
            new_size = (target, max(1, round(target/ar)))
        else:
            new_size = (max(1, round(target*ar)), target)
        img = resize(img, new_size)
        # Padding: when the missing amount is odd, the extra pixel goes to the left/top
        pad_horizontal = (target-img.width)/2 # How much to pad horizontally
        pad_vertical = (target-img.height)/2 # How much to pad vertically
        img = pad(img, padding=(ceil(pad_horizontal), ceil(pad_vertical), int(pad_horizontal), int(pad_vertical)), padding_mode ='constant') # (left, top, right, bottom)
        return img


# Dataset instance: pick the crop directory that matches the MASKED setting
_crops_root = masked_images_path if MASKED else non_masked_images_path
torch_ds = InfantDataset(image_dir=_crops_root, csv_path=csv_path, nfcs_component=NFCS_REGION)

In [9]:
def transforms(batch, processor, augmentations):
    """Apply augmentations and the necessary preprocessing using the processor instance.

    Args:
        batch: mapping with an ``'image'`` entry (iterable of images) and a
            ``'label'`` entry.
        processor: callable invoked as ``processor(images, return_tensors='pt')``;
            its result must support item assignment.
        augmentations: callable applied to every image before the processor.

    Returns:
        The processor output with the batch labels stored under ``'labels'``.
    """
    augmented = [augmentations(image) for image in batch['image']]
    # No trailing comma after this call: a stray one used to wrap the result in a
    # 1-tuple, which then had to be unwrapped again.
    inputs = processor(augmented, return_tensors='pt')
    inputs['labels'] = batch['label']
    return inputs

def collate_fn(batch):
    """Merge a list of examples into a single model input.

    Args:
        batch: list of mappings, each with a ``'pixel_values'`` tensor and a
            ``'labels'`` value.

    Returns:
        dict with the stacked ``'pixel_values'`` tensor and a ``'labels'`` tensor.
    """
    pixel_values, labels = [], []
    for example in batch:
        pixel_values.append(example['pixel_values'])
        labels.append(example['labels'])
    return {
        'pixel_values': torch.stack(pixel_values),
        'labels': torch.tensor(labels)
    }

# Cast the PyTorch dataset to a Hugging Face dataset and split it into train/test.
# shuffle=False keeps the CSV order, so only images in the unifesp set are used for
# validation (NOTE(review): this presumably relies on the row order of the CSV - confirm).
ds = Dataset.from_generator(gen, gen_kwargs={'ds': torch_ds}).train_test_split(
    test_size=VALIDATION_SIZE,
    shuffle=False,
)
# Pre-processing is applied on access; only the train split gets augmentations
_train_transform = partial(transforms, processor=processor, augmentations=augmentations)
_eval_transform = partial(transforms, processor=processor, augmentations=Compose([]))
ds['train'].set_transform(_train_transform)
ds['test'].set_transform(_eval_transform)

  StockPickler.save(self, obj, save_persistent_id)
  StockPickler.save(self, obj, save_persistent_id)


Generating train split: 0 examples [00:00, ? examples/s]

In [10]:
# === SAVE DATASET TO DISK === #

# from tqdm import tqdm
# out_dir = Path(f'/home/phdomingues/masters/src/Labeled_split/{NFCS_REGION.name}')
# for img, label, pth in tqdm(torch_ds, total=len(ds)):
#     out_dir_label = out_dir / str(label)
#     out_dir_label.mkdir(exist_ok=True, parents=True)
#     img.save(out_dir_label / pth.name)

---

## Training

In [11]:
def compute_metrics(p, metrics):
    """Evaluate every metric on the predictions and merge the results.

    Args:
        p: object with ``predictions`` (per-class scores, one row per sample) and
            ``label_ids`` (reference labels).
        metrics: iterable of objects whose ``compute(predictions=, references=)``
            returns a dict.

    Returns:
        A single dict with the entries of all metrics; on a key clash the metric
        listed later wins.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    merged = {}
    for metric in metrics:
        merged.update(metric.compute(predictions=predicted_classes, references=p.label_ids))
    return merged

In [12]:
# Training configuration shared by every trial. The learning rate, the weight decay and
# the train batch size are not set here: they are sampled by `optuna_hp_space`.
training_args = TrainingArguments(
    # Where to write
    output_dir=output_dir,
    push_to_hub=False,
    # Schedule: save, evaluate and log once per epoch
    num_train_epochs=TRAIN_EPOCHS,
    save_strategy='epoch',
    evaluation_strategy='epoch', # NOTE(review): newer transformers releases name this `eval_strategy` - confirm against the installed version
    logging_strategy='epoch',
    # Checkpoints: keep a single one, model weights only
    save_total_limit=1,
    save_only_model=True,
    # Best-model selection
    load_best_model_at_end=True,
    metric_for_best_model=OBJECTIVE_METRIC,
    greater_is_better=GREATER_IS_BETTER,
    # Data handling
    per_device_eval_batch_size=1,
    remove_unused_columns=False,
    fp16=False, # 16-bit floating point precision (instead of 32)
)

# Metric objects are loaded once and bound to compute_metrics
_loaded_metrics = [load_metric(m, trust_remote_code=True) for m in METRICS]

# model=None + model_init: the Trainer builds the model itself through model_init
trainer = Trainer(
    model=None,
    model_init=model_init,
    args=training_args,
    train_dataset=ds['train'],
    eval_dataset=ds['test'],
    data_collator=collate_fn,
    compute_metrics=partial(compute_metrics, metrics=_loaded_metrics),
    tokenizer=processor,
)

  compute_metrics=partial(compute_metrics, metrics=[load_metric(m, trust_remote_code=True) for m in METRICS]),
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# Hyperparameter search with Optuna. The direction comes from DIRECTION (derived from
# GREATER_IS_BETTER) so it always agrees with the objective metric and with the
# best-model selection configured in the training arguments.
best_run = trainer.hyperparameter_search(
    direction=DIRECTION,
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=NUM_TRIALS,
    compute_objective=compute_objective,
)

[I 2024-07-22 18:21:27,241] A new study created in memory with name: no-name-c7f6252f-bddb-4c10-ac34-9b13dd0254a4
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

  context_layer = torch.nn.functional.scaled_dot_product_attention(


{'loss': 0.6651, 'grad_norm': 0.06999296694993973, 'learning_rate': 0.0004656909601678784, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6698302626609802, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.2316, 'eval_samples_per_second': 87.693, 'eval_steps_per_second': 87.693, 'epoch': 1.0}
{'loss': 0.6483, 'grad_norm': 0.7654557824134827, 'learning_rate': 0.00044118090963272694, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6497477293014526, 'eval_accuracy': 0.7129629629629629, 'eval_f1': 0.6352941176470588, 'eval_runtime': 1.3243, 'eval_samples_per_second': 81.554, 'eval_steps_per_second': 81.554, 'epoch': 2.0}
{'loss': 0.651, 'grad_norm': 0.5001260042190552, 'learning_rate': 0.0004166708590975754, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6847137212753296, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.2511, 'eval_samples_per_second': 86.322, 'eval_steps_per_second': 86.322, 'epoch': 3.0}
{'loss': 0.6538, 'grad_norm': 0.1369030624628067, 'learning_rate': 0.00039216080856242395, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6334567070007324, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0425531914893617, 'eval_runtime': 1.2788, 'eval_samples_per_second': 84.454, 'eval_steps_per_second': 84.454, 'epoch': 4.0}
{'loss': 0.6352, 'grad_norm': 0.6642331480979919, 'learning_rate': 0.0003676507580272724, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.673330545425415, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.2656, 'eval_samples_per_second': 85.335, 'eval_steps_per_second': 85.335, 'epoch': 5.0}
{'loss': 0.6655, 'grad_norm': 0.33485147356987, 'learning_rate': 0.00034314070749212096, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6596860289573669, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.2703, 'eval_samples_per_second': 85.019, 'eval_steps_per_second': 85.019, 'epoch': 6.0}
{'loss': 0.6529, 'grad_norm': 0.25088992714881897, 'learning_rate': 0.0003186306569569695, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6386189460754395, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.2771, 'eval_samples_per_second': 84.569, 'eval_steps_per_second': 84.569, 'epoch': 7.0}
{'loss': 0.6482, 'grad_norm': 0.9563993811607361, 'learning_rate': 0.00029412060642181796, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.650730550289154, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.2295, 'eval_samples_per_second': 87.838, 'eval_steps_per_second': 87.838, 'epoch': 8.0}
{'loss': 0.6602, 'grad_norm': 0.20679472386837006, 'learning_rate': 0.0002696105558866665, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6640987396240234, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.2574, 'eval_samples_per_second': 85.894, 'eval_steps_per_second': 85.894, 'epoch': 9.0}
{'loss': 0.6507, 'grad_norm': 0.2743161916732788, 'learning_rate': 0.00024510050535151497, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6334686279296875, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.1966, 'eval_samples_per_second': 90.256, 'eval_steps_per_second': 90.256, 'epoch': 10.0}
{'loss': 0.64, 'grad_norm': 0.43839025497436523, 'learning_rate': 0.00022059045481636347, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.61347895860672, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.2202, 'eval_samples_per_second': 88.51, 'eval_steps_per_second': 88.51, 'epoch': 11.0}
{'loss': 0.6403, 'grad_norm': 0.2311546355485916, 'learning_rate': 0.00019608040428121198, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6584119200706482, 'eval_accuracy': 0.5740740740740741, 'eval_f1': 0.6567164179104478, 'eval_runtime': 1.2295, 'eval_samples_per_second': 87.841, 'eval_steps_per_second': 87.841, 'epoch': 12.0}
{'loss': 0.6414, 'grad_norm': 0.48273375630378723, 'learning_rate': 0.00017157035374606048, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.620082437992096, 'eval_accuracy': 0.6388888888888888, 'eval_f1': 0.36065573770491804, 'eval_runtime': 1.2642, 'eval_samples_per_second': 85.431, 'eval_steps_per_second': 85.431, 'epoch': 13.0}
{'loss': 0.6251, 'grad_norm': 0.5458653569221497, 'learning_rate': 0.00014706030321090898, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5955271124839783, 'eval_accuracy': 0.6944444444444444, 'eval_f1': 0.6972477064220184, 'eval_runtime': 1.2757, 'eval_samples_per_second': 84.657, 'eval_steps_per_second': 84.657, 'epoch': 14.0}
{'loss': 0.6317, 'grad_norm': 0.21537216007709503, 'learning_rate': 0.00012255025267575748, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6020371317863464, 'eval_accuracy': 0.6203703703703703, 'eval_f1': 0.19607843137254902, 'eval_runtime': 1.2147, 'eval_samples_per_second': 88.914, 'eval_steps_per_second': 88.914, 'epoch': 15.0}
{'loss': 0.6608, 'grad_norm': 0.18064434826374054, 'learning_rate': 9.804020214060599e-05, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6330288648605347, 'eval_accuracy': 0.7407407407407407, 'eval_f1': 0.72, 'eval_runtime': 1.2281, 'eval_samples_per_second': 87.942, 'eval_steps_per_second': 87.942, 'epoch': 16.0}
{'loss': 0.6367, 'grad_norm': 0.8379955291748047, 'learning_rate': 7.353015160545449e-05, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6166279911994934, 'eval_accuracy': 0.6018518518518519, 'eval_f1': 0.0851063829787234, 'eval_runtime': 1.2246, 'eval_samples_per_second': 88.189, 'eval_steps_per_second': 88.189, 'epoch': 17.0}
{'loss': 0.6304, 'grad_norm': 0.6438483595848083, 'learning_rate': 4.9020101070302994e-05, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6081883311271667, 'eval_accuracy': 0.7129629629629629, 'eval_f1': 0.5866666666666667, 'eval_runtime': 1.4838, 'eval_samples_per_second': 72.784, 'eval_steps_per_second': 72.784, 'epoch': 18.0}
{'loss': 0.6322, 'grad_norm': 0.4010988473892212, 'learning_rate': 2.4510050535151497e-05, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6181373000144958, 'eval_accuracy': 0.7037037037037037, 'eval_f1': 0.6666666666666666, 'eval_runtime': 1.4303, 'eval_samples_per_second': 75.51, 'eval_steps_per_second': 75.51, 'epoch': 19.0}
{'loss': 0.6249, 'grad_norm': 0.1935599148273468, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6219367980957031, 'eval_accuracy': 0.7314814814814815, 'eval_f1': 0.7289719626168224, 'eval_runtime': 1.2401, 'eval_samples_per_second': 87.092, 'eval_steps_per_second': 87.092, 'epoch': 20.0}


[I 2024-07-22 18:26:08,268] Trial 0 finished with value: 0.7289719626168224 and parameters: {'learning_rate': 0.0004902010107030299, 'per_device_train_batch_size': 32, 'weight_decay': 0.1}. Best is trial 0 with value: 0.7289719626168224.


{'train_runtime': 280.4176, 'train_samples_per_second': 43.293, 'train_steps_per_second': 1.355, 'train_loss': 0.6447243841070878, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/3040 [00:00<?, ?it/s]

{'loss': 0.5774, 'grad_norm': 5.202595233917236, 'learning_rate': 2.377263190715804e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5817466378211975, 'eval_accuracy': 0.7222222222222222, 'eval_f1': 0.7368421052631579, 'eval_runtime': 1.2606, 'eval_samples_per_second': 85.67, 'eval_steps_per_second': 85.67, 'epoch': 1.0}
{'loss': 0.4383, 'grad_norm': 3.426546812057495, 'learning_rate': 2.2521440754149727e-05, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6006231904029846, 'eval_accuracy': 0.6944444444444444, 'eval_f1': 0.45901639344262296, 'eval_runtime': 1.265, 'eval_samples_per_second': 85.373, 'eval_steps_per_second': 85.373, 'epoch': 2.0}
{'loss': 0.3959, 'grad_norm': 0.4657536447048187, 'learning_rate': 2.1270249601141406e-05, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44655606150627136, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7916666666666666, 'eval_runtime': 1.2748, 'eval_samples_per_second': 84.72, 'eval_steps_per_second': 84.72, 'epoch': 3.0}
{'loss': 0.3969, 'grad_norm': 9.300559043884277, 'learning_rate': 2.0019058448133092e-05, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5913769006729126, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.7843137254901961, 'eval_runtime': 1.2417, 'eval_samples_per_second': 86.979, 'eval_steps_per_second': 86.979, 'epoch': 4.0}
{'loss': 0.4056, 'grad_norm': 7.4034271240234375, 'learning_rate': 1.876786729512477e-05, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5174235701560974, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7407407407407407, 'eval_runtime': 1.2569, 'eval_samples_per_second': 85.924, 'eval_steps_per_second': 85.924, 'epoch': 5.0}
{'loss': 0.3808, 'grad_norm': 20.040563583374023, 'learning_rate': 1.751667614211645e-05, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.49517324566841125, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.8, 'eval_runtime': 1.3395, 'eval_samples_per_second': 80.629, 'eval_steps_per_second': 80.629, 'epoch': 6.0}
{'loss': 0.3921, 'grad_norm': 19.414567947387695, 'learning_rate': 1.6265484989108136e-05, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5070281624794006, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7916666666666666, 'eval_runtime': 1.2695, 'eval_samples_per_second': 85.074, 'eval_steps_per_second': 85.074, 'epoch': 7.0}
{'loss': 0.3641, 'grad_norm': 0.2927204370498657, 'learning_rate': 1.5014293836099816e-05, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5234225392341614, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7341772151898734, 'eval_runtime': 1.3318, 'eval_samples_per_second': 81.091, 'eval_steps_per_second': 81.091, 'epoch': 8.0}
{'loss': 0.3102, 'grad_norm': 0.3172999620437622, 'learning_rate': 1.37631026830915e-05, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5384518504142761, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7435897435897436, 'eval_runtime': 1.2551, 'eval_samples_per_second': 86.047, 'eval_steps_per_second': 86.047, 'epoch': 9.0}
{'loss': 0.3523, 'grad_norm': 16.021732330322266, 'learning_rate': 1.2511911530083181e-05, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5296334624290466, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.7654320987654321, 'eval_runtime': 1.3145, 'eval_samples_per_second': 82.159, 'eval_steps_per_second': 82.159, 'epoch': 10.0}
{'loss': 0.2882, 'grad_norm': 0.2712341248989105, 'learning_rate': 1.1260720377074863e-05, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5475230813026428, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.8045977011494253, 'eval_runtime': 1.2886, 'eval_samples_per_second': 83.814, 'eval_steps_per_second': 83.814, 'epoch': 11.0}
{'loss': 0.3109, 'grad_norm': 0.15854111313819885, 'learning_rate': 1.0009529224066546e-05, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5761024951934814, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7272727272727273, 'eval_runtime': 1.2492, 'eval_samples_per_second': 86.454, 'eval_steps_per_second': 86.454, 'epoch': 12.0}
{'loss': 0.2822, 'grad_norm': 0.14670172333717346, 'learning_rate': 8.758338071058225e-06, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5508445501327515, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7341772151898734, 'eval_runtime': 1.3823, 'eval_samples_per_second': 78.129, 'eval_steps_per_second': 78.129, 'epoch': 13.0}
{'loss': 0.2743, 'grad_norm': 0.4711785316467285, 'learning_rate': 7.507146918049908e-06, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5239051580429077, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.759493670886076, 'eval_runtime': 1.3258, 'eval_samples_per_second': 81.461, 'eval_steps_per_second': 81.461, 'epoch': 14.0}
{'loss': 0.2608, 'grad_norm': 7.310329914093018, 'learning_rate': 6.2559557650415904e-06, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5805556774139404, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7272727272727273, 'eval_runtime': 1.3093, 'eval_samples_per_second': 82.489, 'eval_steps_per_second': 82.489, 'epoch': 15.0}
{'loss': 0.236, 'grad_norm': 0.29033225774765015, 'learning_rate': 5.004764612033273e-06, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5946434140205383, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.717948717948718, 'eval_runtime': 1.3449, 'eval_samples_per_second': 80.304, 'eval_steps_per_second': 80.304, 'epoch': 16.0}
{'loss': 0.2442, 'grad_norm': 0.11016325652599335, 'learning_rate': 3.753573459024954e-06, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6833231449127197, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.704225352112676, 'eval_runtime': 1.3247, 'eval_samples_per_second': 81.528, 'eval_steps_per_second': 81.528, 'epoch': 17.0}
{'loss': 0.2377, 'grad_norm': 0.7503973245620728, 'learning_rate': 2.5023823060166365e-06, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.626428484916687, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7368421052631579, 'eval_runtime': 1.3508, 'eval_samples_per_second': 79.95, 'eval_steps_per_second': 79.95, 'epoch': 18.0}
{'loss': 0.2289, 'grad_norm': 0.11069981008768082, 'learning_rate': 1.2511911530083183e-06, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6206199526786804, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7368421052631579, 'eval_runtime': 1.3588, 'eval_samples_per_second': 79.482, 'eval_steps_per_second': 79.482, 'epoch': 19.0}
{'loss': 0.2367, 'grad_norm': 0.2089490443468094, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5976726412773132, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.75, 'eval_runtime': 1.3099, 'eval_samples_per_second': 82.451, 'eval_steps_per_second': 82.451, 'epoch': 20.0}


[I 2024-07-22 18:32:52,342] Trial 1 finished with value: 0.75 and parameters: {'learning_rate': 2.5023823060166362e-05, 'per_device_train_batch_size': 4, 'weight_decay': 0.1}. Best is trial 1 with value: 0.75.


{'train_runtime': 403.4074, 'train_samples_per_second': 30.094, 'train_steps_per_second': 7.536, 'train_loss': 0.3306632242704693, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6739, 'grad_norm': 0.8792746663093567, 'learning_rate': 1.6548052823072353e-06, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6737048625946045, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3253, 'eval_samples_per_second': 81.489, 'eval_steps_per_second': 81.489, 'epoch': 1.0}
{'loss': 0.6643, 'grad_norm': 1.1002767086029053, 'learning_rate': 1.56771026744896e-06, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6701523065567017, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3046, 'eval_samples_per_second': 82.783, 'eval_steps_per_second': 82.783, 'epoch': 2.0}
{'loss': 0.6525, 'grad_norm': 1.4007169008255005, 'learning_rate': 1.4806152525906843e-06, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6670386791229248, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3191, 'eval_samples_per_second': 81.875, 'eval_steps_per_second': 81.875, 'epoch': 3.0}
{'loss': 0.6494, 'grad_norm': 0.9575030207633972, 'learning_rate': 1.393520237732409e-06, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6644340753555298, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3282, 'eval_samples_per_second': 81.312, 'eval_steps_per_second': 81.312, 'epoch': 4.0}
{'loss': 0.6464, 'grad_norm': 0.964844286441803, 'learning_rate': 1.3064252228741332e-06, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6618247628211975, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3067, 'eval_samples_per_second': 82.648, 'eval_steps_per_second': 82.648, 'epoch': 5.0}
{'loss': 0.6346, 'grad_norm': 1.0494186878204346, 'learning_rate': 1.2193302080158575e-06, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.658957302570343, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3548, 'eval_samples_per_second': 79.715, 'eval_steps_per_second': 79.715, 'epoch': 6.0}
{'loss': 0.6309, 'grad_norm': 0.9636508226394653, 'learning_rate': 1.1322351931575822e-06, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.656234622001648, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3277, 'eval_samples_per_second': 81.342, 'eval_steps_per_second': 81.342, 'epoch': 7.0}
{'loss': 0.6279, 'grad_norm': 1.7284208536148071, 'learning_rate': 1.0451401782993065e-06, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6540321707725525, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3481, 'eval_samples_per_second': 80.114, 'eval_steps_per_second': 80.114, 'epoch': 8.0}
{'loss': 0.6268, 'grad_norm': 0.9934782981872559, 'learning_rate': 9.580451634410312e-07, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6516427397727966, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.4268, 'eval_samples_per_second': 75.692, 'eval_steps_per_second': 75.692, 'epoch': 9.0}
{'loss': 0.6269, 'grad_norm': 1.148159384727478, 'learning_rate': 8.709501485827555e-07, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6494617462158203, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.35, 'eval_samples_per_second': 79.999, 'eval_steps_per_second': 79.999, 'epoch': 10.0}
{'loss': 0.6175, 'grad_norm': 1.1756051778793335, 'learning_rate': 7.8385513372448e-07, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6476446986198425, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3437, 'eval_samples_per_second': 80.376, 'eval_steps_per_second': 80.376, 'epoch': 11.0}
{'loss': 0.6192, 'grad_norm': 1.076886534690857, 'learning_rate': 6.967601188662045e-07, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6459041237831116, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.407, 'eval_samples_per_second': 76.761, 'eval_steps_per_second': 76.761, 'epoch': 12.0}
{'loss': 0.6151, 'grad_norm': 1.0902470350265503, 'learning_rate': 6.096651040079288e-07, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6444900631904602, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3045, 'eval_samples_per_second': 82.788, 'eval_steps_per_second': 82.788, 'epoch': 13.0}
{'loss': 0.6159, 'grad_norm': 1.2322450876235962, 'learning_rate': 5.225700891496533e-07, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.642810583114624, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.324, 'eval_samples_per_second': 81.568, 'eval_steps_per_second': 81.568, 'epoch': 14.0}
{'loss': 0.608, 'grad_norm': 1.1096493005752563, 'learning_rate': 4.3547507429137775e-07, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6414681673049927, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.349, 'eval_samples_per_second': 80.059, 'eval_steps_per_second': 80.059, 'epoch': 15.0}
{'loss': 0.6092, 'grad_norm': 0.9575851559638977, 'learning_rate': 3.4838005943310224e-07, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6404159069061279, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3467, 'eval_samples_per_second': 80.198, 'eval_steps_per_second': 80.198, 'epoch': 16.0}
{'loss': 0.6092, 'grad_norm': 1.4481687545776367, 'learning_rate': 2.6128504457482663e-07, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6396685838699341, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3843, 'eval_samples_per_second': 78.016, 'eval_steps_per_second': 78.016, 'epoch': 17.0}
{'loss': 0.6048, 'grad_norm': 1.3512440919876099, 'learning_rate': 1.7419002971655112e-07, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6390040516853333, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.4093, 'eval_samples_per_second': 76.635, 'eval_steps_per_second': 76.635, 'epoch': 18.0}
{'loss': 0.6051, 'grad_norm': 0.9811558127403259, 'learning_rate': 8.709501485827556e-08, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6386662721633911, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3519, 'eval_samples_per_second': 79.887, 'eval_steps_per_second': 79.887, 'epoch': 19.0}
{'loss': 0.6037, 'grad_norm': 1.1254222393035889, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.638547956943512, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.3732, 'eval_samples_per_second': 78.647, 'eval_steps_per_second': 78.647, 'epoch': 20.0}


[I 2024-07-22 18:37:51,698] Trial 2 finished with value: 0.0 and parameters: {'learning_rate': 1.741900297165511e-06, 'per_device_train_batch_size': 32, 'weight_decay': 0.1}. Best is trial 1 with value: 0.75.


{'train_runtime': 298.6379, 'train_samples_per_second': 40.651, 'train_steps_per_second': 1.272, 'train_loss': 0.627057062952142, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/760 [00:00<?, ?it/s]

{'loss': 0.6087, 'grad_norm': 2.2236969470977783, 'learning_rate': 3.8037244656287276e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5144739747047424, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7878787878787878, 'eval_runtime': 1.3432, 'eval_samples_per_second': 80.405, 'eval_steps_per_second': 80.405, 'epoch': 1.0}
{'loss': 0.4817, 'grad_norm': 2.9161508083343506, 'learning_rate': 3.603528441121953e-05, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4446064531803131, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7619047619047619, 'eval_runtime': 1.3611, 'eval_samples_per_second': 79.347, 'eval_steps_per_second': 79.347, 'epoch': 2.0}
{'loss': 0.3612, 'grad_norm': 4.918846607208252, 'learning_rate': 3.403332416615178e-05, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4443674683570862, 'eval_accuracy': 0.7870370370370371, 'eval_f1': 0.7676767676767676, 'eval_runtime': 1.3682, 'eval_samples_per_second': 78.939, 'eval_steps_per_second': 78.939, 'epoch': 3.0}
{'loss': 0.3431, 'grad_norm': 3.953037977218628, 'learning_rate': 3.203136392108403e-05, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4713999330997467, 'eval_accuracy': 0.7685185185185185, 'eval_f1': 0.7619047619047619, 'eval_runtime': 1.3597, 'eval_samples_per_second': 79.429, 'eval_steps_per_second': 79.429, 'epoch': 4.0}
{'loss': 0.3734, 'grad_norm': 2.119837999343872, 'learning_rate': 3.0029403676016273e-05, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.46691441535949707, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.6666666666666666, 'eval_runtime': 1.3733, 'eval_samples_per_second': 78.64, 'eval_steps_per_second': 78.64, 'epoch': 5.0}
{'loss': 0.3547, 'grad_norm': 2.348134994506836, 'learning_rate': 2.802744343094852e-05, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4107968807220459, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.8045977011494253, 'eval_runtime': 1.3858, 'eval_samples_per_second': 77.935, 'eval_steps_per_second': 77.935, 'epoch': 6.0}
{'loss': 0.3132, 'grad_norm': 1.1094578504562378, 'learning_rate': 2.602548318588077e-05, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4004603922367096, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7640449438202247, 'eval_runtime': 1.3596, 'eval_samples_per_second': 79.432, 'eval_steps_per_second': 79.432, 'epoch': 7.0}
{'loss': 0.2876, 'grad_norm': 1.836653232574463, 'learning_rate': 2.4023522940813018e-05, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4300420880317688, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 1.3572, 'eval_samples_per_second': 79.573, 'eval_steps_per_second': 79.573, 'epoch': 8.0}
{'loss': 0.2873, 'grad_norm': 5.80796480178833, 'learning_rate': 2.202156269574527e-05, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.3961818814277649, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7906976744186046, 'eval_runtime': 1.371, 'eval_samples_per_second': 78.776, 'eval_steps_per_second': 78.776, 'epoch': 9.0}
{'loss': 0.2899, 'grad_norm': 1.797717571258545, 'learning_rate': 2.0019602450677516e-05, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4382677376270294, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.7027027027027027, 'eval_runtime': 1.3771, 'eval_samples_per_second': 78.427, 'eval_steps_per_second': 78.427, 'epoch': 10.0}
{'loss': 0.2679, 'grad_norm': 3.2474327087402344, 'learning_rate': 1.8017642205609764e-05, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4099508225917816, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.7532467532467533, 'eval_runtime': 1.2781, 'eval_samples_per_second': 84.501, 'eval_steps_per_second': 84.501, 'epoch': 11.0}
{'loss': 0.2552, 'grad_norm': 0.5652199387550354, 'learning_rate': 1.6015681960542014e-05, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.401418536901474, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.8, 'eval_runtime': 1.4019, 'eval_samples_per_second': 77.036, 'eval_steps_per_second': 77.036, 'epoch': 12.0}
{'loss': 0.2385, 'grad_norm': 0.6076584458351135, 'learning_rate': 1.401372171547426e-05, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.3939518630504608, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 1.4301, 'eval_samples_per_second': 75.521, 'eval_steps_per_second': 75.521, 'epoch': 13.0}
{'loss': 0.2278, 'grad_norm': 2.5527710914611816, 'learning_rate': 1.2011761470406509e-05, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.431776225566864, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8, 'eval_runtime': 1.9102, 'eval_samples_per_second': 56.538, 'eval_steps_per_second': 56.538, 'epoch': 14.0}
{'loss': 0.2236, 'grad_norm': 6.969526767730713, 'learning_rate': 1.0009801225338758e-05, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4333774149417877, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 1.6419, 'eval_samples_per_second': 65.776, 'eval_steps_per_second': 65.776, 'epoch': 15.0}
{'loss': 0.1973, 'grad_norm': 5.894850254058838, 'learning_rate': 8.007840980271007e-06, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42930635809898376, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7777777777777778, 'eval_runtime': 1.4913, 'eval_samples_per_second': 72.419, 'eval_steps_per_second': 72.419, 'epoch': 16.0}
{'loss': 0.2229, 'grad_norm': 0.8547477126121521, 'learning_rate': 6.0058807352032545e-06, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.45977598428726196, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7631578947368421, 'eval_runtime': 3.8233, 'eval_samples_per_second': 28.248, 'eval_steps_per_second': 28.248, 'epoch': 17.0}
{'loss': 0.2025, 'grad_norm': 4.045485019683838, 'learning_rate': 4.0039204901355036e-06, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.41858598589897156, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7951807228915663, 'eval_runtime': 3.5497, 'eval_samples_per_second': 30.425, 'eval_steps_per_second': 30.425, 'epoch': 18.0}
{'loss': 0.1759, 'grad_norm': 5.040531635284424, 'learning_rate': 2.0019602450677518e-06, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42232856154441833, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8048780487804879, 'eval_runtime': 3.3954, 'eval_samples_per_second': 31.808, 'eval_steps_per_second': 31.808, 'epoch': 19.0}
{'loss': 0.1859, 'grad_norm': 5.933375358581543, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4181212782859802, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.8, 'eval_runtime': 3.0377, 'eval_samples_per_second': 35.553, 'eval_steps_per_second': 35.553, 'epoch': 20.0}


[I 2024-07-22 18:44:34,842] Trial 3 finished with value: 0.8 and parameters: {'learning_rate': 4.003920490135503e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0}. Best is trial 3 with value: 0.8.


{'train_runtime': 402.468, 'train_samples_per_second': 30.164, 'train_steps_per_second': 1.888, 'train_loss': 0.2949091039205852, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1520 [00:00<?, ?it/s]

{'loss': 0.6582, 'grad_norm': 2.8558311462402344, 'learning_rate': 3.251287782138113e-06, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.661909818649292, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 3.0653, 'eval_samples_per_second': 35.233, 'eval_steps_per_second': 35.233, 'epoch': 1.0}
{'loss': 0.6353, 'grad_norm': 1.9757835865020752, 'learning_rate': 3.0801673725518966e-06, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6474462151527405, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 3.2149, 'eval_samples_per_second': 33.594, 'eval_steps_per_second': 33.594, 'epoch': 2.0}
{'loss': 0.6112, 'grad_norm': 1.7389370203018188, 'learning_rate': 2.90904696296568e-06, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.631032407283783, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 2.9795, 'eval_samples_per_second': 36.247, 'eval_steps_per_second': 36.247, 'epoch': 3.0}
{'loss': 0.5894, 'grad_norm': 2.1705520153045654, 'learning_rate': 2.7379265533794637e-06, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6185246706008911, 'eval_accuracy': 0.6944444444444444, 'eval_f1': 0.47619047619047616, 'eval_runtime': 2.9571, 'eval_samples_per_second': 36.523, 'eval_steps_per_second': 36.523, 'epoch': 4.0}
{'loss': 0.5585, 'grad_norm': 3.486137628555298, 'learning_rate': 2.566806143793247e-06, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5953589081764221, 'eval_accuracy': 0.7685185185185185, 'eval_f1': 0.6575342465753424, 'eval_runtime': 3.0317, 'eval_samples_per_second': 35.623, 'eval_steps_per_second': 35.623, 'epoch': 5.0}
{'loss': 0.527, 'grad_norm': 1.7290242910385132, 'learning_rate': 2.3956857342070304e-06, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5712145566940308, 'eval_accuracy': 0.7870370370370371, 'eval_f1': 0.7160493827160493, 'eval_runtime': 3.1125, 'eval_samples_per_second': 34.699, 'eval_steps_per_second': 34.699, 'epoch': 6.0}
{'loss': 0.5077, 'grad_norm': 3.275275230407715, 'learning_rate': 2.224565324620814e-06, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5543832778930664, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.7317073170731707, 'eval_runtime': 3.1468, 'eval_samples_per_second': 34.321, 'eval_steps_per_second': 34.321, 'epoch': 7.0}
{'loss': 0.4799, 'grad_norm': 3.141247034072876, 'learning_rate': 2.0534449150345976e-06, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5391558408737183, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.7441860465116279, 'eval_runtime': 2.9484, 'eval_samples_per_second': 36.63, 'eval_steps_per_second': 36.63, 'epoch': 8.0}
{'loss': 0.4529, 'grad_norm': 3.9375250339508057, 'learning_rate': 1.8823245054483814e-06, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.523884117603302, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7529411764705882, 'eval_runtime': 3.1966, 'eval_samples_per_second': 33.786, 'eval_steps_per_second': 33.786, 'epoch': 9.0}
{'loss': 0.4434, 'grad_norm': 3.011784553527832, 'learning_rate': 1.7112040958621647e-06, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5145600438117981, 'eval_accuracy': 0.7870370370370371, 'eval_f1': 0.735632183908046, 'eval_runtime': 3.0484, 'eval_samples_per_second': 35.429, 'eval_steps_per_second': 35.429, 'epoch': 10.0}
{'loss': 0.4147, 'grad_norm': 5.638874053955078, 'learning_rate': 1.5400836862759483e-06, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5063758492469788, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7333333333333333, 'eval_runtime': 2.9866, 'eval_samples_per_second': 36.161, 'eval_steps_per_second': 36.161, 'epoch': 11.0}
{'loss': 0.4252, 'grad_norm': 2.106724739074707, 'learning_rate': 1.3689632766897319e-06, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.49955928325653076, 'eval_accuracy': 0.7592592592592593, 'eval_f1': 0.7291666666666666, 'eval_runtime': 3.2698, 'eval_samples_per_second': 33.03, 'eval_steps_per_second': 33.03, 'epoch': 12.0}
{'loss': 0.4053, 'grad_norm': 3.20176362991333, 'learning_rate': 1.1978428671035152e-06, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4952186942100525, 'eval_accuracy': 0.7685185185185185, 'eval_f1': 0.7422680412371134, 'eval_runtime': 3.2121, 'eval_samples_per_second': 33.622, 'eval_steps_per_second': 33.622, 'epoch': 13.0}
{'loss': 0.4054, 'grad_norm': 2.563242197036743, 'learning_rate': 1.0267224575172988e-06, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4886725842952728, 'eval_accuracy': 0.7685185185185185, 'eval_f1': 0.7311827956989247, 'eval_runtime': 3.203, 'eval_samples_per_second': 33.719, 'eval_steps_per_second': 33.719, 'epoch': 14.0}
{'loss': 0.3971, 'grad_norm': 5.385499954223633, 'learning_rate': 8.556020479310824e-07, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4855497479438782, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7446808510638298, 'eval_runtime': 3.1229, 'eval_samples_per_second': 34.583, 'eval_steps_per_second': 34.583, 'epoch': 15.0}
{'loss': 0.3889, 'grad_norm': 3.911233425140381, 'learning_rate': 6.844816383448659e-07, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.48216596245765686, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7446808510638298, 'eval_runtime': 3.0692, 'eval_samples_per_second': 35.188, 'eval_steps_per_second': 35.188, 'epoch': 16.0}
{'loss': 0.3938, 'grad_norm': 4.748416423797607, 'learning_rate': 5.133612287586494e-07, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.48028481006622314, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7446808510638298, 'eval_runtime': 2.9412, 'eval_samples_per_second': 36.72, 'eval_steps_per_second': 36.72, 'epoch': 17.0}
{'loss': 0.3799, 'grad_norm': 4.575582504272461, 'learning_rate': 3.4224081917243296e-07, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.47847500443458557, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7446808510638298, 'eval_runtime': 2.9761, 'eval_samples_per_second': 36.289, 'eval_steps_per_second': 36.289, 'epoch': 18.0}
{'loss': 0.3704, 'grad_norm': 0.9875293374061584, 'learning_rate': 1.7112040958621648e-07, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.47760385274887085, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7446808510638298, 'eval_runtime': 3.102, 'eval_samples_per_second': 34.816, 'eval_steps_per_second': 34.816, 'epoch': 19.0}
{'loss': 0.3606, 'grad_norm': 3.492450475692749, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.47719600796699524, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7446808510638298, 'eval_runtime': 3.0355, 'eval_samples_per_second': 35.579, 'eval_steps_per_second': 35.579, 'epoch': 20.0}


[I 2024-07-22 18:57:32,824] Trial 4 finished with value: 0.7446808510638298 and parameters: {'learning_rate': 3.4224081917243294e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.1}. Best is trial 3 with value: 0.8.


{'train_runtime': 777.1981, 'train_samples_per_second': 15.62, 'train_steps_per_second': 1.956, 'train_loss': 0.47023995048121403, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/3040 [00:00<?, ?it/s]

{'loss': 0.647, 'grad_norm': 3.2664005756378174, 'learning_rate': 4.110026481121034e-06, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.6484050750732422, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 3.0691, 'eval_samples_per_second': 35.189, 'eval_steps_per_second': 35.189, 'epoch': 1.0}
{'loss': 0.6061, 'grad_norm': 2.278211832046509, 'learning_rate': 3.893709297904137e-06, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 18:58:57,573] Trial 5 pruned. 


{'eval_loss': 0.6051007509231567, 'eval_accuracy': 0.6666666666666666, 'eval_f1': 0.3333333333333333, 'eval_runtime': 3.1454, 'eval_samples_per_second': 34.335, 'eval_steps_per_second': 34.335, 'epoch': 2.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6017, 'grad_norm': 2.235746383666992, 'learning_rate': 6.437880825955943e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5076414942741394, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7647058823529411, 'eval_runtime': 3.0018, 'eval_samples_per_second': 35.979, 'eval_steps_per_second': 35.979, 'epoch': 1.0}
{'loss': 0.5077, 'grad_norm': 1.6567260026931763, 'learning_rate': 6.099044993010895e-05, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4296264350414276, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7640449438202247, 'eval_runtime': 3.0101, 'eval_samples_per_second': 35.88, 'eval_steps_per_second': 35.88, 'epoch': 2.0}
{'loss': 0.3994, 'grad_norm': 1.185099482536316, 'learning_rate': 5.760209160065844e-05, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4584951102733612, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7407407407407407, 'eval_runtime': 3.2502, 'eval_samples_per_second': 33.229, 'eval_steps_per_second': 33.229, 'epoch': 3.0}
{'loss': 0.3831, 'grad_norm': 2.5716211795806885, 'learning_rate': 5.421373327120795e-05, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4320763051509857, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.7755102040816326, 'eval_runtime': 3.1615, 'eval_samples_per_second': 34.161, 'eval_steps_per_second': 34.161, 'epoch': 4.0}
{'loss': 0.3434, 'grad_norm': 1.7709401845932007, 'learning_rate': 5.082537494175745e-05, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.47419893741607666, 'eval_accuracy': 0.7870370370370371, 'eval_f1': 0.676056338028169, 'eval_runtime': 3.0998, 'eval_samples_per_second': 34.841, 'eval_steps_per_second': 34.841, 'epoch': 5.0}
{'loss': 0.3395, 'grad_norm': 3.1246211528778076, 'learning_rate': 4.7437016612306955e-05, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.3948795795440674, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.7865168539325843, 'eval_runtime': 3.1319, 'eval_samples_per_second': 34.484, 'eval_steps_per_second': 34.484, 'epoch': 6.0}
{'loss': 0.3103, 'grad_norm': 1.7375659942626953, 'learning_rate': 4.4048658282856464e-05, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.41141563653945923, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.775, 'eval_runtime': 3.0158, 'eval_samples_per_second': 35.812, 'eval_steps_per_second': 35.812, 'epoch': 7.0}
{'loss': 0.2769, 'grad_norm': 4.413098335266113, 'learning_rate': 4.066029995340596e-05, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.39525845646858215, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7857142857142857, 'eval_runtime': 3.0115, 'eval_samples_per_second': 35.862, 'eval_steps_per_second': 35.862, 'epoch': 8.0}
{'loss': 0.2563, 'grad_norm': 2.9831008911132812, 'learning_rate': 3.727194162395547e-05, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.40715909004211426, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 3.1038, 'eval_samples_per_second': 34.796, 'eval_steps_per_second': 34.796, 'epoch': 9.0}
{'loss': 0.2674, 'grad_norm': 1.3261574506759644, 'learning_rate': 3.388358329450497e-05, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43495967984199524, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7792207792207793, 'eval_runtime': 1.7921, 'eval_samples_per_second': 60.264, 'eval_steps_per_second': 60.264, 'epoch': 10.0}
{'loss': 0.2568, 'grad_norm': 5.712589740753174, 'learning_rate': 3.0495224965054474e-05, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4087181091308594, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.7710843373493976, 'eval_runtime': 3.5582, 'eval_samples_per_second': 30.352, 'eval_steps_per_second': 30.352, 'epoch': 11.0}
{'loss': 0.2428, 'grad_norm': 1.153366208076477, 'learning_rate': 2.7106866635603976e-05, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4014342129230499, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7777777777777778, 'eval_runtime': 3.6889, 'eval_samples_per_second': 29.277, 'eval_steps_per_second': 29.277, 'epoch': 12.0}
{'loss': 0.241, 'grad_norm': 1.9216697216033936, 'learning_rate': 2.3718508306153478e-05, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.39598986506462097, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.8, 'eval_runtime': 1.9643, 'eval_samples_per_second': 54.982, 'eval_steps_per_second': 54.982, 'epoch': 13.0}
{'loss': 0.2231, 'grad_norm': 3.6871097087860107, 'learning_rate': 2.033014997670298e-05, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44322890043258667, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7692307692307693, 'eval_runtime': 2.1064, 'eval_samples_per_second': 51.272, 'eval_steps_per_second': 51.272, 'epoch': 14.0}
{'loss': 0.2365, 'grad_norm': 2.8310365676879883, 'learning_rate': 1.6941791647252484e-05, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42024871706962585, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7951807228915663, 'eval_runtime': 2.1821, 'eval_samples_per_second': 49.494, 'eval_steps_per_second': 49.494, 'epoch': 15.0}
{'loss': 0.1844, 'grad_norm': 1.6496710777282715, 'learning_rate': 1.3553433317801988e-05, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4223814606666565, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7906976744186046, 'eval_runtime': 1.8981, 'eval_samples_per_second': 56.9, 'eval_steps_per_second': 56.9, 'epoch': 16.0}
{'loss': 0.2139, 'grad_norm': 2.702352523803711, 'learning_rate': 1.016507498835149e-05, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43028292059898376, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7804878048780488, 'eval_runtime': 2.0524, 'eval_samples_per_second': 52.621, 'eval_steps_per_second': 52.621, 'epoch': 17.0}
{'loss': 0.1885, 'grad_norm': 2.4044137001037598, 'learning_rate': 6.776716658900994e-06, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43085402250289917, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 1.9684, 'eval_samples_per_second': 54.868, 'eval_steps_per_second': 54.868, 'epoch': 18.0}
{'loss': 0.1761, 'grad_norm': 3.250368595123291, 'learning_rate': 3.388358329450497e-06, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42545264959335327, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 1.9837, 'eval_samples_per_second': 54.443, 'eval_steps_per_second': 54.443, 'epoch': 19.0}
{'loss': 0.1693, 'grad_norm': 1.0510120391845703, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4209221601486206, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7951807228915663, 'eval_runtime': 1.9823, 'eval_samples_per_second': 54.482, 'eval_steps_per_second': 54.482, 'epoch': 20.0}


[I 2024-07-22 19:08:06,930] Trial 6 finished with value: 0.7951807228915663 and parameters: {'learning_rate': 6.776716658900994e-05, 'per_device_train_batch_size': 32, 'weight_decay': 0.1}. Best is trial 3 with value: 0.8.


{'train_runtime': 548.6709, 'train_samples_per_second': 22.126, 'train_steps_per_second': 0.693, 'train_loss': 0.29090558666931954, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/3040 [00:00<?, ?it/s]

{'loss': 0.6448, 'grad_norm': 3.475759506225586, 'learning_rate': 4.553596979097871e-06, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 19:08:47,154] Trial 7 pruned. 


{'eval_loss': 0.6450249552726746, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 3.3711, 'eval_samples_per_second': 32.037, 'eval_steps_per_second': 32.037, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/3040 [00:00<?, ?it/s]

{'loss': 0.6663, 'grad_norm': 0.1707754135131836, 'learning_rate': 0.000657950051128943, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 19:09:27,921] Trial 8 pruned. 


{'eval_loss': 0.6766590476036072, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 2.9766, 'eval_samples_per_second': 36.282, 'eval_steps_per_second': 36.282, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/760 [00:00<?, ?it/s]

{'loss': 0.6172, 'grad_norm': 1.924469232559204, 'learning_rate': 3.0613109678761515e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5638809204101562, 'eval_accuracy': 0.7870370370370371, 'eval_f1': 0.7766990291262136, 'eval_runtime': 3.3951, 'eval_samples_per_second': 31.811, 'eval_steps_per_second': 31.811, 'epoch': 1.0}
{'loss': 0.4893, 'grad_norm': 1.8542439937591553, 'learning_rate': 2.9001893379879334e-05, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4489395320415497, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7446808510638298, 'eval_runtime': 3.1556, 'eval_samples_per_second': 34.225, 'eval_steps_per_second': 34.225, 'epoch': 2.0}
{'loss': 0.3726, 'grad_norm': 2.808680295944214, 'learning_rate': 2.739067708099715e-05, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43155637383461, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7878787878787878, 'eval_runtime': 3.2392, 'eval_samples_per_second': 33.342, 'eval_steps_per_second': 33.342, 'epoch': 3.0}
{'loss': 0.3489, 'grad_norm': 6.453937530517578, 'learning_rate': 2.5779460782114963e-05, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.49663835763931274, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7692307692307693, 'eval_runtime': 3.4655, 'eval_samples_per_second': 31.165, 'eval_steps_per_second': 31.165, 'epoch': 4.0}
{'loss': 0.3506, 'grad_norm': 1.6806132793426514, 'learning_rate': 2.4168244483232778e-05, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4386102557182312, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.7710843373493976, 'eval_runtime': 2.6812, 'eval_samples_per_second': 40.281, 'eval_steps_per_second': 40.281, 'epoch': 5.0}
{'loss': 0.3439, 'grad_norm': 3.0077972412109375, 'learning_rate': 2.2557028184350593e-05, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.41894611716270447, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7640449438202247, 'eval_runtime': 2.9297, 'eval_samples_per_second': 36.864, 'eval_steps_per_second': 36.864, 'epoch': 6.0}
{'loss': 0.3151, 'grad_norm': 1.6673396825790405, 'learning_rate': 2.0945811885468408e-05, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4158412516117096, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7727272727272727, 'eval_runtime': 3.1586, 'eval_samples_per_second': 34.192, 'eval_steps_per_second': 34.192, 'epoch': 7.0}
{'loss': 0.2976, 'grad_norm': 1.714518427848816, 'learning_rate': 1.9334595586586222e-05, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4301123023033142, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 2.9774, 'eval_samples_per_second': 36.273, 'eval_steps_per_second': 36.273, 'epoch': 8.0}
{'loss': 0.2916, 'grad_norm': 4.826058387756348, 'learning_rate': 1.7723379287704037e-05, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4146585166454315, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7906976744186046, 'eval_runtime': 3.3705, 'eval_samples_per_second': 32.043, 'eval_steps_per_second': 32.043, 'epoch': 9.0}
{'loss': 0.2846, 'grad_norm': 2.340132713317871, 'learning_rate': 1.6112162988821852e-05, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.49173858761787415, 'eval_accuracy': 0.7870370370370371, 'eval_f1': 0.676056338028169, 'eval_runtime': 3.3437, 'eval_samples_per_second': 32.299, 'eval_steps_per_second': 32.299, 'epoch': 10.0}
{'loss': 0.27, 'grad_norm': 5.08394193649292, 'learning_rate': 1.4500946689939667e-05, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.41712403297424316, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7692307692307693, 'eval_runtime': 3.2141, 'eval_samples_per_second': 33.602, 'eval_steps_per_second': 33.602, 'epoch': 11.0}
{'loss': 0.274, 'grad_norm': 0.5916375517845154, 'learning_rate': 1.2889730391057482e-05, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 19:16:15,815] Trial 9 pruned. 


{'eval_loss': 0.41921254992485046, 'eval_accuracy': 0.7870370370370371, 'eval_f1': 0.7472527472527473, 'eval_runtime': 4.525, 'eval_samples_per_second': 23.867, 'eval_steps_per_second': 23.867, 'epoch': 12.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/760 [00:00<?, ?it/s]

{'loss': 0.562, 'grad_norm': 3.1007823944091797, 'learning_rate': 0.00013740924653806125, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 19:16:42,253] Trial 10 pruned. 


{'eval_loss': 0.9827998876571655, 'eval_accuracy': 0.5648148148148148, 'eval_f1': 0.656934306569343, 'eval_runtime': 2.0862, 'eval_samples_per_second': 51.77, 'eval_steps_per_second': 51.77, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6236, 'grad_norm': 1.6170011758804321, 'learning_rate': 4.931599574936885e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.566722571849823, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.7755102040816326, 'eval_runtime': 1.8876, 'eval_samples_per_second': 57.216, 'eval_steps_per_second': 57.216, 'epoch': 1.0}
{'loss': 0.5011, 'grad_norm': 1.4326263666152954, 'learning_rate': 4.672041702571786e-05, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44851988554000854, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.803921568627451, 'eval_runtime': 3.0356, 'eval_samples_per_second': 35.578, 'eval_steps_per_second': 35.578, 'epoch': 2.0}
{'loss': 0.3953, 'grad_norm': 1.75070321559906, 'learning_rate': 4.412483830206687e-05, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44768139719963074, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7619047619047619, 'eval_runtime': 2.905, 'eval_samples_per_second': 37.178, 'eval_steps_per_second': 37.178, 'epoch': 3.0}
{'loss': 0.3741, 'grad_norm': 3.686544418334961, 'learning_rate': 4.152925957841588e-05, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.48877575993537903, 'eval_accuracy': 0.7592592592592593, 'eval_f1': 0.7547169811320755, 'eval_runtime': 2.7922, 'eval_samples_per_second': 38.679, 'eval_steps_per_second': 38.679, 'epoch': 4.0}
{'loss': 0.3516, 'grad_norm': 2.1767141819000244, 'learning_rate': 3.893368085476488e-05, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43178269267082214, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7619047619047619, 'eval_runtime': 2.8475, 'eval_samples_per_second': 37.928, 'eval_steps_per_second': 37.928, 'epoch': 5.0}
{'loss': 0.335, 'grad_norm': 2.2859718799591064, 'learning_rate': 3.633810213111389e-05, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42945805191993713, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7407407407407407, 'eval_runtime': 1.662, 'eval_samples_per_second': 64.982, 'eval_steps_per_second': 64.982, 'epoch': 6.0}
{'loss': 0.3109, 'grad_norm': 2.3660476207733154, 'learning_rate': 3.37425234074629e-05, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42179247736930847, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7804878048780488, 'eval_runtime': 1.6491, 'eval_samples_per_second': 65.489, 'eval_steps_per_second': 65.489, 'epoch': 7.0}
{'loss': 0.2764, 'grad_norm': 4.094949722290039, 'learning_rate': 3.1146944683811906e-05, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.401837557554245, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7906976744186046, 'eval_runtime': 1.5584, 'eval_samples_per_second': 69.3, 'eval_steps_per_second': 69.3, 'epoch': 8.0}
{'loss': 0.2715, 'grad_norm': 2.6469626426696777, 'learning_rate': 2.8551365960160916e-05, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.41431599855422974, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8048780487804879, 'eval_runtime': 1.5667, 'eval_samples_per_second': 68.934, 'eval_steps_per_second': 68.934, 'epoch': 9.0}
{'loss': 0.274, 'grad_norm': 1.4885623455047607, 'learning_rate': 2.5955787236509923e-05, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4275522828102112, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 1.6233, 'eval_samples_per_second': 66.529, 'eval_steps_per_second': 66.529, 'epoch': 10.0}
{'loss': 0.2524, 'grad_norm': 3.2575294971466064, 'learning_rate': 2.336020851285893e-05, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4016493558883667, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7727272727272727, 'eval_runtime': 1.5791, 'eval_samples_per_second': 68.392, 'eval_steps_per_second': 68.392, 'epoch': 11.0}
{'loss': 0.2414, 'grad_norm': 0.6774728298187256, 'learning_rate': 2.076462978920794e-05, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4064134955406189, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.7764705882352941, 'eval_runtime': 1.5483, 'eval_samples_per_second': 69.755, 'eval_steps_per_second': 69.755, 'epoch': 12.0}
{'loss': 0.2339, 'grad_norm': 1.4407923221588135, 'learning_rate': 1.8169051065556946e-05, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4153819680213928, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7692307692307693, 'eval_runtime': 1.6128, 'eval_samples_per_second': 66.966, 'eval_steps_per_second': 66.966, 'epoch': 13.0}
{'loss': 0.2313, 'grad_norm': 3.703368902206421, 'learning_rate': 1.5573472341905953e-05, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4326992332935333, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8, 'eval_runtime': 1.5382, 'eval_samples_per_second': 70.213, 'eval_steps_per_second': 70.213, 'epoch': 14.0}
{'loss': 0.2583, 'grad_norm': 3.3059895038604736, 'learning_rate': 1.2977893618254961e-05, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.419562965631485, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7951807228915663, 'eval_runtime': 1.5148, 'eval_samples_per_second': 71.296, 'eval_steps_per_second': 71.296, 'epoch': 15.0}
{'loss': 0.1902, 'grad_norm': 2.676668643951416, 'learning_rate': 1.038231489460397e-05, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4283134639263153, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8048780487804879, 'eval_runtime': 1.5469, 'eval_samples_per_second': 69.818, 'eval_steps_per_second': 69.818, 'epoch': 16.0}
{'loss': 0.2405, 'grad_norm': 2.3443617820739746, 'learning_rate': 7.786736170952976e-06, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4321509003639221, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7857142857142857, 'eval_runtime': 1.4953, 'eval_samples_per_second': 72.227, 'eval_steps_per_second': 72.227, 'epoch': 17.0}
{'loss': 0.2009, 'grad_norm': 2.624833583831787, 'learning_rate': 5.191157447301985e-06, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43022415041923523, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8, 'eval_runtime': 1.6104, 'eval_samples_per_second': 67.062, 'eval_steps_per_second': 67.062, 'epoch': 18.0}
{'loss': 0.1835, 'grad_norm': 2.325795888900757, 'learning_rate': 2.5955787236509924e-06, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4396543800830841, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7692307692307693, 'eval_runtime': 1.5959, 'eval_samples_per_second': 67.673, 'eval_steps_per_second': 67.673, 'epoch': 19.0}
{'loss': 0.1846, 'grad_norm': 2.1163840293884277, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42936187982559204, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8, 'eval_runtime': 2.9391, 'eval_samples_per_second': 36.746, 'eval_steps_per_second': 36.746, 'epoch': 20.0}


[I 2024-07-22 19:23:50,466] Trial 11 finished with value: 0.8 and parameters: {'learning_rate': 5.1911574473019846e-05, 'per_device_train_batch_size': 32, 'weight_decay': 0}. Best is trial 3 with value: 0.8.


{'train_runtime': 427.5278, 'train_samples_per_second': 28.396, 'train_steps_per_second': 0.889, 'train_loss': 0.2965249193342108, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/760 [00:00<?, ?it/s]

{'loss': 0.6406, 'grad_norm': 1.6492564678192139, 'learning_rate': 1.419936589533917e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 19:24:24,421] Trial 12 pruned. 


{'eval_loss': 0.6324167847633362, 'eval_accuracy': 0.6018518518518519, 'eval_f1': 0.0851063829787234, 'eval_runtime': 3.0247, 'eval_samples_per_second': 35.706, 'eval_steps_per_second': 35.706, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1520 [00:00<?, ?it/s]

{'loss': 0.6409, 'grad_norm': 1.7987300157546997, 'learning_rate': 0.00015786132872382927, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.7808815836906433, 'eval_accuracy': 0.5185185185185185, 'eval_f1': 0.6338028169014085, 'eval_runtime': 2.6827, 'eval_samples_per_second': 40.258, 'eval_steps_per_second': 40.258, 'epoch': 1.0}
{'loss': 0.629, 'grad_norm': 1.5733555555343628, 'learning_rate': 0.00014955283773836458, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 19:25:31,639] Trial 13 pruned. 


{'eval_loss': 0.593829870223999, 'eval_accuracy': 0.7129629629629629, 'eval_f1': 0.7304347826086957, 'eval_runtime': 2.7252, 'eval_samples_per_second': 39.63, 'eval_steps_per_second': 39.63, 'epoch': 2.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6528, 'grad_norm': 1.1451750993728638, 'learning_rate': 1.265824070699361e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 19:25:58,938] Trial 14 pruned. 


{'eval_loss': 0.651501476764679, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 2.6124, 'eval_samples_per_second': 41.341, 'eval_steps_per_second': 41.341, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/760 [00:00<?, ?it/s]

{'loss': 0.573, 'grad_norm': 2.090183973312378, 'learning_rate': 7.97559413705037e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 19:26:29,448] Trial 15 pruned. 


{'eval_loss': 0.6655920743942261, 'eval_accuracy': 0.6759259259259259, 'eval_f1': 0.7154471544715447, 'eval_runtime': 2.68, 'eval_samples_per_second': 40.298, 'eval_steps_per_second': 40.298, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/760 [00:00<?, ?it/s]

{'loss': 0.6626, 'grad_norm': 0.3365422487258911, 'learning_rate': 0.0002644161535580553, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 19:27:00,592] Trial 16 pruned. 


{'eval_loss': 0.6440316438674927, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 2.6659, 'eval_samples_per_second': 40.512, 'eval_steps_per_second': 40.512, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6248, 'grad_norm': 1.4312330484390259, 'learning_rate': 4.820096223364518e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5717977285385132, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7789473684210526, 'eval_runtime': 2.6541, 'eval_samples_per_second': 40.692, 'eval_steps_per_second': 40.692, 'epoch': 1.0}
{'loss': 0.4996, 'grad_norm': 1.3376221656799316, 'learning_rate': 4.566406948450596e-05, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4536716341972351, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.7843137254901961, 'eval_runtime': 2.6991, 'eval_samples_per_second': 40.014, 'eval_steps_per_second': 40.014, 'epoch': 2.0}
{'loss': 0.3967, 'grad_norm': 1.621895670890808, 'learning_rate': 4.312717673536674e-05, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4467668831348419, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.7764705882352941, 'eval_runtime': 2.7295, 'eval_samples_per_second': 39.568, 'eval_steps_per_second': 39.568, 'epoch': 3.0}
{'loss': 0.3726, 'grad_norm': 3.167464256286621, 'learning_rate': 4.059028398622752e-05, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4903717637062073, 'eval_accuracy': 0.75, 'eval_f1': 0.7476635514018691, 'eval_runtime': 2.7199, 'eval_samples_per_second': 39.707, 'eval_steps_per_second': 39.707, 'epoch': 4.0}
{'loss': 0.3526, 'grad_norm': 2.128338575363159, 'learning_rate': 3.8053391237088296e-05, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.41907674074172974, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7727272727272727, 'eval_runtime': 2.9936, 'eval_samples_per_second': 36.077, 'eval_steps_per_second': 36.077, 'epoch': 5.0}
{'loss': 0.3378, 'grad_norm': 3.2008635997772217, 'learning_rate': 3.5516498487949076e-05, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44276559352874756, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.775, 'eval_runtime': 7.1048, 'eval_samples_per_second': 15.201, 'eval_steps_per_second': 15.201, 'epoch': 6.0}
{'loss': 0.3116, 'grad_norm': 2.0900981426239014, 'learning_rate': 3.297960573880986e-05, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.432114839553833, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 2.5255, 'eval_samples_per_second': 42.765, 'eval_steps_per_second': 42.765, 'epoch': 7.0}
{'loss': 0.2797, 'grad_norm': 3.856515645980835, 'learning_rate': 3.0442712989670638e-05, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.41148641705513, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7951807228915663, 'eval_runtime': 3.4344, 'eval_samples_per_second': 31.446, 'eval_steps_per_second': 31.446, 'epoch': 8.0}
{'loss': 0.2745, 'grad_norm': 2.641385078430176, 'learning_rate': 2.7905820240531422e-05, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.41695594787597656, 'eval_accuracy': 0.8611111111111112, 'eval_f1': 0.8192771084337349, 'eval_runtime': 3.6122, 'eval_samples_per_second': 29.899, 'eval_steps_per_second': 29.899, 'epoch': 9.0}
{'loss': 0.2724, 'grad_norm': 1.418081521987915, 'learning_rate': 2.53689274913922e-05, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43564319610595703, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.759493670886076, 'eval_runtime': 4.1766, 'eval_samples_per_second': 25.858, 'eval_steps_per_second': 25.858, 'epoch': 10.0}
{'loss': 0.2468, 'grad_norm': 3.393155097961426, 'learning_rate': 2.283203474225298e-05, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4100751578807831, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8095238095238095, 'eval_runtime': 3.643, 'eval_samples_per_second': 29.646, 'eval_steps_per_second': 29.646, 'epoch': 11.0}
{'loss': 0.2415, 'grad_norm': 1.0580753087997437, 'learning_rate': 2.029514199311376e-05, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43637579679489136, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 3.894, 'eval_samples_per_second': 27.735, 'eval_steps_per_second': 27.735, 'epoch': 12.0}
{'loss': 0.2381, 'grad_norm': 2.8446133136749268, 'learning_rate': 1.7758249243974538e-05, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4196818768978119, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 3.8186, 'eval_samples_per_second': 28.282, 'eval_steps_per_second': 28.282, 'epoch': 13.0}
{'loss': 0.2205, 'grad_norm': 3.275880813598633, 'learning_rate': 1.5221356494835319e-05, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4433077573776245, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 3.6962, 'eval_samples_per_second': 29.219, 'eval_steps_per_second': 29.219, 'epoch': 14.0}
{'loss': 0.2273, 'grad_norm': 2.818911075592041, 'learning_rate': 1.26844637456961e-05, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.46218812465667725, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7368421052631579, 'eval_runtime': 2.1514, 'eval_samples_per_second': 50.2, 'eval_steps_per_second': 50.2, 'epoch': 15.0}
{'loss': 0.1969, 'grad_norm': 2.884983777999878, 'learning_rate': 1.014757099655688e-05, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4463660418987274, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 4.9974, 'eval_samples_per_second': 21.611, 'eval_steps_per_second': 21.611, 'epoch': 16.0}
{'loss': 0.239, 'grad_norm': 2.9495344161987305, 'learning_rate': 7.6106782474176595e-06, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4382312297821045, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7619047619047619, 'eval_runtime': 5.8163, 'eval_samples_per_second': 18.568, 'eval_steps_per_second': 18.568, 'epoch': 17.0}
{'loss': 0.2092, 'grad_norm': 2.8963677883148193, 'learning_rate': 5.07378549827844e-06, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4390203058719635, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.775, 'eval_runtime': 3.5989, 'eval_samples_per_second': 30.009, 'eval_steps_per_second': 30.009, 'epoch': 18.0}
{'loss': 0.1831, 'grad_norm': 3.232649326324463, 'learning_rate': 2.53689274913922e-06, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44949662685394287, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 3.5685, 'eval_samples_per_second': 30.265, 'eval_steps_per_second': 30.265, 'epoch': 19.0}
{'loss': 0.1885, 'grad_norm': 2.213038921356201, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44160324335098267, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 2.1085, 'eval_samples_per_second': 51.221, 'eval_steps_per_second': 51.221, 'epoch': 20.0}


[I 2024-07-22 20:34:49,530] Trial 17 finished with value: 0.7848101265822784 and parameters: {'learning_rate': 5.07378549827844e-05, 'per_device_train_batch_size': 32, 'weight_decay': 0}. Best is trial 3 with value: 0.8.


{'train_runtime': 4068.2215, 'train_samples_per_second': 2.984, 'train_steps_per_second': 0.093, 'train_loss': 0.29565282620881733, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1520 [00:00<?, ?it/s]

{'loss': 0.6321, 'grad_norm': 3.936964988708496, 'learning_rate': 1.168680561691187e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 20:35:14,487] Trial 18 pruned. 


{'eval_loss': 0.6136704087257385, 'eval_accuracy': 0.7129629629629629, 'eval_f1': 0.5230769230769231, 'eval_runtime': 2.0427, 'eval_samples_per_second': 52.87, 'eval_steps_per_second': 52.87, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/760 [00:00<?, ?it/s]

{'loss': 0.5759, 'grad_norm': 1.6118379831314087, 'learning_rate': 0.00011813751477591411, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 20:35:36,268] Trial 19 pruned. 


{'eval_loss': 0.6112065315246582, 'eval_accuracy': 0.6851851851851852, 'eval_f1': 0.6730769230769231, 'eval_runtime': 1.8026, 'eval_samples_per_second': 59.913, 'eval_steps_per_second': 59.913, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6387, 'grad_norm': 1.0342384576797485, 'learning_rate': 3.143573566110602e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 20:35:57,344] Trial 20 pruned. 


{'eval_loss': 0.6138800382614136, 'eval_accuracy': 0.7129629629629629, 'eval_f1': 0.5373134328358209, 'eval_runtime': 1.9377, 'eval_samples_per_second': 55.737, 'eval_steps_per_second': 55.737, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6224, 'grad_norm': 1.6965137720108032, 'learning_rate': 5.034221390920438e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5608298182487488, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.7755102040816326, 'eval_runtime': 2.107, 'eval_samples_per_second': 51.259, 'eval_steps_per_second': 51.259, 'epoch': 1.0}
{'loss': 0.5026, 'grad_norm': 1.5546398162841797, 'learning_rate': 4.769262370345679e-05, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4535491466522217, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7961165048543689, 'eval_runtime': 2.0191, 'eval_samples_per_second': 53.489, 'eval_steps_per_second': 53.489, 'epoch': 2.0}
{'loss': 0.3962, 'grad_norm': 1.7268047332763672, 'learning_rate': 4.5043033497709186e-05, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.46349647641181946, 'eval_accuracy': 0.8055555555555556, 'eval_f1': 0.7407407407407407, 'eval_runtime': 1.9468, 'eval_samples_per_second': 55.476, 'eval_steps_per_second': 55.476, 'epoch': 3.0}
{'loss': 0.3753, 'grad_norm': 3.5905911922454834, 'learning_rate': 4.2393443291961595e-05, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.5023605823516846, 'eval_accuracy': 0.7592592592592593, 'eval_f1': 0.7592592592592593, 'eval_runtime': 1.8868, 'eval_samples_per_second': 57.239, 'eval_steps_per_second': 57.239, 'epoch': 4.0}
{'loss': 0.3556, 'grad_norm': 1.9503350257873535, 'learning_rate': 3.974385308621399e-05, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42841988801956177, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7619047619047619, 'eval_runtime': 1.9231, 'eval_samples_per_second': 56.159, 'eval_steps_per_second': 56.159, 'epoch': 5.0}
{'loss': 0.331, 'grad_norm': 3.102741241455078, 'learning_rate': 3.7094262880466386e-05, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4130479097366333, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.8, 'eval_runtime': 1.888, 'eval_samples_per_second': 57.203, 'eval_steps_per_second': 57.203, 'epoch': 6.0}
{'loss': 0.3053, 'grad_norm': 1.7875922918319702, 'learning_rate': 3.4444672674718794e-05, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4322858452796936, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7692307692307693, 'eval_runtime': 1.9276, 'eval_samples_per_second': 56.028, 'eval_steps_per_second': 56.028, 'epoch': 7.0}
{'loss': 0.2772, 'grad_norm': 4.615385055541992, 'learning_rate': 3.179508246897119e-05, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.3992272615432739, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7906976744186046, 'eval_runtime': 1.8581, 'eval_samples_per_second': 58.124, 'eval_steps_per_second': 58.124, 'epoch': 8.0}
{'loss': 0.265, 'grad_norm': 2.641169786453247, 'learning_rate': 2.9145492263223595e-05, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4143219292163849, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 1.8699, 'eval_samples_per_second': 57.756, 'eval_steps_per_second': 57.756, 'epoch': 9.0}
{'loss': 0.2742, 'grad_norm': 1.5275226831436157, 'learning_rate': 2.6495902057475994e-05, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43167826533317566, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7692307692307693, 'eval_runtime': 1.9217, 'eval_samples_per_second': 56.2, 'eval_steps_per_second': 56.2, 'epoch': 10.0}
{'loss': 0.2483, 'grad_norm': 3.3491194248199463, 'learning_rate': 2.3846311851728396e-05, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.39712050557136536, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.8, 'eval_runtime': 1.8624, 'eval_samples_per_second': 57.989, 'eval_steps_per_second': 57.989, 'epoch': 11.0}
{'loss': 0.2352, 'grad_norm': 0.6690291166305542, 'learning_rate': 2.1196721645980798e-05, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4086383581161499, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7804878048780488, 'eval_runtime': 1.8899, 'eval_samples_per_second': 57.146, 'eval_steps_per_second': 57.146, 'epoch': 12.0}
{'loss': 0.2363, 'grad_norm': 2.440214157104492, 'learning_rate': 1.8547131440233193e-05, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.409506231546402, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 1.8797, 'eval_samples_per_second': 57.455, 'eval_steps_per_second': 57.455, 'epoch': 13.0}
{'loss': 0.2263, 'grad_norm': 4.203370571136475, 'learning_rate': 1.5897541234485595e-05, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43539461493492126, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 1.9182, 'eval_samples_per_second': 56.303, 'eval_steps_per_second': 56.303, 'epoch': 14.0}
{'loss': 0.22, 'grad_norm': 3.1566832065582275, 'learning_rate': 1.3247951028737997e-05, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4390592873096466, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 1.929, 'eval_samples_per_second': 55.988, 'eval_steps_per_second': 55.988, 'epoch': 15.0}
{'loss': 0.19, 'grad_norm': 2.9061148166656494, 'learning_rate': 1.0598360822990399e-05, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4304048418998718, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7951807228915663, 'eval_runtime': 1.9379, 'eval_samples_per_second': 55.73, 'eval_steps_per_second': 55.73, 'epoch': 16.0}
{'loss': 0.2442, 'grad_norm': 2.275707483291626, 'learning_rate': 7.948770617242797e-06, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43214914202690125, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7619047619047619, 'eval_runtime': 1.8751, 'eval_samples_per_second': 57.596, 'eval_steps_per_second': 57.596, 'epoch': 17.0}
{'loss': 0.2014, 'grad_norm': 2.6953225135803223, 'learning_rate': 5.299180411495199e-06, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4331884980201721, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 1.7204, 'eval_samples_per_second': 62.778, 'eval_steps_per_second': 62.778, 'epoch': 18.0}
{'loss': 0.1811, 'grad_norm': 3.0402276515960693, 'learning_rate': 2.6495902057475997e-06, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4348296523094177, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 1.7908, 'eval_samples_per_second': 60.308, 'eval_steps_per_second': 60.308, 'epoch': 19.0}
{'loss': 0.1812, 'grad_norm': 2.0208349227905273, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42842981219291687, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7804878048780488, 'eval_runtime': 1.7457, 'eval_samples_per_second': 61.865, 'eval_steps_per_second': 61.865, 'epoch': 20.0}


[I 2024-07-22 20:43:13,824] Trial 21 finished with value: 0.7804878048780488 and parameters: {'learning_rate': 5.299180411495199e-05, 'per_device_train_batch_size': 32, 'weight_decay': 0.1}. Best is trial 3 with value: 0.8.


{'train_runtime': 435.7513, 'train_samples_per_second': 27.86, 'train_steps_per_second': 0.872, 'train_loss': 0.2934404787264372, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.5987, 'grad_norm': 2.3780884742736816, 'learning_rate': 6.708444791087714e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.48632147908210754, 'eval_accuracy': 0.7962962962962963, 'eval_f1': 0.7755102040816326, 'eval_runtime': 1.713, 'eval_samples_per_second': 63.049, 'eval_steps_per_second': 63.049, 'epoch': 1.0}
{'loss': 0.4982, 'grad_norm': 1.475396990776062, 'learning_rate': 6.35536874945152e-05, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 20:43:56,209] Trial 22 pruned. 


{'eval_loss': 0.43124985694885254, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7619047619047619, 'eval_runtime': 1.7737, 'eval_samples_per_second': 60.888, 'eval_steps_per_second': 60.888, 'epoch': 2.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6336, 'grad_norm': 0.997482180595398, 'learning_rate': 0.000287683981528515, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 20:44:16,879] Trial 23 pruned. 


{'eval_loss': 0.854678750038147, 'eval_accuracy': 0.5648148148148148, 'eval_f1': 0.6518518518518519, 'eval_runtime': 1.7105, 'eval_samples_per_second': 63.14, 'eval_steps_per_second': 63.14, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6441, 'grad_norm': 1.0929800271987915, 'learning_rate': 2.2545979222691253e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 20:44:37,942] Trial 24 pruned. 


{'eval_loss': 0.6318535208702087, 'eval_accuracy': 0.6018518518518519, 'eval_f1': 0.0851063829787234, 'eval_runtime': 1.739, 'eval_samples_per_second': 62.105, 'eval_steps_per_second': 62.105, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6589, 'grad_norm': 1.0908536911010742, 'learning_rate': 7.88883279130967e-06, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 20:44:58,533] Trial 25 pruned. 


{'eval_loss': 0.6603240966796875, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.6878, 'eval_samples_per_second': 63.989, 'eval_steps_per_second': 63.989, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/760 [00:00<?, ?it/s]

{'loss': 0.5553, 'grad_norm': 1.876294493675232, 'learning_rate': 9.67769935119683e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 20:45:19,886] Trial 26 pruned. 


{'eval_loss': 0.6869780421257019, 'eval_accuracy': 0.6759259259259259, 'eval_f1': 0.7107438016528925, 'eval_runtime': 1.7132, 'eval_samples_per_second': 63.038, 'eval_steps_per_second': 63.038, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1520 [00:00<?, ?it/s]

{'loss': 0.6674, 'grad_norm': 2.3776369094848633, 'learning_rate': 0.00020991623974916262, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 20:45:43,648] Trial 27 pruned. 


{'eval_loss': 0.6472730040550232, 'eval_accuracy': 0.5833333333333334, 'eval_f1': 0.0, 'eval_runtime': 1.7199, 'eval_samples_per_second': 62.796, 'eval_steps_per_second': 62.796, 'epoch': 1.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6256, 'grad_norm': 1.4161858558654785, 'learning_rate': 4.74366656493851e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.571727991104126, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7872340425531915, 'eval_runtime': 1.721, 'eval_samples_per_second': 62.756, 'eval_steps_per_second': 62.756, 'epoch': 1.0}
{'loss': 0.5005, 'grad_norm': 1.2901554107666016, 'learning_rate': 4.493999903625957e-05, 'epoch': 2.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4568263292312622, 'eval_accuracy': 0.7870370370370371, 'eval_f1': 0.780952380952381, 'eval_runtime': 1.6943, 'eval_samples_per_second': 63.743, 'eval_steps_per_second': 63.743, 'epoch': 2.0}
{'loss': 0.3942, 'grad_norm': 1.5702394247055054, 'learning_rate': 4.244333242313403e-05, 'epoch': 3.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44260555505752563, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7674418604651163, 'eval_runtime': 1.7116, 'eval_samples_per_second': 63.1, 'eval_steps_per_second': 63.1, 'epoch': 3.0}
{'loss': 0.3813, 'grad_norm': 2.9463744163513184, 'learning_rate': 3.9946665810008506e-05, 'epoch': 4.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.483325332403183, 'eval_accuracy': 0.7592592592592593, 'eval_f1': 0.7547169811320755, 'eval_runtime': 1.699, 'eval_samples_per_second': 63.566, 'eval_steps_per_second': 63.566, 'epoch': 4.0}
{'loss': 0.3561, 'grad_norm': 1.9474234580993652, 'learning_rate': 3.744999919688297e-05, 'epoch': 5.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42168769240379333, 'eval_accuracy': 0.8148148148148148, 'eval_f1': 0.7727272727272727, 'eval_runtime': 1.8507, 'eval_samples_per_second': 58.355, 'eval_steps_per_second': 58.355, 'epoch': 5.0}
{'loss': 0.3392, 'grad_norm': 2.6693737506866455, 'learning_rate': 3.495333258375744e-05, 'epoch': 6.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4295365810394287, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7804878048780488, 'eval_runtime': 3.5594, 'eval_samples_per_second': 30.342, 'eval_steps_per_second': 30.342, 'epoch': 6.0}
{'loss': 0.3092, 'grad_norm': 2.4185268878936768, 'learning_rate': 3.245666597063191e-05, 'epoch': 7.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.42411333322525024, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8048780487804879, 'eval_runtime': 2.8469, 'eval_samples_per_second': 37.935, 'eval_steps_per_second': 37.935, 'epoch': 7.0}
{'loss': 0.2788, 'grad_norm': 3.5434768199920654, 'learning_rate': 2.9959999357506374e-05, 'epoch': 8.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.40698522329330444, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7906976744186046, 'eval_runtime': 3.9846, 'eval_samples_per_second': 27.104, 'eval_steps_per_second': 27.104, 'epoch': 8.0}
{'loss': 0.2713, 'grad_norm': 2.639378786087036, 'learning_rate': 2.7463332744380848e-05, 'epoch': 9.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.40820109844207764, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7906976744186046, 'eval_runtime': 2.1646, 'eval_samples_per_second': 49.893, 'eval_steps_per_second': 49.893, 'epoch': 9.0}
{'loss': 0.2743, 'grad_norm': 1.3758822679519653, 'learning_rate': 2.4966666131255314e-05, 'epoch': 10.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4183759391307831, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.759493670886076, 'eval_runtime': 2.5732, 'eval_samples_per_second': 41.971, 'eval_steps_per_second': 41.971, 'epoch': 10.0}
{'loss': 0.2507, 'grad_norm': 3.0729777812957764, 'learning_rate': 2.2469999518129783e-05, 'epoch': 11.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4036009609699249, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7906976744186046, 'eval_runtime': 3.3528, 'eval_samples_per_second': 32.212, 'eval_steps_per_second': 32.212, 'epoch': 11.0}
{'loss': 0.2411, 'grad_norm': 0.6165900230407715, 'learning_rate': 1.9973332905004253e-05, 'epoch': 12.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.41289278864860535, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8, 'eval_runtime': 3.3119, 'eval_samples_per_second': 32.609, 'eval_steps_per_second': 32.609, 'epoch': 12.0}
{'loss': 0.2399, 'grad_norm': 2.9610917568206787, 'learning_rate': 1.747666629187872e-05, 'epoch': 13.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4152557849884033, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.775, 'eval_runtime': 1.7634, 'eval_samples_per_second': 61.244, 'eval_steps_per_second': 61.244, 'epoch': 13.0}
{'loss': 0.2245, 'grad_norm': 3.2774534225463867, 'learning_rate': 1.4979999678753187e-05, 'epoch': 14.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44101741909980774, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 1.6761, 'eval_samples_per_second': 64.435, 'eval_steps_per_second': 64.435, 'epoch': 14.0}
{'loss': 0.2363, 'grad_norm': 2.8856687545776367, 'learning_rate': 1.2483333065627657e-05, 'epoch': 15.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4298252463340759, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7901234567901234, 'eval_runtime': 2.1538, 'eval_samples_per_second': 50.143, 'eval_steps_per_second': 50.143, 'epoch': 15.0}
{'loss': 0.1941, 'grad_norm': 2.7557904720306396, 'learning_rate': 9.986666452502127e-06, 'epoch': 16.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.43742603063583374, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8048780487804879, 'eval_runtime': 1.8735, 'eval_samples_per_second': 57.647, 'eval_steps_per_second': 57.647, 'epoch': 16.0}
{'loss': 0.2362, 'grad_norm': 2.6491916179656982, 'learning_rate': 7.489999839376594e-06, 'epoch': 17.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.44047847390174866, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8, 'eval_runtime': 1.9067, 'eval_samples_per_second': 56.643, 'eval_steps_per_second': 56.643, 'epoch': 17.0}
{'loss': 0.2062, 'grad_norm': 2.642678737640381, 'learning_rate': 4.993333226251063e-06, 'epoch': 18.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4366033673286438, 'eval_accuracy': 0.8425925925925926, 'eval_f1': 0.7848101265822784, 'eval_runtime': 1.792, 'eval_samples_per_second': 60.267, 'eval_steps_per_second': 60.267, 'epoch': 18.0}
{'loss': 0.1815, 'grad_norm': 2.4234516620635986, 'learning_rate': 2.4966666131255316e-06, 'epoch': 19.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.45312634110450745, 'eval_accuracy': 0.8240740740740741, 'eval_f1': 0.7532467532467533, 'eval_runtime': 3.9906, 'eval_samples_per_second': 27.063, 'eval_steps_per_second': 27.063, 'epoch': 19.0}
{'loss': 0.1922, 'grad_norm': 2.110167980194092, 'learning_rate': 0.0, 'epoch': 20.0}


  0%|          | 0/108 [00:00<?, ?it/s]

{'eval_loss': 0.4412083923816681, 'eval_accuracy': 0.8518518518518519, 'eval_f1': 0.8, 'eval_runtime': 2.3128, 'eval_samples_per_second': 46.697, 'eval_steps_per_second': 46.697, 'epoch': 20.0}


[I 2024-07-22 21:37:27,375] Trial 28 finished with value: 0.8 and parameters: {'learning_rate': 4.993333226251063e-05, 'per_device_train_batch_size': 32, 'weight_decay': 0.1}. Best is trial 3 with value: 0.8.


{'train_runtime': 3103.0883, 'train_samples_per_second': 3.912, 'train_steps_per_second': 0.122, 'train_loss': 0.2966584613448695, 'epoch': 20.0}


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/380 [00:00<?, ?it/s]

{'loss': 0.6359, 'grad_norm': 1.182146430015564, 'learning_rate': 3.5069880339523914e-05, 'epoch': 1.0}


  0%|          | 0/108 [00:00<?, ?it/s]

[I 2024-07-22 21:38:00,468] Trial 29 pruned. 


{'eval_loss': 0.6049523949623108, 'eval_accuracy': 0.7222222222222222, 'eval_f1': 0.5588235294117647, 'eval_runtime': 3.0909, 'eval_samples_per_second': 34.941, 'eval_steps_per_second': 34.941, 'epoch': 1.0}


In [14]:
# Display the best run found by the hyperparameter search (run id, objective value, tuned hyperparameters).
best_run

BestRun(run_id='3', objective=0.8, hyperparameters={'learning_rate': 4.003920490135503e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0}, run_summary=None)

In [15]:
# Grab the Optuna study through the trial object attached to the trainer so it can be persisted.
study = trainer._trial.study

# Copy the study from its current storage into an SQLite database file under output_dir.
sqlite_url = f"sqlite:///{output_dir}/study.db"
optuna.copy_study(
    from_study_name=study.study_name,
    from_storage=study._storage,
    to_storage=sqlite_url,
    to_study_name="study",
)

# Pickle the sampler and the pruner next to the database, one file each.
for pickle_path, component in (
    (f"{output_dir}/sampler.pkl", study.sampler),
    (f"{output_dir}/pruner.pkl", study.pruner),
):
    with open(pickle_path, "wb") as fout:
        pickle.dump(component, fout)

[I 2024-07-22 21:38:01,018] A new study created in RDB with name: study


In [16]:
# Plot the objective value of each trial over the course of the study.
optuna.visualization.plot_optimization_history(study)

In [17]:
# Plot the estimated importance of each searched hyperparameter for the objective.
optuna.visualization.plot_param_importances(study)

In [18]:
# Contour plots of the objective over pairs of searched hyperparameters.
optuna.visualization.plot_contour(study)

---

# Train the best model

In [19]:
# Show the best run again as the starting point for the retraining section.
best_run

BestRun(run_id='3', objective=0.8, hyperparameters={'learning_rate': 4.003920490135503e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0}, run_summary=None)

In [20]:
# NOTE(review): retraining with the best hyperparameters is currently disabled; this whole
# cell is commented out, so nothing below runs.
# NOTE(review): if it is re-enabled, weight_decay is not passed to TrainingArguments (the line
# is commented out and refers to `config.weight_decay`), even though weight_decay is one of the
# tuned values in best_run.hyperparameters — confirm whether that is intended.
# training_args = TrainingArguments(
#     output_dir=output_dir,
#     save_strategy='epoch',
#     evaluation_strategy='epoch',
#     logging_strategy='epoch',
#     learning_rate=best_run.hyperparameters['learning_rate'],
#     # weight_decay=config.weight_decay,
#     num_train_epochs=TRAIN_EPOCHS,
#     per_device_train_batch_size=best_run.hyperparameters['per_device_train_batch_size'],
#     per_device_eval_batch_size=1,
#     save_total_limit=1,
#     remove_unused_columns=False,
#     push_to_hub=False,
#     fp16=False, # float point 16 bit precision (instead of 32)
#     load_best_model_at_end=True,
# )

# trainer = Trainer(
#     model=None,
#     model_init=model_init,
#     args=training_args,
#     data_collator=collate_fn,
#     compute_metrics=partial(compute_metrics, metrics=[load_metric(m, trust_remote_code=True) for m in METRICS]),
#     train_dataset=ds['train'],
#     eval_dataset=ds['test'],
#     tokenizer=processor,
# )

# trainer.train()

In [21]:
# NOTE(review): disabled together with the retraining cell above. If this is re-enabled on its
# own, `trainer` would presumably still be the hyperparameter-search trainer rather than a
# retrained best model — confirm before relying on the printed metrics.
# # Re-Evaluate the best model
# metrics = trainer.evaluate()
# print(metrics)

In [22]:
# Display only the tuned hyperparameter values of the best run.
best_run.hyperparameters

{'learning_rate': 4.003920490135503e-05,
 'per_device_train_batch_size': 16,
 'weight_decay': 0}

In [23]:
# Show the full Optuna trial record for the best run. run_id is a string, so it is cast to int
# and used as the position in study.trials.
study.trials[int(best_run.run_id)]

FrozenTrial(number=3, state=TrialState.COMPLETE, values=[0.8], datetime_start=datetime.datetime(2024, 7, 22, 18, 37, 51, 699898), datetime_complete=datetime.datetime(2024, 7, 22, 18, 44, 34, 841965), params={'learning_rate': 4.003920490135503e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0}, user_attrs={}, system_attrs={}, intermediate_values={38: 0.7878787878787878, 76: 0.7619047619047619, 114: 0.7676767676767676, 152: 0.7619047619047619, 190: 0.6666666666666666, 228: 0.8045977011494253, 266: 0.7640449438202247, 304: 0.7848101265822784, 342: 0.7906976744186046, 380: 0.7027027027027027, 418: 0.7532467532467533, 456: 0.8, 494: 0.7901234567901234, 532: 0.8, 570: 0.7901234567901234, 608: 0.7777777777777778, 646: 0.7631578947368421, 684: 0.7951807228915663, 722: 0.8048780487804879, 760: 0.8}, distributions={'learning_rate': FloatDistribution(high=0.001, log=True, low=1e-06, step=None), 'per_device_train_batch_size': CategoricalDistribution(choices=(4, 8, 16, 32)), 'weight_decay':

In [None]:
# Per-split class balance: count the samples labelled 0 and 1 in the train and test
# splits, then store the class-0/class-1 share as a "P/Q" percentage string.
set_split_summary = {
    split: {'image_count': {'0': 0, '1': 0}}
    for split in ('train', 'test')
}
for split, split_stats in set_split_summary.items():
    label_counts = split_stats['image_count']
    for sample in ds[split]:
        # Labels are stringified so they match the '0'/'1' counter keys.
        label_counts[str(sample['labels'])] += 1
    total = sum(label_counts.values())
    # Integer percentage of class '0'; class '1' is reported as the remainder to 100.
    ratio0 = round(label_counts['0'] / total * 100)
    split_stats['ratio'] = f'{ratio0}/{100-ratio0}'

In [24]:
# Outcome and settings of the hyperparameter search.
# NOTE(review): 'tunned_params' looks like a typo for 'tuned_params'; the key is kept as-is
# because it ends up in the written summary.json.
optuna_summary = {
    'best_run_id': best_run.run_id,
    'objective': OBJECTIVE_METRIC,
    'direction': DIRECTION,
    'optimized_objective': best_run.objective,
    'trials': NUM_TRIALS,
    'epochs': TRAIN_EPOCHS,
    'tunned_params': best_run.hyperparameters,
}

# Full experiment record: search results plus the dataset/model configuration of this run.
summary = {
    'optuna': optuna_summary,
    'dataset': DATASET.name,
    'nfcs': NFCS_REGION.name,
    'masked': MASKED,
    'val_split': VALIDATION_SIZE,
    # Per-split label counts and class ratio computed in set_split_summary.
    'train_val_details': set_split_summary,
    'model': model_name_or_path,
    # Pickle format version of the running interpreter.
    'pickle_version': pickle.format_version,
    # Run timestamp, truncated to minute precision.
    'timestamp': now.isoformat(timespec='minutes'),
    'description': DESCRIPTION,
}

In [25]:
# Serialize the run summary as 2-space-indented JSON and write it into the output directory.
summary_path = f'{output_dir}/summary.json'
with open(summary_path, "w") as summary_file:
    summary_file.write(json.dumps(summary, indent=2))