In [1]:
import wandb
wandb.login()


%env WANDB_PROJECT=Planigo_product_training
%env WANDB_LOG_MODEL=true

[34m[1mwandb[0m: Currently logged in as: [33msathsara_rasantha[0m. Use [1m`wandb login --relogin`[0m to force relogin


env: WANDB_PROJECT=Planigo_product_training
env: WANDB_LOG_MODEL=true


In [2]:
from datasets import load_dataset

dataset = load_dataset("imagefolder", data_dir="/home/orelit/Projects -Sathsara/Planigo/data/CVAT Data/Wine_training_VIT")

Resolving data files:   0%|          | 0/40214 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/9999 [00:00<?, ?it/s]

Using custom data configuration default-ebd2a4bc8d209e11
Reusing dataset imagefolder (/home/orelit/.cache/huggingface/datasets/imagefolder/default-ebd2a4bc8d209e11/0.0.0/0fc50c79b681877cc46b23245a6ef5333d036f48db40d53765a68034bc48faff)


  0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
labels = dataset['train'].features['label']

In [4]:
from transformers import ViTFeatureExtractor


checkpoint = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(checkpoint)


In [5]:
from torchvision.transforms import (
    Compose,
    Normalize,
    Resize,
    RandomResizedCrop,
    RandomHorizontalFlip,
    RandomAdjustSharpness,
    ToTensor,
    ToPILImage
)


# train
train_aug_transforms = Compose([
    RandomResizedCrop(size=feature_extractor.size),
    RandomHorizontalFlip(p=0.5),
    RandomAdjustSharpness(sharpness_factor=5, p=0.5),
    ToTensor(),
    Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
])


# validation/test
valid_aug_transforms = Compose([
    Resize(size=(feature_extractor.size, feature_extractor.size)),
    ToTensor(),
    Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
])


In [6]:
def apply_train_aug_transforms(examples):
  examples['pixel_values'] = [train_aug_transforms(img.convert('RGB')) for img in examples['image']]
  return examples


def apply_valid_aug_transforms(examples):
  examples['pixel_values'] = [valid_aug_transforms(img.convert('RGB')) for img in examples['image']]
  return examples


dataset['train'].set_transform(apply_train_aug_transforms)
dataset['validation'].set_transform(apply_valid_aug_transforms)

In [7]:
datasets_processed = dataset.rename_column('label', 'labels')

In [8]:
from transformers import ViTForImageClassification


def model_init():
    vit_model = ViTForImageClassification.from_pretrained(
        checkpoint,
        num_labels=labels.num_classes,
        id2label={index: label for index, label in enumerate(labels.names)},
        label2id={label: index for index, label in enumerate(labels.names)}
    )
    return vit_model


In [9]:
sweep_config = {
    'method': 'random'
}


# hyperparameters
parameters_dict = {
    'epochs': {
        'value': 1
        },
    'batch_size': {
        'values': [8, 16, 32, 64]
        },
    'learning_rate': {
        'distribution': 'log_uniform_values',
        'min': 1e-5,
        'max': 1e-3
    },
    'weight_decay': {
        'values': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
    },
}


sweep_config['parameters'] = parameters_dict

In [10]:
sweep_id = wandb.sweep(sweep_config, project='Planigo_product_training')

Create sweep with ID: mmmd0s8n
Sweep URL: https://wandb.ai/sathsara_rasantha/Planigo_product_training/sweeps/mmmd0s8n


In [11]:
from datasets import load_metric
import numpy as np


def compute_metrics_fn(eval_preds):
  metrics = dict()
  
  accuracy_metric = load_metric('accuracy')
  precision_metric = load_metric('precision')
  recall_metric = load_metric('recall')
  f1_metric = load_metric('f1')


  logits = eval_preds.predictions
  labels = eval_preds.label_ids
  preds = np.argmax(logits, axis=-1)  
  
  metrics.update(accuracy_metric.compute(predictions=preds, references=labels))
  metrics.update(precision_metric.compute(predictions=preds, references=labels, average='weighted'))
  metrics.update(recall_metric.compute(predictions=preds, references=labels, average='weighted'))
  metrics.update(f1_metric.compute(predictions=preds, references=labels, average='weighted'))


  return metrics


In [12]:
import torch


def collate_fn(examples):
  pixel_values = torch.stack([example['pixel_values'] for example in examples])
  labels = torch.tensor([example['labels'] for example in examples])
  return {'pixel_values': pixel_values, 'labels': labels}


In [13]:
from transformers import TrainingArguments, Trainer


def train(config=None):
  with wandb.init(config=config):
    # set sweep configuration
    config = wandb.config


    # set training arguments
    training_args = TrainingArguments(
        output_dir='canned_food_sweeps',
        report_to='wandb',  # Turn on Weights & Biases logging
        num_train_epochs=config.epochs,
        learning_rate=config.learning_rate,
        weight_decay=config.weight_decay,
        per_device_train_batch_size=config.batch_size,
        per_device_eval_batch_size=16,
        save_strategy='epoch',
        evaluation_strategy='epoch',
        logging_strategy='epoch',
        load_best_model_at_end=True,
        remove_unused_columns=False,
        fp16=True
    )


    # define training loop
    trainer = Trainer(
        # model,
        model_init=model_init,
        args=training_args,
        data_collator=collate_fn,
        train_dataset=datasets_processed['train'],
        eval_dataset=datasets_processed['validation'],
        compute_metrics=compute_metrics_fn
    )


    # start training loop
    trainer.train()


In [14]:
wandb.agent(sweep_id, train, count=5)

[34m[1mwandb[0m: Agent Starting Run: mjra6o5g with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	learning_rate: 0.000805880825993634
[34m[1mwandb[0m: 	weight_decay: 0


loading configuration file config.json from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "073490154317",
    "1": "3760125946719",
    "2": "3760125946870",
    "3": "4001432773230",
    "4": "4022025001905",
    "5": "4022025001929",
    "6": "4022025002100",
    "7": "4022025261002",
    "8": "4022025290408",
    "9": "4603400000043",
    "10": "5998623530644",
    "11": "608614309160",
    "12": "608614309184",
    "13": "608614309245",
    "14": "608614309269",
    "15": "608614309276",
    "16": "608614309290",
    "17": "7290000023809",
    "18": "7290000023816",
    "19": "7290000023847",
    "

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4515,1.738773,0.555722,0.560023,0.555722,0.501818


***** Running Evaluation *****
  Num examples = 9996
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to canned_food_sweeps/checkpoint-2512
Configuration saved in canned_food_sweeps/checkpoint-2512/config.json
Model weights saved in canned_food_sweeps/checkpoint-2512/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from canned_food_sweeps/checkpoint-2512 (score: 1.738773226737976).
Using cuda_amp half precision backend
Saving model checkpoint to /tmp/tmp30kv50se
Configuration saved in /tmp/tmp30kv50se/config.json
Model weights saved in /tmp/tmp30kv50se/pytorch_model.bin


VBox(children=(Label(value='327.981 MB of 327.981 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁
train/global_step,▁▁▁

0,1
eval/accuracy,0.55572
eval/f1,0.50182
eval/loss,1.73877
eval/precision,0.56002
eval/recall,0.55572
eval/runtime,98.1632
eval/samples_per_second,101.83
eval/steps_per_second,6.367
train/epoch,1.0
train/global_step,2512.0


[34m[1mwandb[0m: Agent Starting Run: 2n2pz1yn with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	learning_rate: 0.0006553989611473101
[34m[1mwandb[0m: 	weight_decay: 0.2


PyTorch: setting up devices
loading configuration file config.json from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "073490154317",
    "1": "3760125946719",
    "2": "3760125946870",
    "3": "4001432773230",
    "4": "4022025001905",
    "5": "4022025001929",
    "6": "4022025002100",
    "7": "4022025261002",
    "8": "4022025290408",
    "9": "4603400000043",
    "10": "5998623530644",
    "11": "608614309160",
    "12": "608614309184",
    "13": "608614309245",
    "14": "608614309269",
    "15": "608614309276",
    "16": "608614309290",
    "17": "7290000023809",
    "18": "7290000023816",
    

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.3112,1.5813,0.582333,0.577721,0.582333,0.521222


***** Running Evaluation *****
  Num examples = 9996
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to canned_food_sweeps/checkpoint-2512
Configuration saved in canned_food_sweeps/checkpoint-2512/config.json
Model weights saved in canned_food_sweeps/checkpoint-2512/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from canned_food_sweeps/checkpoint-2512 (score: 1.5812997817993164).
Using cuda_amp half precision backend
Saving model checkpoint to /tmp/tmpwojtnv5c
Configuration saved in /tmp/tmpwojtnv5c/config.json
Model weights saved in /tmp/tmpwojtnv5c/pytorch_model.bin


VBox(children=(Label(value='327.981 MB of 327.981 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁
train/global_step,▁▁▁

0,1
eval/accuracy,0.58233
eval/f1,0.52122
eval/loss,1.5813
eval/precision,0.57772
eval/recall,0.58233
eval/runtime,98.157
eval/samples_per_second,101.837
eval/steps_per_second,6.367
train/epoch,1.0
train/global_step,2512.0


[34m[1mwandb[0m: Agent Starting Run: zorf0wpe with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	learning_rate: 0.00030422168950805463
[34m[1mwandb[0m: 	weight_decay: 0.1


PyTorch: setting up devices
loading configuration file config.json from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "073490154317",
    "1": "3760125946719",
    "2": "3760125946870",
    "3": "4001432773230",
    "4": "4022025001905",
    "5": "4022025001929",
    "6": "4022025002100",
    "7": "4022025261002",
    "8": "4022025290408",
    "9": "4603400000043",
    "10": "5998623530644",
    "11": "608614309160",
    "12": "608614309184",
    "13": "608614309245",
    "14": "608614309269",
    "15": "608614309276",
    "16": "608614309290",
    "17": "7290000023809",
    "18": "7290000023816",
    

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6581,0.75148,0.848039,0.837983,0.848039,0.822651


***** Running Evaluation *****
  Num examples = 9996
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to canned_food_sweeps/checkpoint-2512
Configuration saved in canned_food_sweeps/checkpoint-2512/config.json
Model weights saved in canned_food_sweeps/checkpoint-2512/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from canned_food_sweeps/checkpoint-2512 (score: 0.7514801621437073).
Using cuda_amp half precision backend
Saving model checkpoint to /tmp/tmp72t5xxu6
Configuration saved in /tmp/tmp72t5xxu6/config.json
Model weights saved in /tmp/tmp72t5xxu6/pytorch_model.bin


VBox(children=(Label(value='327.981 MB of 327.981 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁
train/global_step,▁▁▁

0,1
eval/accuracy,0.84804
eval/f1,0.82265
eval/loss,0.75148
eval/precision,0.83798
eval/recall,0.84804
eval/runtime,95.8804
eval/samples_per_second,104.255
eval/steps_per_second,6.519
train/epoch,1.0
train/global_step,2512.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xspyw5kk with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	learning_rate: 3.5094032216512484e-05
[34m[1mwandb[0m: 	weight_decay: 0.2


PyTorch: setting up devices
loading configuration file config.json from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "073490154317",
    "1": "3760125946719",
    "2": "3760125946870",
    "3": "4001432773230",
    "4": "4022025001905",
    "5": "4022025001929",
    "6": "4022025002100",
    "7": "4022025261002",
    "8": "4022025290408",
    "9": "4603400000043",
    "10": "5998623530644",
    "11": "608614309160",
    "12": "608614309184",
    "13": "608614309245",
    "14": "608614309269",
    "15": "608614309276",
    "16": "608614309290",
    "17": "7290000023809",
    "18": "7290000023816",
    

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.3161,2.39432,0.907363,0.890998,0.907363,0.888819


***** Running Evaluation *****
  Num examples = 9996
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to canned_food_sweeps/checkpoint-5024
Configuration saved in canned_food_sweeps/checkpoint-5024/config.json
Model weights saved in canned_food_sweeps/checkpoint-5024/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from canned_food_sweeps/checkpoint-5024 (score: 2.394319534301758).
Using cuda_amp half precision backend
Saving model checkpoint to /tmp/tmpkokqkt57
Configuration saved in /tmp/tmpkokqkt57/config.json
Model weights saved in /tmp/tmpkokqkt57/pytorch_model.bin


VBox(children=(Label(value='327.981 MB of 327.981 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁
train/global_step,▁▁▁

0,1
eval/accuracy,0.90736
eval/f1,0.88882
eval/loss,2.39432
eval/precision,0.891
eval/recall,0.90736
eval/runtime,94.6425
eval/samples_per_second,105.618
eval/steps_per_second,6.604
train/epoch,1.0
train/global_step,5024.0


[34m[1mwandb[0m: Agent Starting Run: 7wxicp58 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	learning_rate: 0.0003592883550153904
[34m[1mwandb[0m: 	weight_decay: 0.4


PyTorch: setting up devices
loading configuration file config.json from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "073490154317",
    "1": "3760125946719",
    "2": "3760125946870",
    "3": "4001432773230",
    "4": "4022025001905",
    "5": "4022025001929",
    "6": "4022025002100",
    "7": "4022025261002",
    "8": "4022025290408",
    "9": "4603400000043",
    "10": "5998623530644",
    "11": "608614309160",
    "12": "608614309184",
    "13": "608614309245",
    "14": "608614309269",
    "15": "608614309276",
    "16": "608614309290",
    "17": "7290000023809",
    "18": "7290000023816",
    

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

loading weights file pytorch_model.bin from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/pytorch_model.bin
Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are ne

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 7wxicp58 errored: RuntimeError('CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 7.79 GiB total capacity; 6.01 GiB already allocated; 51.81 MiB free; 6.14 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 7wxicp58 errored: RuntimeError('CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 7.79 GiB total capacity; 6.01 GiB already allocated; 51.81 MiB free; 6.14 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF')
