In [1]:
!pip install --upgrade pip
!pip install tokenizers
!pip install datasets --upgrade evaluate
!pip install transformers
!pip install numpy torch matplotlib pandas scikit-learn tqdm pillow
!pip install datasets evaluate transformers
!pip install torchvision
!pip install setuptools
!pip install wandb
!pip show wandb
!pip install schedulefree
!pip install nbformat


Name: wandb
Version: 0.19.9
Summary: A CLI and library for interacting with the Weights & Biases API.
Home-page: 
Author: 
Author-email: Weights & Biases <support@wandb.com>
License: MIT License

Copyright (c) 2021 Weights and Biases, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEM

In [2]:
import os
import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd
from datasets import load_dataset
from evaluate import load
from transformers import (
    ViTFeatureExtractor,
    ViTForImageClassification,
    TrainingArguments,
    Trainer,
    get_scheduler,
    AutoImageProcessor
)

from torch.optim import AdamW, SGD
import wandb
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torchvision.transforms as transforms
from PIL import Image
import random
from tqdm.auto import tqdm
from sklearn.metrics import confusion_matrix
from schedulefree import AdamWScheduleFree
from torch.optim.lr_scheduler import CyclicLR, ExponentialLR, ReduceLROnPlateau


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# sweep_config = {
#     "method": "grid",  # we cna use "grid", "random", or "bayes"
#     "metric": {
#         "name": "val_accuracy",  # Metric to optimize
#         "goal": "maximize"       # Goal: maximize or minimize
#     },
#     "parameters": {
#         "optimizer_name": {
#             "values": ["AdamW", "SGD", "RMSProp", "AdaGrad", "schedule_free_adamw"]  # Optimizers to test
#         },
#         "learning_rate": {
#             "values": [2e-5, 2e-4, 2e-3, 2e-2, 2e-1]  # Fixed learning rate for simplicity
#         },
#         "batch_size": {
#             "values": [16]  # Fixed batch size
#         },
#         "num_epochs": {
#             "values": [3]  # Fixed number of epochs
#         },
#         "scheduler_name": {
#             "values": ["cosine"]  # Fixed scheduler for simplicity
#         }
#     }
# }

In [None]:
# Set seed for reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed()

# Initialize experiment tracking
def init_wandb(project_name, experiment_name, config):
    return wandb.init(
        # entity="dl_project_sp25",
        project=project_name,
        name=experiment_name,
        config=config,
        reinit=True
    )

# Load and prepare dataset
def prepare_dataset(dataset_name, image_processor):
    """
    Load and prepare a dataset from Hugging Face for ViT fine-tuning
    """
    # Load the dataset
    print(f"Loading dataset: {dataset_name}")
    dataset = load_dataset(dataset_name)

    # Get label information
    if "label" in dataset["train"].features:
        labels = dataset["train"].features["label"].names
    elif "labels" in dataset["train"].features:
        labels = dataset["train"].features["labels"].names
    else:
        # Count unique labels and create labels list
        all_labels = dataset["train"][0]["label"] if "label" in dataset["train"][0] else dataset["train"][0]["labels"]
        num_labels = len(set(all_labels))
        labels = [str(i) for i in range(num_labels)]

    # Create label mappings
    label2id = {label: i for i, label in enumerate(labels)}
    id2label = {i: label for i, label in enumerate(labels)}

    # Set up image transformations based on the model's requirements
    normalize = transforms.Normalize(
        mean=image_processor.image_mean,
        std=image_processor.image_std
    )

    # Get the expected image size
    if "shortest_edge" in image_processor.size:
        size = image_processor.size["shortest_edge"]
    else:
        size = (image_processor.size["height"], image_processor.size["width"])

    # Define transforms for training data
    train_transforms = transforms.Compose([
        transforms.RandomResizedCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    # Define transforms for validation/test data
    val_transforms = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        normalize,
    ])

    # Apply transformations to the dataset
    def preprocess_train(examples):
        examples["pixel_values"] = [
            train_transforms(image.convert("RGB"))
            for image in examples["image"]
        ]
        return examples

    def preprocess_val(examples):
        examples["pixel_values"] = [
            val_transforms(image.convert("RGB"))
            for image in examples["image"]
        ]
        return examples

    # Apply preprocessing to each split
    train_dataset = dataset["train"].map(
        preprocess_train, batched=True, remove_columns=["image"]
    )

    if "validation" in dataset:
        val_dataset = dataset["validation"].map(
            preprocess_val, batched=True, remove_columns=["image"]
        )

    else:
        # Create a validation split if none exists
        splits = train_dataset.train_test_split(test_size=0.2, seed=42)
        train_dataset = splits["train"]
        val_dataset = splits["test"]

    if "test" in dataset:
        test_dataset = dataset["test"].map(
            preprocess_val, batched=True, remove_columns=["image"]
        )
    else:
        # test_dataset = val_dataset    #split further rather than using validation as test dataset

        # Further split validation dataset to create a test dataset
        test_split = val_dataset.train_test_split(test_size=0.2, seed=42)
        val_dataset = test_split["train"]  # Update validation dataset
        test_dataset = test_split["test"]  # Create test dataset

    print(f"Dataset prepared with {len(train_dataset)} training, {len(val_dataset)} validation, and {len(test_dataset)} test examples")

    return train_dataset, val_dataset, test_dataset, id2label, label2id

# Define compute_metrics function for evaluation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted')
    acc = accuracy_score(labels, predictions)

    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
    }

# Main experiment pipeline
def run_vit_experiment(config): 
    """
    Run a ViT experiment with the specified configuration
    """
    # Initialize wandb for experiment tracking
    # run = init_wandb("ViT-LR-Schedulers", config["experiment_name"], config)

    # Initialize wandb for experiment tracking with proper config logging
    run = wandb.init(
        project="ViT-LR-Schedulers",
        name=config["experiment_name"],
        group=f"{config['optimizer_name']}_experiments",  # Group by optimizer
        config={
            # Explicitly list all important hyperparameters
            "optimizer": config["optimizer_name"],
            "scheduler": config["scheduler_name"],
            "learning_rate": config["learning_rate"],
            "batch_size": config["batch_size"],
            "num_epochs": config["num_epochs"],
            "weight_decay": config["weight_decay"],
            "warmup_ratio": config.get("warmup_ratio", 0.0),
            "dataset": config["dataset_name"],
            "model": config["model_name"],
        },
        tags=[config["optimizer_name"], config["scheduler_name"]],  # Add tags for easy filtering
        reinit=True
    )


    # Load the image processor for the model
    image_processor = AutoImageProcessor.from_pretrained(config["model_name"], use_fast=True)

    # Prepare the dataset
    train_dataset, val_dataset, test_dataset, id2label, label2id = prepare_dataset(
        config["dataset_name"], image_processor
    )

    # # Visualize some images from the training dataset (do this w/o the remove_columns=["image"])
    # # Initialize a set to keep track of shown labels
    # shown_labels = set()

    # # Initialize the figure for plotting
    # plt.figure(figsize=(15, 10))

    # # Loop through the dataset and plot the first image of each label
    # for i, sample in enumerate(train_dataset):
    #     label = sample["label"]
    #     image = sample["image"]

    #     # Check if the label has already been shown
    #     if label not in shown_labels:
    #         plt.subplot(1, len(id2label), len(shown_labels) + 1)
    #         plt.imshow(image.convert("RGB"))  # Convert to RGB if necessary
    #         plt.title(id2label[label])  # Get label name
    #         plt.axis("off")
    #         shown_labels.add(label)

    #         # Stop if all labels have been shown
    #         if len(shown_labels) == len(id2label):
    #             break

    # plt.show()

    # Load the ViT model
    model = ViTForImageClassification.from_pretrained(
        config["model_name"],
        num_labels=len(id2label),
        id2label=id2label,
        label2id=label2id,
        ignore_mismatched_sizes=True
    )

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=f"./results/{config['experiment_name']}",
        per_device_train_batch_size=config["batch_size"],
        per_device_eval_batch_size=config["batch_size"],
        num_train_epochs=config["num_epochs"],
        weight_decay=config["weight_decay"],
        eval_strategy="steps",
        save_strategy="steps",
        logging_strategy="steps",  # Ensure logging is enabled
        logging_steps=10,          # Log every 10 steps (adjust as needed)
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        push_to_hub=False,
        report_to="wandb",
        remove_unused_columns=False,
        learning_rate=config["learning_rate"],
    )

    # Setup optimizer
    # if config["optimizer_name"] == "AdamW":
    #     optimizer = AdamW(model.parameters(), lr=config["learning_rate"])
    # else:  # SGD
    #     optimizer = SGD(model.parameters(), lr=config["learning_rate"], momentum=0.9)

    if config["optimizer_name"] == "schedule_free_adamw":
        optimizer = AdamWScheduleFree(model.parameters(), lr=config["learning_rate"])
    elif config["optimizer_name"] == "AdamW":
        optimizer = AdamW(model.parameters(), lr=config["learning_rate"])
    elif config["optimizer_name"] == "SGD":
        optimizer = SGD(model.parameters(), lr=config["learning_rate"], momentum=0.9)
    elif config["optimizer_name"] == "RMSProp":
        optimizer = torch.optim.RMSprop(model.parameters(), lr=config["learning_rate"])
    elif config["optimizer_name"] == "AdaGrad":
        optimizer = torch.optim.Adagrad(model.parameters(), lr=config["learning_rate"])
    else:
        raise ValueError(f"Optimizer {config['optimizer_name']} not supported")

    # Setup scheduler
    num_training_steps = len(train_dataset) // config["batch_size"] * config["num_epochs"]
    num_warmup_steps = int(num_training_steps * config["warmup_ratio"]) if "warmup_ratio" in config else 0

    scheduler_name = config["scheduler_name"]
    if scheduler_name == "linear":
        scheduler = get_scheduler(
            "linear",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps
        )
    elif scheduler_name == "cosine":
        scheduler = get_scheduler(
            "cosine",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps
        )
    
    elif scheduler_name == "polynomial":
        scheduler = get_scheduler(
            "polynomial",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps,
            # power=config.get("poly_power", 1.0),
        )
    elif scheduler_name == "cyclic":
        # CyclicLR from torch.optim.lr_scheduler
        # Using step_size_up as 1/3 of training steps and step_size_down as 2/3
        step_size_up = num_training_steps // 3
        scheduler = CyclicLR(
            optimizer,
            base_lr=config["learning_rate"] / 10,  # Lower bound of cycle
            max_lr=config["learning_rate"],       # Upper bound of cycle
            step_size_up=step_size_up,
            step_size_down=step_size_up * 2,
            mode='triangular',                    # Three modes: triangular, triangular2, exp_range
            cycle_momentum=False                  # Don't cycle momentum
        )
    elif scheduler_name == "exponential":
        # ExponentialLR from torch.optim.lr_scheduler
        # gamma < 1.0 for decay, common values: 0.9, 0.95, 0.99
        scheduler = ExponentialLR(optimizer, gamma=0.95)

    # elif scheduler_name == "adaptive":
    #     # ReduceLROnPlateau - reduces LR when metric stops improving
    #     # This requires modification to the training loop to update based on validation performance
    #     scheduler = ReduceLROnPlateau(
    #         optimizer, 
    #         mode='max',              # Since we want to maximize accuracy
    #         factor=0.5,              # Multiply LR by this factor when plateauing
    #         patience=2,              # Number of epochs with no improvement after which LR will be reduced
    #         threshold=0.01,          # Threshold for measuring improvement
    #         threshold_mode='rel',    # Interpret threshold as relative change
    #         min_lr=1e-6              # Lower bound on the learning rate
    #     )

    elif scheduler_name == "constant":
        scheduler = get_scheduler(
            "constant",
            optimizer=optimizer,
        )
    elif scheduler_name == "cosine_with_restarts":
        scheduler = get_scheduler(
            "cosine_with_restarts",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps,
        )
    elif scheduler_name == "constant_with_warmup":
        scheduler = get_scheduler(
            "constant_with_warmup",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
        )
    # add more experiments if required ...

    else:
        raise ValueError(f"Scheduler {scheduler_name} not supported")

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        optimizers=(optimizer, scheduler)
    )

    # # Loss validation curve in the training loop to log metrics to W&B
    # for epoch in range(config["num_epochs"]):
    #     print(f"Epoch {epoch + 1}/{config['num_epochs']}")

    #     # Train for one epoch
    #     trainer.train()

    #     # Evaluate on validation set
    #     eval_results = trainer.evaluate(val_dataset)

    #     print(trainer.state.log_history)
    #     if trainer.state.log_history and "loss" in trainer.state.log_history[-1]:
    #         train_loss = trainer.state.log_history[-1]["loss"]
    #     else:
    #         train_loss = None

    #     # Log training and validation metrics to W&B
    #     wandb.log({
    #         "epoch": epoch + 1,
    #         "train_loss": trainer.state.log_history[-1].get("loss", None),
    #         "val_loss": eval_results["eval_loss"],
    #         "val_accuracy": eval_results["eval_accuracy"],
    #     })

    # # Loss epoch curve in the training loop to log metrics to W&B
    # for epoch in range(config["num_epochs"]):
    #     print(f"Epoch {epoch + 1}/{config['num_epochs']}")

    #     # Train for one epoch
    #     trainer.train()

    #     # Evaluate on validation set
    #     eval_results = trainer.evaluate(val_dataset)

    #     # Extract training loss from the trainer's state
    #     if trainer.state.log_history and "loss" in trainer.state.log_history[-1]:
    #         train_loss = trainer.state.log_history[-1]["loss"]
    #     else:
    #         train_loss = None  # Handle missing loss gracefully

    #     # Log training and validation metrics to W&B
    #     wandb.log({
    #         "epoch": epoch + 1,
    #         "train_loss": train_loss,                  # Training loss
    #         "val_loss": eval_results["eval_loss"],    # Validation loss
    #         "val_accuracy": eval_results["eval_accuracy"],  # Validation accuracy
    #     })

    # Train the model
    print(f"Starting training for {config['experiment_name']}...")
    trainer.train()

    # Evaluate the model
    print(f"Evaluating {config['experiment_name']}...")
    eval_results = trainer.evaluate(test_dataset)


    # Log final metrics
    wandb.log({
        "final_accuracy": eval_results["eval_accuracy"],
        "final_f1": eval_results["eval_f1"],
        "final_precision": eval_results["eval_precision"],
        "final_recall": eval_results["eval_recall"],
    })

    # Compute confusion matrix for test set
    predictions, labels, _ = trainer.predict(test_dataset)
    predictions = np.argmax(predictions, axis=1)

    # Convert to lists
    labels = labels.tolist()
    predictions = predictions.tolist()

    # Log confusion matrix to W&B
    wandb.log({
        "confusion_matrix_test": wandb.plot.confusion_matrix(
            probs=None,
            y_true=labels,
            preds=predictions,
            class_names=[str(i) for i in range(len(np.unique(labels)))]
        )
    })



    # Save the model
    trainer.save_model(f"./saved_models/{config['experiment_name']}")

    # Finish wandb run
    # wandb.finish()

    return eval_results

# Get experiment configurations for challenging datasets
def get_experiment_configs():
    # We'll use a more complex dataset from Hugging Face
    base_config = {
        "model_name": "google/vit-base-patch16-224-in21k",
        "dataset_name": "jbarat/plant_species",  # Any challenging dataset.
        "batch_size": 16,
        "num_epochs": 10, # let's keep smaller number to begin with.
        "weight_decay": 0.01,
        # "optimizer_name": "AdamW",
    }

    # Optimizers with their corresponding learning rates
    optimizers = {
        "schedule_free_adamw": 0.0002,
        "AdamW": 0.0002,
        "RMSProp": 0.0002,
        "AdaGrad": 0.0002,
        "SGD": 0.02,
    }

    # Schedulers to test
    schedulers = [
        "linear",
        "cosine",
        "polynomial",
        "cyclic",
        "exponential",
        # "adaptive",
        "constant",
        "cosine_with_restarts",
        "constant_with_warmup",

    ]



    # Different learning rate scheduler configurations
    configs = []
    
    # # Constant learning rate (baseline)
    # configs.append({
    #     **base_config,
    #     "experiment_name": "vit_constant_lr",
    #     "learning_rate": 2e-4,
    #     "scheduler_name": "constant",
    # })

    for optimizer_name, learning_rate in optimizers.items():
        for scheduler_name in schedulers:
            config = {
                **base_config,
                "experiment_name": f"{optimizer_name}_{scheduler_name}",
                "optimizer_name": optimizer_name,
                "learning_rate": learning_rate,
                "scheduler_name": scheduler_name,
                "warmup_ratio": 0.1,  # Keep warmup ratio consistent
            }
            configs.append(config)


    # # Cosine with restarts
    # configs.append({
    #     **base_config,
    #     "experiment_name": "vit_cosine_restarts",
    #     "learning_rate": 2e-4,
    #     "scheduler_name": "cosine_with_restarts",
    #     "warmup_ratio": 0.1,
    # })
    
    # # Constant with warmup
    # configs.append({
    #     **base_config,
    #     "experiment_name": "vit_constant_warmup",
    #     "learning_rate": 2e-4,
    #     "scheduler_name": "constant_with_warmup",
    #     "warmup_ratio": 0.1,
    # })

    # # Linear decay
    # configs.append({
    #     **base_config,
    #     "experiment_name": "vit_linear_decay",
    #     "learning_rate": 5e-5,
    #     "scheduler_name": "linear",
    #     "warmup_ratio": 0.1,
    # })

    # # Cosine decay (commonly used with ViT)
    # configs.append({
    #     **base_config,
    #     "experiment_name": "vit_cosine_decay",
    #     "learning_rate": 5e-5,
    #     "scheduler_name": "cosine",
    #     "warmup_ratio": 0.1,
    # })


    # # Polynomial decay
    # configs.append({
    #     **base_config,
    #     "experiment_name": "vit_polynomial",
    #     "learning_rate": 5e-5,
    #     "scheduler_name": "polynomial",
    #     "warmup_ratio": 0.1,
    #     "poly_power": 2.0,
    # })


    # # Different learning rate experiments
    # for lr in [1e-5, 3e-5, 1e-4]:
    #     configs.append({
    #         **base_config,
    #         "experiment_name": f"vit_cosine_lr_{lr}",
    #         "learning_rate": lr,
    #         "scheduler_name": "cosine",
    #         "warmup_ratio": 0.1,
    #     })

    # # Different optimizer experiments
    # configs.append({
    #     **base_config,
    #     "experiment_name": "vit_sgd_cosine",
    #     "learning_rate": 0.01,  # Higher LR for SGD
    #     "scheduler_name": "cosine",
    #     "warmup_ratio": 0.1,
    #     "optimizer_name": "SGD",
    # })

    # here we can make changes to add new datasets to experiment.
    # or change batch_size to see the impact.
    # Other datasets to try (uncomment to use)
    #   Erik: We can use a data set as a strech. Maybe something less similar than plants for better contrasting comparison?
    # flowers dataset
    # configs.append({
    #     **base_config,
    #     "dataset_name": "huggan/flowers",
    #     "experiment_name": "vit_flowers_cosine",
    #     "learning_rate": 5e-5,
    #     "scheduler_name": "cosine",
    #     "warmup_ratio": 0.1,
    # })

    return configs

# Run experiments and visualize results
def run_all_experiments():
    configs = get_experiment_configs()
    results = []

    for config in configs:
        print(f"\n{'='*50}")
        print(f"Running experiment: {config['experiment_name']}")
        print(f"{'='*50}\n")

        eval_results = run_vit_experiment(config)
        results.append({
            "experiment": config['experiment_name'],
            "accuracy": eval_results["eval_accuracy"],
            "f1": eval_results["eval_f1"],
            "precision": eval_results["eval_precision"],
            "recall": eval_results["eval_recall"],
            "config": config
        })

    # Make sure to close the final run
    if wandb.run is not None:
        wandb.finish()

    return results

# Visualize and compare results
def visualize_results(results):
    # Create DataFrame for easier plotting
    df = pd.DataFrame([
        {
            "Experiment": result["experiment"],
            "Accuracy": result["accuracy"],
            "F1 Score": result["f1"],
            "Precision": result["precision"],
            "Recall": result["recall"],
            "Learning Rate": result["config"]["learning_rate"],
            "Scheduler": result["config"]["scheduler_name"],
            "Optimizer": result["config"]["optimizer_name"],
            "Dataset": result["config"]["dataset_name"]
        }
        for result in results
    ])

    # Plot accuracy comparison
    plt.figure(figsize=(14, 8))
    ax = plt.bar(df["Experiment"], df["Accuracy"], color='skyblue')
    plt.xlabel('Experiment')
    plt.ylabel('Accuracy')
    plt.title('Comparison of Model Accuracy Across Experiments')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig("accuracy_comparison.png")
    plt.close()

    # Plot all metrics for a more comprehensive comparison
    plt.figure(figsize=(16, 10))
    metrics = ["Accuracy", "F1 Score", "Precision", "Recall"]
    x = np.arange(len(df["Experiment"]))
    width = 0.2

    for i, metric in enumerate(metrics):
        plt.bar(x + i*width, df[metric], width=width, label=metric)

    plt.xlabel('Experiment')
    plt.ylabel('Score')
    plt.title('Comparison of Metrics Across Experiments')
    plt.xticks(x + width*1.5, df["Experiment"], rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    plt.savefig("metrics_comparison.png")
    plt.close()

    # Plot results by scheduler type
    plt.figure(figsize=(14, 8))
    schedulers = df["Scheduler"].unique()
    for scheduler in schedulers:
        scheduler_data = df[df["Scheduler"] == scheduler]
        plt.plot(scheduler_data["Learning Rate"], scheduler_data["Accuracy"], 'o-', label=scheduler)

    plt.xlabel('Learning Rate')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs. Learning Rate by Scheduler Type')
    plt.xscale('log')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("scheduler_comparison.png")
    plt.close()

    # Create a table with results
    print("Results Summary:")
    print(df[["Experiment", "Accuracy", "F1 Score", "Precision", "Recall", "Scheduler", "Learning Rate", "Optimizer", "Dataset"]])

    # Save results to CSV
    df.to_csv("experiment_results.csv", index=False)

    return df

# Function to run a single experiment (useful for testing)
def run_single_experiment(experiment_index=0):
    configs = get_experiment_configs()
    if experiment_index >= len(configs):
        print(f"Invalid experiment index. Choose between 0 and {len(configs)-1}")
        return

    config = configs[experiment_index]
    print(f"Running single experiment: {config['experiment_name']}")
    eval_results = run_vit_experiment(config)

    print(f"\nResults for {config['experiment_name']}:")
    print(f"Accuracy: {eval_results['eval_accuracy']:.4f}")
    print(f"F1 Score: {eval_results['eval_f1']:.4f}")
    print(f"Precision: {eval_results['eval_precision']:.4f}")
    print(f"Recall: {eval_results['eval_recall']:.4f}")

    return eval_results


In [5]:
def run_optimizer_sweep():
    # Initialize W&B run first, then access config
    with wandb.init() as run:
        print(f"W&B initialized: {run.name}")
        
        # Get config from sweep
        config = wandb.config
        
        # Set experiment name based on sweep parameters
        custom_name = f"vit_{config.optimizer_name}_{config.learning_rate}"
        # Update the run name after initialization
        wandb.run.name = custom_name
        wandb.run.save()
        
        print(f"Running experiment: {custom_name}")
        
        # Load model and processor
        model_name = "google/vit-base-patch16-224-in21k"
        dataset_name = "jbarat/plant_species"
        
        # Load the image processor
        image_processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)
        
        # Prepare dataset
        train_dataset, val_dataset, test_dataset, id2label, label2id = prepare_dataset(
            dataset_name, image_processor
        )
        
        # Load the ViT model
        model = ViTForImageClassification.from_pretrained(
            model_name,
            num_labels=len(id2label),
            id2label=id2label,
            label2id=label2id,
            ignore_mismatched_sizes=True
        )
        
        # Define training arguments
        training_args = TrainingArguments(
            output_dir=f"./results/{custom_name}",
            per_device_train_batch_size=config.batch_size,
            per_device_eval_batch_size=config.batch_size,
            num_train_epochs=config.num_epochs,
            weight_decay=0.01,
            eval_strategy="steps",
            save_strategy="steps",
            logging_strategy="steps",
            logging_steps=10,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            push_to_hub=False,
            report_to="wandb",
            remove_unused_columns=False,
            learning_rate=config.learning_rate,
        )
        # Set up optimizer based on config
        if config.optimizer_name == "schedule_free_adamw":
            optimizer = AdamWScheduleFree(
                model.parameters(),
                lr=config.learning_rate,  # Learning rate
                # warmup_steps=500  # Optional: Adjust based on your dataset
            )
        elif config.optimizer_name == "AdamW":
            optimizer = AdamW(model.parameters(), lr=config.learning_rate)
        elif config.optimizer_name == "SGD":
            optimizer = SGD(model.parameters(), lr=config.learning_rate, momentum=0.9)
        elif config.optimizer_name == "RMSProp":
            optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)
        elif config.optimizer_name == "AdaGrad":
            optimizer = torch.optim.Adagrad(model.parameters(), lr=config.learning_rate)
        else:
            optimizer = AdamW(model.parameters(), lr=config.learning_rate)        
        
        # Setup scheduler
        num_training_steps = len(train_dataset) // config.batch_size * config.num_epochs
        num_warmup_steps = int(num_training_steps * 0.1)  # 10% warmup
        
        scheduler = get_scheduler(
            config.scheduler_name,
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps
        )
        
        # Initialize Trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics,
            optimizers=(optimizer, scheduler)
        )
        
        # Train the model
        print(f"Starting training...")
        # optimizer.train()  # Switch optimizer to training mode only for schedule_free
        trainer.train()
        
        # Evaluate on validation dataset
        print(f"Evaluating on validation set...")
        # optimizer.eval()  # Switch optimizer to evaluation mode only for schedule_free
        eval_results = trainer.evaluate(val_dataset)
        
        # Log validation metrics
        run.log({
            "val_accuracy": eval_results["eval_accuracy"],
            "val_f1": eval_results["eval_f1"],
            "val_precision": eval_results["eval_precision"],
            "val_recall": eval_results["eval_recall"],
            "val_loss": eval_results["eval_loss"]
        })
        
        # Evaluate on test dataset
        print(f"Evaluating on test set...")
        test_results = trainer.evaluate(test_dataset)
        
        # Log test metrics
        run.log({
            "test_accuracy": test_results["eval_accuracy"],
            "test_f1": test_results["eval_f1"],
            "test_precision": test_results["eval_precision"],
            "test_recall": test_results["eval_recall"],
            "test_loss": test_results["eval_loss"]
        })
        
        # Compute confusion matrix for test set
        predictions, labels, _ = trainer.predict(test_dataset)
        predictions = np.argmax(predictions, axis=1)
        
        # Log confusion matrix
        run.log({
            "confusion_matrix": wandb.plot.confusion_matrix(
                probs=None,
                y_true=labels.tolist(),
                preds=predictions.tolist(),
                class_names=[id2label[i] for i in range(len(id2label))]
            )
        })
        
        # Save the model
        model_path = f"./saved_models/{custom_name}"
        trainer.save_model(model_path)
        print(f"Model saved to {model_path}")

In [6]:

# Main execution
if __name__ == "__main__":
    print("Starting ViT experiments with different learning rate schedulers...")
    os.environ["WANDB_PROJECT"] = "ViT-LR-Schedulers"

    # Option 1: Run all experiments (time-consuming)
    results = run_all_experiments()
    results_df = visualize_results(results)

    # Option 2: Run a single experiment for testing
    # run_single_experiment(0)  # Try the baseline experiment first

    #option 3: Optimizer sweep:
    # Define sweep configuration
    # sweep_config = {
    #     "method": "grid",  # we can use "grid", "random", or "bayes"
    #     "metric": {
    #         "name": "val_accuracy",  # Metric to optimize
    #         "goal": "maximize"       # Goal: maximize or minimize
    #     },
    #     "parameters": {
    #         "optimizer_name": {
    #             "values": ["schedule_free_adamw","AdamW", "SGD", "RMSProp", "AdaGrad"]  # Optimizers to test
    #         },
    #         "learning_rate": {
    #             "values": [2e-5, 2e-4, 2e-3, 2e-2, 2e-1]  # Learning rates to test
    #         },
    #         "batch_size": {
    #             "values": [16]  # Fixed batch size
    #         },
    #         "num_epochs": {
    #             "values": [3]  # Fixed number of epochs
    #         },
    #         "scheduler_name": {
    #             "values": ["cosine"]  # Fixed scheduler for simplicity
    #         }
    #     }
    # }
    
    # # Initialize the sweep
    # sweep_id = wandb.sweep(sweep_config, project="ViT-Optimizer-Sweep")
    # wandb.agent(sweep_id, function=run_optimizer_sweep)
    

    print("Experiments completed!")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Starting ViT experiments with different learning rate schedulers...

Running experiment: schedule_free_adamw_linear



[34m[1mwandb[0m: Currently logged in as: [33mewg[0m ([33mdl_project_sp25[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for schedule_free_adamw_linear...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.969,1.785059,0.5625,0.536508,0.570873,0.5625
20,1.6609,1.534147,0.710938,0.693423,0.725606,0.710938
30,1.4747,1.365635,0.742188,0.732892,0.756633,0.742188
40,1.3008,1.264886,0.796875,0.791564,0.802535,0.796875


Evaluating schedule_free_adamw_linear...



Running experiment: schedule_free_adamw_cosine



0,1
eval/accuracy,▁▅▆█▅
eval/f1,▁▅▆█▅
eval/loss,█▅▂▁▁
eval/precision,▁▆▇█▇
eval/recall,▁▅▆█▅
eval/runtime,█▇▇▇▁
eval/samples_per_second,▁██▅▁
eval/steps_per_second,▁██▄▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.6875
eval/f1,0.66977
eval/loss,1.28794
eval/precision,0.75986
eval/recall,0.6875
eval/runtime,1.9481
eval/samples_per_second,16.426
eval/steps_per_second,1.027
final_accuracy,0.6875
final_f1,0.66977


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for schedule_free_adamw_cosine...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0045,1.822906,0.523438,0.455945,0.441909,0.523438
20,1.6736,1.545436,0.703125,0.676473,0.704491,0.703125
30,1.4604,1.359837,0.703125,0.680371,0.739692,0.703125
40,1.3287,1.271814,0.703125,0.685395,0.765694,0.703125


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating schedule_free_adamw_cosine...



Running experiment: schedule_free_adamw_polynomial



0,1
eval/accuracy,▁▇▇▇█
eval/f1,▁▇▇██
eval/loss,█▄▂▁▁
eval/precision,▁▇▇██
eval/recall,▁▇▇▇█
eval/runtime,▆▇██▁
eval/samples_per_second,██▇▇▁
eval/steps_per_second,██▇▇▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.71875
eval/f1,0.70139
eval/loss,1.29235
eval/precision,0.76488
eval/recall,0.71875
eval/runtime,4.3331
eval/samples_per_second,7.385
eval/steps_per_second,0.462
final_accuracy,0.71875
final_f1,0.70139


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for schedule_free_adamw_polynomial...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0052,1.829432,0.5,0.437407,0.424158,0.5
20,1.6946,1.577289,0.679688,0.655775,0.678674,0.679688
30,1.4981,1.394659,0.703125,0.675911,0.75802,0.703125
40,1.3515,1.293195,0.742188,0.737769,0.771663,0.742188


Evaluating schedule_free_adamw_polynomial...



Running experiment: schedule_free_adamw_cyclic



0,1
eval/accuracy,▁▆▇█▇
eval/f1,▁▆▇█▇
eval/loss,█▅▂▁▁
eval/precision,▁▆███
eval/recall,▁▆▇█▇
eval/runtime,█▇█▇▁
eval/samples_per_second,▅█▅█▁
eval/steps_per_second,▅█▅█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.71875
eval/f1,0.70139
eval/loss,1.3112
eval/precision,0.76488
eval/recall,0.71875
eval/runtime,2.4683
eval/samples_per_second,12.964
eval/steps_per_second,0.81
final_accuracy,0.71875
final_f1,0.70139


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for schedule_free_adamw_cyclic...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0414,1.918419,0.460938,0.430031,0.463105,0.460938
20,1.7589,1.617473,0.703125,0.685546,0.730205,0.703125
30,1.4948,1.38442,0.671875,0.659804,0.704787,0.671875
40,1.3251,1.26186,0.726562,0.731078,0.746382,0.726562


Evaluating schedule_free_adamw_cyclic...



Running experiment: schedule_free_adamw_exponential



0,1
eval/accuracy,▁▇▇██
eval/f1,▁▇▆█▇
eval/loss,█▅▂▁▁
eval/precision,▁█▇██
eval/recall,▁▇▇██
eval/runtime,█▇▇▇▁
eval/samples_per_second,▄███▁
eval/steps_per_second,▄███▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.71875
eval/f1,0.70682
eval/loss,1.26214
eval/precision,0.7474
eval/recall,0.71875
eval/runtime,2.1355
eval/samples_per_second,14.985
eval/steps_per_second,0.937
final_accuracy,0.71875
final_f1,0.70682


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for schedule_free_adamw_exponential...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9464,1.808017,0.5625,0.527744,0.62094,0.5625
20,1.7021,1.628185,0.671875,0.659342,0.690588,0.671875
30,1.5816,1.501522,0.6875,0.680385,0.716025,0.6875
40,1.474,1.410262,0.710938,0.700736,0.738974,0.710938


Evaluating schedule_free_adamw_exponential...



Running experiment: schedule_free_adamw_constant



0,1
eval/accuracy,▁▆▇█▂
eval/f1,▁▆▇█▃
eval/loss,█▅▃▁▂
eval/precision,▁▅▇█▁
eval/recall,▁▆▇█▂
eval/runtime,████▁
eval/samples_per_second,▇███▁
eval/steps_per_second,▇███▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.59375
eval/f1,0.5875
eval/loss,1.45323
eval/precision,0.62202
eval/recall,0.59375
eval/runtime,2.1799
eval/samples_per_second,14.68
eval/steps_per_second,0.917
final_accuracy,0.59375
final_f1,0.5875


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for schedule_free_adamw_constant...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9339,1.765116,0.640625,0.621034,0.692215,0.640625
20,1.6047,1.483805,0.726562,0.720873,0.746646,0.726562
30,1.3708,1.236748,0.765625,0.757758,0.790122,0.765625
40,1.1596,1.081425,0.75,0.751156,0.775976,0.75


Evaluating schedule_free_adamw_constant...



Running experiment: schedule_free_adamw_cosine_with_restarts



0,1
eval/accuracy,▁▆█▇▇
eval/f1,▁▆██▇
eval/loss,█▅▃▁▁
eval/precision,▁▅█▇▇
eval/recall,▁▆█▇▇
eval/runtime,█▇▇▇▁
eval/samples_per_second,▁▇▇█▅
eval/steps_per_second,▁▇▇█▅
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.75
eval/f1,0.73957
eval/loss,1.10172
eval/precision,0.77344
eval/recall,0.75
eval/runtime,1.7841
eval/samples_per_second,17.936
eval/steps_per_second,1.121
final_accuracy,0.75
final_f1,0.73957


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for schedule_free_adamw_cosine_with_restarts...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0045,1.822906,0.523438,0.455945,0.441909,0.523438
20,1.6736,1.545436,0.703125,0.676473,0.704491,0.703125
30,1.4604,1.359837,0.703125,0.680371,0.739692,0.703125
40,1.3287,1.271814,0.703125,0.685395,0.765694,0.703125


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating schedule_free_adamw_cosine_with_restarts...



Running experiment: schedule_free_adamw_constant_with_warmup



0,1
eval/accuracy,▁▇▇▇█
eval/f1,▁▇▇██
eval/loss,█▄▂▁▁
eval/precision,▁▇▇██
eval/recall,▁▇▇▇█
eval/runtime,▇▇▇█▁
eval/samples_per_second,███▇▁
eval/steps_per_second,███▇▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.71875
eval/f1,0.70139
eval/loss,1.29235
eval/precision,0.76488
eval/recall,0.71875
eval/runtime,3.8908
eval/samples_per_second,8.225
eval/steps_per_second,0.514
final_accuracy,0.71875
final_f1,0.70139


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for schedule_free_adamw_constant_with_warmup...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0043,1.820672,0.53125,0.466744,0.451774,0.53125
20,1.662,1.515369,0.71875,0.69679,0.728471,0.71875
30,1.4005,1.253473,0.726562,0.708423,0.788283,0.726562
40,1.147,1.078237,0.78125,0.78133,0.790185,0.78125


Evaluating schedule_free_adamw_constant_with_warmup...



Running experiment: AdamW_linear



0,1
eval/accuracy,▁▆▆▇█
eval/f1,▁▆▆▇█
eval/loss,█▅▃▁▁
eval/precision,▁▅▆▆█
eval/recall,▁▆▆▇█
eval/runtime,█▇█▇▁
eval/samples_per_second,▆▇▅█▁
eval/steps_per_second,▆▇▅█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.8125
eval/f1,0.81129
eval/loss,1.09211
eval/precision,0.89583
eval/recall,0.8125
eval/runtime,1.9812
eval/samples_per_second,16.152
eval/steps_per_second,1.009
final_accuracy,0.8125
final_f1,0.81129


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdamW_linear...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9756,1.738839,0.601562,0.575367,0.706725,0.601562
20,1.5888,1.414873,0.695312,0.68729,0.786133,0.695312
30,1.3842,1.242133,0.742188,0.741756,0.794458,0.742188
40,1.246,1.173809,0.78125,0.781559,0.801504,0.78125


Evaluating AdamW_linear...



Running experiment: AdamW_cosine



0,1
eval/accuracy,▁▅▆█▄
eval/f1,▁▅▇█▄
eval/loss,█▄▂▁▂
eval/precision,▁▇▇█▂
eval/recall,▁▅▆█▄
eval/runtime,███▇▁
eval/samples_per_second,▄▅▁█▁
eval/steps_per_second,▄▅▁█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.6875
eval/f1,0.6776
eval/loss,1.21528
eval/precision,0.72619
eval/recall,0.6875
eval/runtime,1.771
eval/samples_per_second,18.069
eval/steps_per_second,1.129
final_accuracy,0.6875
final_f1,0.6776


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdamW_cosine...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9742,1.728708,0.59375,0.569847,0.685227,0.59375
20,1.5679,1.381277,0.695312,0.687373,0.769771,0.695312
30,1.3505,1.232498,0.75,0.743582,0.77433,0.75
40,1.2479,1.199448,0.75,0.746359,0.767571,0.75


Evaluating AdamW_cosine...



Running experiment: AdamW_polynomial



0,1
eval/accuracy,▁▆███
eval/f1,▁▆███
eval/loss,█▃▁▁▁
eval/precision,▁▆▆▆█
eval/recall,▁▆███
eval/runtime,████▁
eval/samples_per_second,▇▆█▂▁
eval/steps_per_second,▇▆█▂▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.75
eval/f1,0.73785
eval/loss,1.23107
eval/precision,0.81027
eval/recall,0.75
eval/runtime,1.7512
eval/samples_per_second,18.273
eval/steps_per_second,1.142
final_accuracy,0.75
final_f1,0.73785


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdamW_polynomial...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9756,1.738788,0.601562,0.575367,0.706725,0.601562
20,1.5896,1.412154,0.6875,0.678462,0.772592,0.6875
30,1.3814,1.238309,0.742188,0.739534,0.776489,0.742188
40,1.2474,1.172677,0.78125,0.781559,0.801504,0.78125


Evaluating AdamW_polynomial...



Running experiment: AdamW_cyclic



0,1
eval/accuracy,▁▄▆█▄
eval/f1,▁▄▇█▄
eval/loss,█▄▂▁▁
eval/precision,▁▆▆█▂
eval/recall,▁▄▆█▄
eval/runtime,████▁
eval/samples_per_second,▆▇▆█▁
eval/steps_per_second,▆▇▆█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.6875
eval/f1,0.6776
eval/loss,1.20339
eval/precision,0.72619
eval/recall,0.6875
eval/runtime,1.7812
eval/samples_per_second,17.966
eval/steps_per_second,1.123
final_accuracy,0.6875
final_f1,0.6776


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdamW_cyclic...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0245,1.84746,0.570312,0.546791,0.637041,0.570312
20,1.6674,1.44733,0.71875,0.714054,0.758567,0.71875
30,1.3839,1.216116,0.71875,0.701237,0.76184,0.71875
40,1.2078,1.123825,0.765625,0.764782,0.793841,0.765625


Evaluating AdamW_cyclic...



Running experiment: AdamW_exponential



0,1
eval/accuracy,▁▆▆█▇
eval/f1,▁▆▆█▇
eval/loss,█▄▂▁▁
eval/precision,▁▅▅▆█
eval/recall,▁▆▆█▇
eval/runtime,▇▇█▇▁
eval/samples_per_second,██▇█▁
eval/steps_per_second,██▇█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.75
eval/f1,0.74503
eval/loss,1.12907
eval/precision,0.86615
eval/recall,0.75
eval/runtime,2.5031
eval/samples_per_second,12.784
eval/steps_per_second,0.799
final_accuracy,0.75
final_f1,0.74503


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdamW_exponential...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9013,1.687914,0.625,0.588474,0.646874,0.625
20,1.5794,1.465805,0.71875,0.70475,0.755665,0.71875
30,1.4419,1.325186,0.71875,0.707302,0.773397,0.71875
40,1.3347,1.246722,0.757812,0.756578,0.790443,0.757812


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating AdamW_exponential...



Running experiment: AdamW_constant



0,1
eval/accuracy,▁▆▆█▁
eval/f1,▁▆▆█▃
eval/loss,█▄▂▁▂
eval/precision,▁▆▇█▄
eval/recall,▁▆▆█▁
eval/runtime,█▇▇█▁
eval/samples_per_second,▄██▄▁
eval/steps_per_second,▄██▄▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.62969
eval/loss,1.31451
eval/precision,0.70379
eval/recall,0.625
eval/runtime,1.9897
eval/samples_per_second,16.083
eval/steps_per_second,1.005
final_accuracy,0.625
final_f1,0.62969


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdamW_constant...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.8835,1.655026,0.601562,0.553895,0.599219,0.601562
20,1.4746,1.296949,0.703125,0.692798,0.770689,0.703125
30,1.2324,1.073685,0.773438,0.774397,0.780395,0.773438
40,1.0495,0.968165,0.734375,0.742283,0.828007,0.734375


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating AdamW_constant...



Running experiment: AdamW_cosine_with_restarts



0,1
eval/accuracy,▁▅█▆▇
eval/f1,▁▅█▇▇
eval/loss,█▄▂▁▁
eval/precision,▁▆▆▇█
eval/recall,▁▅█▆▇
eval/runtime,████▁
eval/samples_per_second,▆▆▇█▁
eval/steps_per_second,▆▆▇█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.75
eval/f1,0.741
eval/loss,0.97039
eval/precision,0.84866
eval/recall,0.75
eval/runtime,1.846
eval/samples_per_second,17.335
eval/steps_per_second,1.083
final_accuracy,0.75
final_f1,0.741


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdamW_cosine_with_restarts...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9742,1.728708,0.59375,0.569847,0.685227,0.59375
20,1.5679,1.381277,0.695312,0.687373,0.769771,0.695312
30,1.3505,1.232498,0.75,0.743582,0.77433,0.75
40,1.2479,1.199448,0.75,0.746359,0.767571,0.75


Evaluating AdamW_cosine_with_restarts...



Running experiment: AdamW_constant_with_warmup



0,1
eval/accuracy,▁▆███
eval/f1,▁▆███
eval/loss,█▃▁▁▁
eval/precision,▁▆▆▆█
eval/recall,▁▆███
eval/runtime,▇█▇▇▁
eval/samples_per_second,▆▁██▅
eval/steps_per_second,▆▁██▅
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.75
eval/f1,0.73785
eval/loss,1.23107
eval/precision,0.81027
eval/recall,0.75
eval/runtime,1.7877
eval/samples_per_second,17.9
eval/steps_per_second,1.119
final_accuracy,0.75
final_f1,0.73785


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdamW_constant_with_warmup...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9743,1.726062,0.585938,0.558176,0.663926,0.585938
20,1.5429,1.32407,0.65625,0.639517,0.766891,0.65625
30,1.2618,1.103872,0.765625,0.76943,0.793967,0.765625
40,1.0235,0.934381,0.734375,0.743295,0.795639,0.734375


Evaluating AdamW_constant_with_warmup...



Running experiment: RMSProp_linear



0,1
eval/accuracy,▁▄▇▆█
eval/f1,▁▄█▇█
eval/loss,█▄▃▁▁
eval/precision,▁▅▆▆█
eval/recall,▁▄▇▆█
eval/runtime,████▁
eval/samples_per_second,▄▇█▅▁
eval/steps_per_second,▄▇█▅▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.78125
eval/f1,0.77131
eval/loss,0.92843
eval/precision,0.86354
eval/recall,0.78125
eval/runtime,1.7726
eval/samples_per_second,18.052
eval/steps_per_second,1.128
final_accuracy,0.78125
final_f1,0.77131


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for RMSProp_linear...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0649,2.037004,0.15625,0.043403,0.025202,0.15625
20,1.7953,1.61336,0.351562,0.297356,0.485626,0.351562
30,1.4457,1.113543,0.71875,0.708677,0.719411,0.71875
40,1.089,1.025261,0.742188,0.74612,0.773297,0.742188


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating RMSProp_linear...



Running experiment: RMSProp_cosine



0,1
eval/accuracy,▁▃██▇
eval/f1,▁▄██▇
eval/loss,█▅▂▁▁
eval/precision,▁▅▇██
eval/recall,▁▃██▇
eval/runtime,████▁
eval/samples_per_second,▇█▇▇▁
eval/steps_per_second,▇█▇▇▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.6875
eval/f1,0.67505
eval/loss,1.04252
eval/precision,0.76178
eval/recall,0.6875
eval/runtime,1.8162
eval/samples_per_second,17.619
eval/steps_per_second,1.101
final_accuracy,0.6875
final_f1,0.67505


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for RMSProp_cosine...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0781,2.053473,0.15625,0.042517,0.024606,0.15625
20,1.8833,1.531483,0.46875,0.43224,0.460962,0.46875
30,1.4552,1.160432,0.695312,0.692808,0.704568,0.695312
40,1.159,1.119615,0.710938,0.706062,0.728813,0.710938


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating RMSProp_cosine...



Running experiment: RMSProp_polynomial



0,1
eval/accuracy,▁▅██▇
eval/f1,▁▅██▇
eval/loss,█▄▁▁▁
eval/precision,▁▅███
eval/recall,▁▅██▇
eval/runtime,████▁
eval/samples_per_second,▅█▄▅▁
eval/steps_per_second,▅█▄▅▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.5969
eval/loss,1.13021
eval/precision,0.7322
eval/recall,0.625
eval/runtime,1.7807
eval/samples_per_second,17.97
eval/steps_per_second,1.123
final_accuracy,0.625
final_f1,0.5969


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for RMSProp_polynomial...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0651,2.039762,0.15625,0.042517,0.024606,0.15625
20,1.83,1.507426,0.515625,0.481928,0.524813,0.515625
30,1.441,1.208421,0.6875,0.688099,0.703151,0.6875
40,1.1522,1.081533,0.757812,0.758268,0.773814,0.757812


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating RMSProp_polynomial...



Running experiment: RMSProp_cyclic



0,1
eval/accuracy,▁▅▇█▇
eval/f1,▁▅▇█▇
eval/loss,█▄▂▁▁
eval/precision,▁▆▇██
eval/recall,▁▅▇█▇
eval/runtime,████▁
eval/samples_per_second,████▁
eval/steps_per_second,██▇█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.65625
eval/f1,0.62892
eval/loss,1.11313
eval/precision,0.75484
eval/recall,0.65625
eval/runtime,1.816
eval/samples_per_second,17.621
eval/steps_per_second,1.101
final_accuracy,0.65625
final_f1,0.62892


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for RMSProp_cyclic...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9231,1.687377,0.554688,0.495165,0.578912,0.554688
20,2.0321,2.076891,0.15625,0.04223,0.024414,0.15625
30,2.0884,2.078687,0.125,0.045616,0.056822,0.125
40,2.083,2.0794,0.109375,0.021567,0.011963,0.109375


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating RMSProp_cyclic...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Running experiment: RMSProp_exponential



0,1
eval/accuracy,█▂▁▁▁
eval/f1,█▁▁▁▁
eval/loss,▁████
eval/precision,█▁▂▁▁
eval/recall,█▂▁▁▁
eval/runtime,████▁
eval/samples_per_second,▇▆▅█▁
eval/steps_per_second,▇▆▅█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.125
eval/f1,0.03472
eval/loss,2.07143
eval/precision,0.02016
eval/recall,0.125
eval/runtime,1.7488
eval/samples_per_second,18.298
eval/steps_per_second,1.144
final_accuracy,0.125
final_f1,0.03472


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for RMSProp_exponential...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.116,2.088271,0.117188,0.024585,0.013733,0.117188
20,2.0995,2.08053,0.109375,0.02172,0.012057,0.109375
30,2.0845,2.079837,0.140625,0.064912,0.048751,0.140625
40,2.0794,2.069796,0.1875,0.099839,0.089115,0.1875


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating RMSProp_exponential...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Running experiment: RMSProp_constant



0,1
eval/accuracy,▂▁▄█▅
eval/f1,▁▁▅█▇
eval/loss,█▅▅▁▅
eval/precision,▁▁▄█▅
eval/recall,▂▁▄█▅
eval/runtime,████▁
eval/samples_per_second,▆▆▆█▁
eval/steps_per_second,▆▆▆█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.15625
eval/f1,0.08364
eval/loss,2.07939
eval/precision,0.05729
eval/recall,0.15625
eval/runtime,1.7821
eval/samples_per_second,17.957
eval/steps_per_second,1.122
final_accuracy,0.15625
final_f1,0.08364


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for RMSProp_constant...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.1323,2.087856,0.117188,0.024585,0.013733,0.117188
20,2.1071,2.072129,0.15625,0.042517,0.024606,0.15625
30,2.0868,2.011781,0.210938,0.137153,0.140625,0.210938
40,2.0818,2.069034,0.1875,0.121596,0.174322,0.1875


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating RMSProp_constant...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Running experiment: RMSProp_cosine_with_restarts



0,1
eval/accuracy,▁▄█▆▄
eval/f1,▁▂█▇▄
eval/loss,█▇▁▆█
eval/precision,▁▁▇█▂
eval/recall,▁▄█▆▄
eval/runtime,████▁
eval/samples_per_second,▅██▇▁
eval/steps_per_second,▅██▇▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.15625
eval/f1,0.07083
eval/loss,2.08417
eval/precision,0.04688
eval/recall,0.15625
eval/runtime,1.8655
eval/samples_per_second,17.154
eval/steps_per_second,1.072
final_accuracy,0.15625
final_f1,0.07083


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for RMSProp_cosine_with_restarts...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0781,2.053473,0.15625,0.042517,0.024606,0.15625
20,1.8833,1.531483,0.46875,0.43224,0.460962,0.46875
30,1.4552,1.160432,0.695312,0.692808,0.704568,0.695312
40,1.159,1.119615,0.710938,0.706062,0.728813,0.710938


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating RMSProp_cosine_with_restarts...



Running experiment: RMSProp_constant_with_warmup



0,1
eval/accuracy,▁▅██▇
eval/f1,▁▅██▇
eval/loss,█▄▁▁▁
eval/precision,▁▅███
eval/recall,▁▅██▇
eval/runtime,████▁
eval/samples_per_second,▇█▆█▁
eval/steps_per_second,▇█▆█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.5969
eval/loss,1.13021
eval/precision,0.7322
eval/recall,0.625
eval/runtime,1.7888
eval/samples_per_second,17.889
eval/steps_per_second,1.118
final_accuracy,0.625
final_f1,0.5969


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for RMSProp_constant_with_warmup...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0826,2.01329,0.195312,0.080926,0.051322,0.195312
20,2.001,1.73453,0.375,0.324564,0.468404,0.375
30,1.8858,1.568198,0.484375,0.4711,0.566581,0.484375
40,1.6755,1.524685,0.453125,0.426373,0.485537,0.453125


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating RMSProp_constant_with_warmup...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Running experiment: AdaGrad_linear



0,1
eval/accuracy,▁▅█▇▅
eval/f1,▁▅█▇▆
eval/loss,█▄▂▁▃
eval/precision,▁▇█▇█
eval/recall,▁▅█▇▅
eval/runtime,████▁
eval/samples_per_second,████▁
eval/steps_per_second,████▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.375
eval/f1,0.37296
eval/loss,1.64107
eval/precision,0.56042
eval/recall,0.375
eval/runtime,2.0846
eval/samples_per_second,15.351
eval/steps_per_second,0.959
final_accuracy,0.375
final_f1,0.37296


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdaGrad_linear...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.029,1.904799,0.453125,0.412828,0.451332,0.453125
20,1.8421,1.768967,0.617188,0.600757,0.651637,0.617188
30,1.7729,1.70488,0.6875,0.677798,0.693264,0.6875
40,1.7233,1.683993,0.695312,0.690554,0.709719,0.695312


Evaluating AdaGrad_linear...



Running experiment: AdaGrad_cosine



0,1
eval/accuracy,▁▆██▆
eval/f1,▁▆██▆
eval/loss,█▄▂▁▁
eval/precision,▁▆▇▇█
eval/recall,▁▆██▆
eval/runtime,████▁
eval/samples_per_second,▇▇▄█▁
eval/steps_per_second,▇▇▄█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.60608
eval/loss,1.66723
eval/precision,0.74033
eval/recall,0.625
eval/runtime,1.7604
eval/samples_per_second,18.178
eval/steps_per_second,1.136
final_accuracy,0.625
final_f1,0.60608


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdaGrad_cosine...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0283,1.895772,0.46875,0.431102,0.473485,0.46875
20,1.8286,1.750714,0.625,0.615312,0.672165,0.625
30,1.7545,1.687636,0.671875,0.667164,0.678261,0.671875
40,1.7136,1.67778,0.679688,0.675768,0.692624,0.679688


Evaluating AdaGrad_cosine...



Running experiment: AdaGrad_polynomial



0,1
eval/accuracy,▁▆██▇
eval/f1,▁▆██▇
eval/loss,█▄▂▁▁
eval/precision,▁▇███
eval/recall,▁▆██▇
eval/runtime,████▁
eval/samples_per_second,▇▆▅█▁
eval/steps_per_second,▇▆▅█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.65625
eval/f1,0.65455
eval/loss,1.66387
eval/precision,0.69271
eval/recall,0.65625
eval/runtime,1.7982
eval/samples_per_second,17.796
eval/steps_per_second,1.112
final_accuracy,0.65625
final_f1,0.65455


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdaGrad_polynomial...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.029,1.904788,0.453125,0.412828,0.451332,0.453125
20,1.8436,1.771494,0.625,0.609987,0.66055,0.625
30,1.7779,1.706696,0.664062,0.656226,0.670772,0.664062
40,1.7271,1.685879,0.679688,0.677485,0.703575,0.679688


Evaluating AdaGrad_polynomial...



Running experiment: AdaGrad_cyclic



0,1
eval/accuracy,▁▆██▆
eval/f1,▁▆▇█▆
eval/loss,█▄▂▂▁
eval/precision,▁▇▇██
eval/recall,▁▆██▆
eval/runtime,████▁
eval/samples_per_second,▂▇▃█▁
eval/steps_per_second,▂▇▃█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.6117
eval/loss,1.66859
eval/precision,0.70156
eval/recall,0.625
eval/runtime,1.7216
eval/samples_per_second,18.587
eval/steps_per_second,1.162
final_accuracy,0.625
final_f1,0.6117


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdaGrad_cyclic...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0588,1.987516,0.375,0.356469,0.385841,0.375
20,1.9073,1.82782,0.59375,0.575573,0.608016,0.59375
30,1.8032,1.73922,0.625,0.620006,0.633901,0.625
40,1.7415,1.705353,0.640625,0.637246,0.655621,0.640625


Evaluating AdaGrad_cyclic...



Running experiment: AdaGrad_exponential



0,1
eval/accuracy,▁▇██▇
eval/f1,▁▆██▇
eval/loss,█▄▂▁▁
eval/precision,▁▇▇█▇
eval/recall,▁▇██▇
eval/runtime,████▁
eval/samples_per_second,█▇█▇▁
eval/steps_per_second,█▇█▇▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.59375
eval/f1,0.58045
eval/loss,1.68392
eval/precision,0.63542
eval/recall,0.59375
eval/runtime,1.8159
eval/samples_per_second,17.622
eval/steps_per_second,1.101
final_accuracy,0.59375
final_f1,0.58045


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdaGrad_exponential...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9509,1.830168,0.570312,0.539615,0.600633,0.570312
20,1.7896,1.742023,0.617188,0.585584,0.620709,0.617188
30,1.7613,1.705084,0.648438,0.623617,0.673866,0.648438
40,1.7291,1.683209,0.664062,0.644618,0.693414,0.664062


Evaluating AdaGrad_exponential...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Running experiment: AdaGrad_constant



0,1
eval/accuracy,▁▅▇█▃
eval/f1,▁▄▇█▃
eval/loss,█▄▂▁▂
eval/precision,▁▃▇█▆
eval/recall,▁▅▇█▃
eval/runtime,▇█▇█▁
eval/samples_per_second,█▁█▃▆
eval/steps_per_second,█▁█▃▆
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.59375
eval/f1,0.573
eval/loss,1.69698
eval/precision,0.66183
eval/recall,0.59375
eval/runtime,1.7457
eval/samples_per_second,18.331
eval/steps_per_second,1.146
final_accuracy,0.59375
final_f1,0.573


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdaGrad_constant...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.9393,1.785121,0.640625,0.620058,0.678666,0.640625
20,1.7119,1.617373,0.664062,0.647575,0.722936,0.664062
30,1.5821,1.468101,0.71875,0.719058,0.734227,0.71875
40,1.4626,1.372445,0.703125,0.697905,0.736233,0.703125


Evaluating AdaGrad_constant...



Running experiment: AdaGrad_cosine_with_restarts



0,1
eval/accuracy,▁▃█▇▅
eval/f1,▁▃█▇▆
eval/loss,█▅▃▁▂
eval/precision,▁▃▄▄█
eval/recall,▁▃█▇▅
eval/runtime,██▇█▁
eval/samples_per_second,▇▄█▇▁
eval/steps_per_second,▇▄█▇▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.6875
eval/f1,0.68512
eval/loss,1.40856
eval/precision,0.82812
eval/recall,0.6875
eval/runtime,1.8806
eval/samples_per_second,17.016
eval/steps_per_second,1.064
final_accuracy,0.6875
final_f1,0.68512


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdaGrad_cosine_with_restarts...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0283,1.895772,0.46875,0.431102,0.473485,0.46875
20,1.8286,1.750714,0.625,0.615312,0.672165,0.625
30,1.7545,1.687636,0.671875,0.667164,0.678261,0.671875
40,1.7136,1.67778,0.679688,0.675768,0.692624,0.679688


Evaluating AdaGrad_cosine_with_restarts...



Running experiment: AdaGrad_constant_with_warmup



0,1
eval/accuracy,▁▆██▇
eval/f1,▁▆██▇
eval/loss,█▄▂▁▁
eval/precision,▁▇███
eval/recall,▁▆██▇
eval/runtime,████▁
eval/samples_per_second,███▁▁
eval/steps_per_second,███▁▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.65625
eval/f1,0.65455
eval/loss,1.66387
eval/precision,0.69271
eval/recall,0.65625
eval/runtime,1.7498
eval/samples_per_second,18.288
eval/steps_per_second,1.143
final_accuracy,0.65625
final_f1,0.65455


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for AdaGrad_constant_with_warmup...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0283,1.892875,0.46875,0.431102,0.473485,0.46875
20,1.814,1.711545,0.664062,0.655242,0.686345,0.664062
30,1.6849,1.566004,0.664062,0.660124,0.66743,0.664062
40,1.5466,1.455386,0.664062,0.664035,0.734983,0.664062


Evaluating AdaGrad_constant_with_warmup...



Running experiment: SGD_linear



0,1
eval/accuracy,▁▆▆▆█
eval/f1,▁▆▆▆█
eval/loss,█▅▃▁▁
eval/precision,▁▅▅▆█
eval/recall,▁▆▆▆█
eval/runtime,████▁
eval/samples_per_second,▄▇██▁
eval/steps_per_second,▄▇██▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.75
eval/f1,0.73559
eval/loss,1.47829
eval/precision,0.85417
eval/recall,0.75
eval/runtime,1.8388
eval/samples_per_second,17.402
eval/steps_per_second,1.088
final_accuracy,0.75
final_f1,0.73559


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for SGD_linear...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0402,1.875883,0.539062,0.506938,0.533774,0.539062
20,1.7121,1.530677,0.710938,0.694397,0.718191,0.710938
30,1.4684,1.32256,0.734375,0.723579,0.758398,0.734375
40,1.3328,1.25926,0.742188,0.729673,0.773403,0.742188


Evaluating SGD_linear...



Running experiment: SGD_cosine



0,1
eval/accuracy,▁▇██▄
eval/f1,▁▇██▃
eval/loss,█▄▂▁▁
eval/precision,▁▆██▂
eval/recall,▁▇██▄
eval/runtime,█▅▇▇▁
eval/samples_per_second,▁█▂▂▇
eval/steps_per_second,▁█▂▂▇
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.57761
eval/loss,1.23707
eval/precision,0.56944
eval/recall,0.625
eval/runtime,1.8263
eval/samples_per_second,17.522
eval/steps_per_second,1.095
final_accuracy,0.625
final_f1,0.57761


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for SGD_cosine...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0389,1.864676,0.539062,0.508032,0.529822,0.539062
20,1.6837,1.481734,0.710938,0.695309,0.723851,0.710938
30,1.4274,1.293991,0.734375,0.721809,0.7638,0.734375
40,1.3247,1.264845,0.742188,0.732071,0.771488,0.742188


Evaluating SGD_cosine...



Running experiment: SGD_polynomial



0,1
eval/accuracy,▁▇██▄
eval/f1,▁▇██▃
eval/loss,█▄▂▁▁
eval/precision,▁▇██▂
eval/recall,▁▇██▄
eval/runtime,▇█▇▇▁
eval/samples_per_second,▇▁██▃
eval/steps_per_second,▇▁██▃
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.56942
eval/loss,1.24508
eval/precision,0.56027
eval/recall,0.625
eval/runtime,1.843
eval/samples_per_second,17.363
eval/steps_per_second,1.085
final_accuracy,0.625
final_f1,0.56942


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for SGD_polynomial...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0402,1.875883,0.539062,0.506938,0.533774,0.539062
20,1.7121,1.530592,0.710938,0.694397,0.718191,0.710938
30,1.4665,1.316684,0.742188,0.731327,0.769627,0.742188
40,1.3266,1.251603,0.75,0.740272,0.788163,0.75


Evaluating SGD_polynomial...



Running experiment: SGD_cyclic



0,1
eval/accuracy,▁▇██▄
eval/f1,▁▇██▃
eval/loss,█▄▂▁▁
eval/precision,▁▆▇█▂
eval/recall,▁▇██▄
eval/runtime,▆▅█▅▁
eval/samples_per_second,▆█▄█▁
eval/steps_per_second,▆█▄█▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.57439
eval/loss,1.22946
eval/precision,0.57019
eval/recall,0.625
eval/runtime,3.1253
eval/samples_per_second,10.239
eval/steps_per_second,0.64
final_accuracy,0.625
final_f1,0.57439


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for SGD_cyclic...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0693,1.963961,0.421875,0.394648,0.422122,0.421875
20,1.7616,1.528975,0.703125,0.681888,0.714156,0.703125
30,1.423,1.250856,0.734375,0.719621,0.786035,0.734375
40,1.242,1.15827,0.75,0.73816,0.79348,0.75


Evaluating SGD_cyclic...



Running experiment: SGD_exponential



0,1
eval/accuracy,▁▇██▅
eval/f1,▁▇██▅
eval/loss,█▄▂▁▁
eval/precision,▁▇██▄
eval/recall,▁▇██▅
eval/runtime,█▆▆▆▁
eval/samples_per_second,▃███▁
eval/steps_per_second,▃███▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.57264
eval/loss,1.12829
eval/precision,0.55952
eval/recall,0.625
eval/runtime,2.5684
eval/samples_per_second,12.459
eval/steps_per_second,0.779
final_accuracy,0.625
final_f1,0.57264


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for SGD_exponential...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.027,1.889268,0.507812,0.472801,0.478262,0.507812
20,1.7701,1.647508,0.671875,0.656179,0.688578,0.671875
30,1.6051,1.486669,0.703125,0.687895,0.723122,0.703125
40,1.479,1.398493,0.710938,0.697121,0.73838,0.710938


Evaluating SGD_exponential...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Running experiment: SGD_constant



0,1
eval/accuracy,▁▇██▅
eval/f1,▁▇██▄
eval/loss,█▅▂▁▁
eval/precision,▁▇██▃
eval/recall,▁▇██▅
eval/runtime,█▇▇▇▁
eval/samples_per_second,▁▇▆█▅
eval/steps_per_second,▁▇▆█▅
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.56845
eval/loss,1.38423
eval/precision,0.54601
eval/recall,0.625
eval/runtime,1.7602
eval/samples_per_second,18.179
eval/steps_per_second,1.136
final_accuracy,0.625
final_f1,0.56845


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for SGD_constant...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0132,1.823707,0.554688,0.527025,0.538492,0.554688
20,1.618,1.358081,0.75,0.73612,0.772129,0.75
30,1.2415,1.012988,0.734375,0.717402,0.779032,0.734375
40,0.9451,0.819525,0.742188,0.72829,0.785069,0.742188


Evaluating SGD_constant...



Running experiment: SGD_cosine_with_restarts



0,1
eval/accuracy,▁█▇█▅
eval/f1,▁█▇█▄
eval/loss,█▅▂▁▁
eval/precision,▁███▃
eval/recall,▁█▇█▅
eval/runtime,████▁
eval/samples_per_second,▁█▇▇▃
eval/steps_per_second,▁█▇▇▃
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.65625
eval/f1,0.61181
eval/loss,0.80828
eval/precision,0.61632
eval/recall,0.65625
eval/runtime,1.7265
eval/samples_per_second,18.534
eval/steps_per_second,1.158
final_accuracy,0.65625
final_f1,0.61181


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for SGD_cosine_with_restarts...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0389,1.864676,0.539062,0.508032,0.529822,0.539062
20,1.6837,1.481734,0.710938,0.695309,0.723851,0.710938
30,1.4274,1.293991,0.734375,0.721809,0.7638,0.734375
40,1.3247,1.264845,0.742188,0.732071,0.771488,0.742188


Evaluating SGD_cosine_with_restarts...



Running experiment: SGD_constant_with_warmup



0,1
eval/accuracy,▁▇██▄
eval/f1,▁▇██▃
eval/loss,█▄▂▁▁
eval/precision,▁▇██▂
eval/recall,▁▇██▄
eval/runtime,████▁
eval/samples_per_second,▁▆█▇▁
eval/steps_per_second,▁▆█▇▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.625
eval/f1,0.56942
eval/loss,1.24508
eval/precision,0.56027
eval/recall,0.625
eval/runtime,1.7472
eval/samples_per_second,18.315
eval/steps_per_second,1.145
final_accuracy,0.625
final_f1,0.56942


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training for SGD_constant_with_warmup...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0386,1.860518,0.539062,0.508032,0.529822,0.539062
20,1.6532,1.390407,0.75,0.730923,0.777988,0.75
30,1.2684,1.036549,0.726562,0.705841,0.778798,0.726562
40,0.9655,0.835922,0.75,0.735915,0.785297,0.75


Evaluating SGD_constant_with_warmup...


0,1
eval/accuracy,▁█▇█▅
eval/f1,▁█▇█▄
eval/loss,█▅▃▁▁
eval/precision,▁███▃
eval/recall,▁█▇█▅
eval/runtime,████▁
eval/samples_per_second,▂▇█▅▁
eval/steps_per_second,▂▇█▅▁
final_accuracy,▁
final_f1,▁

0,1
eval/accuracy,0.65625
eval/f1,0.61181
eval/loss,0.79155
eval/precision,0.61632
eval/recall,0.65625
eval/runtime,1.7651
eval/samples_per_second,18.129
eval/steps_per_second,1.133
final_accuracy,0.65625
final_f1,0.61181


Results Summary:
                                  Experiment  Accuracy  F1 Score  Precision  \
0                 schedule_free_adamw_linear   0.68750  0.669766   0.759859   
1                 schedule_free_adamw_cosine   0.71875  0.701389   0.764881   
2             schedule_free_adamw_polynomial   0.71875  0.701389   0.764881   
3                 schedule_free_adamw_cyclic   0.71875  0.706818   0.747396   
4            schedule_free_adamw_exponential   0.59375  0.587500   0.622024   
5               schedule_free_adamw_constant   0.75000  0.739574   0.773438   
6   schedule_free_adamw_cosine_with_restarts   0.71875  0.701389   0.764881   
7   schedule_free_adamw_constant_with_warmup   0.81250  0.811291   0.895833   
8                               AdamW_linear   0.68750  0.677604   0.726190   
9                               AdamW_cosine   0.75000  0.737847   0.810268   
10                          AdamW_polynomial   0.68750  0.677604   0.726190   
11                              Ada

In [7]:
# Identify best of learning rate sweep:
# import pandas as pd

# # Load the exported CSV file
# df = pd.read_csv("wandb_export.csv")

# # Group by optimizer and find the best learning rate for each
# best_lr_per_optimizer = (
#     df.groupby("optimizer_name")
#     .apply(lambda group: group.loc[group["val_accuracy"].idxmax()])
#     [["optimizer_name", "learning_rate", "val_accuracy"]]
# )

# print(best_lr_per_optimizer)