In [None]:
# !pip install --upgrade pip
# !pip install tokenizers
# !pip install datasets --upgrade evaluate
# !pip install transformers
# !pip install numpy torch matplotlib pandas scikit-learn tqdm pillow
# !pip install datasets evaluate transformers
# !pip install torchvision
# !pip install setuptools
# !pip install wandb
# !pip show wandb

In [None]:
import os
import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd
from datasets import load_dataset
from evaluate import load
from transformers import (
    ViTFeatureExtractor,
    ViTForImageClassification,
    TrainingArguments,
    Trainer,
    get_scheduler,
    AutoImageProcessor
)

from torch.optim import AdamW, SGD
import wandb
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torchvision.transforms as transforms
from PIL import Image
import random
from tqdm.auto import tqdm
from sklearn.metrics import confusion_matrix

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Set seed for reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed()

# Initialize experiment tracking
def init_wandb(project_name, experiment_name, config):
    return wandb.init(
        # entity="dl_project_sp25",
        project=project_name,
        name=experiment_name,
        config=config,
        reinit=True
    )

# Load and prepare dataset
def prepare_dataset(dataset_name, image_processor):
    """
    Load and prepare a dataset from Hugging Face for ViT fine-tuning
    """
    # Load the dataset
    print(f"Loading dataset: {dataset_name}")
    dataset = load_dataset(dataset_name)

    # Get label information
    if "label" in dataset["train"].features:
        labels = dataset["train"].features["label"].names
    elif "labels" in dataset["train"].features:
        labels = dataset["train"].features["labels"].names
    else:
        # Count unique labels and create labels list
        all_labels = dataset["train"][0]["label"] if "label" in dataset["train"][0] else dataset["train"][0]["labels"]
        num_labels = len(set(all_labels))
        labels = [str(i) for i in range(num_labels)]

    # Create label mappings
    label2id = {label: i for i, label in enumerate(labels)}
    id2label = {i: label for i, label in enumerate(labels)}

    # Set up image transformations based on the model's requirements
    normalize = transforms.Normalize(
        mean=image_processor.image_mean,
        std=image_processor.image_std
    )

    # Get the expected image size
    if "shortest_edge" in image_processor.size:
        size = image_processor.size["shortest_edge"]
    else:
        size = (image_processor.size["height"], image_processor.size["width"])

    # Define transforms for training data
    train_transforms = transforms.Compose([
        transforms.RandomResizedCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    # Define transforms for validation/test data
    val_transforms = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        normalize,
    ])

    # Apply transformations to the dataset
    def preprocess_train(examples):
        examples["pixel_values"] = [
            train_transforms(image.convert("RGB"))
            for image in examples["image"]
        ]
        return examples

    def preprocess_val(examples):
        examples["pixel_values"] = [
            val_transforms(image.convert("RGB"))
            for image in examples["image"]
        ]
        return examples

    # Apply preprocessing to each split
    train_dataset = dataset["train"].map(
        preprocess_train, batched=True, remove_columns=["image"]
    )

    if "validation" in dataset:
        val_dataset = dataset["validation"].map(
            preprocess_val, batched=True, remove_columns=["image"]
        )
    else:
        # Create a validation split if none exists
        splits = train_dataset.train_test_split(test_size=0.1)
        train_dataset = splits["train"]
        val_dataset = splits["test"]

    if "test" in dataset:
        test_dataset = dataset["test"].map(
            preprocess_val, batched=True, remove_columns=["image"]
        )
    else:
        test_dataset = val_dataset

    print(f"Dataset prepared with {len(train_dataset)} training, {len(val_dataset)} validation, and {len(test_dataset)} test examples")

    return train_dataset, val_dataset, test_dataset, id2label, label2id

# Define compute_metrics function for evaluation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted')
    acc = accuracy_score(labels, predictions)

    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
    }

# Main experiment pipeline
def run_vit_experiment(config):
    """
    Run a ViT experiment with the specified configuration
    """
    # Initialize wandb for experiment tracking
    run = init_wandb("ViT-LR-Schedulers", config["experiment_name"], config)

    # Load the image processor for the model
    image_processor = AutoImageProcessor.from_pretrained(config["model_name"])

    # Prepare the dataset
    train_dataset, val_dataset, test_dataset, id2label, label2id = prepare_dataset(
        config["dataset_name"], image_processor
    )

    # Load the ViT model
    model = ViTForImageClassification.from_pretrained(
        config["model_name"],
        num_labels=len(id2label),
        id2label=id2label,
        label2id=label2id,
        ignore_mismatched_sizes=True
    )

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=f"./results/{config['experiment_name']}",
        per_device_train_batch_size=config["batch_size"],
        per_device_eval_batch_size=config["batch_size"],
        num_train_epochs=config["num_epochs"],
        weight_decay=config["weight_decay"],
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_strategy="steps",  # Ensure logging is enabled
        logging_steps=10,          # Log every 10 steps (adjust as needed)
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        push_to_hub=False,
        report_to="wandb",
        remove_unused_columns=False,
        learning_rate=config["learning_rate"],
    )

    # Setup optimizer
    if config["optimizer_name"] == "AdamW":
        optimizer = AdamW(model.parameters(), lr=config["learning_rate"])
    else:  # SGD
        optimizer = SGD(model.parameters(), lr=config["learning_rate"], momentum=0.9)

    # Setup scheduler
    num_training_steps = len(train_dataset) // config["batch_size"] * config["num_epochs"]
    num_warmup_steps = int(num_training_steps * config["warmup_ratio"]) if "warmup_ratio" in config else 0

    scheduler_name = config["scheduler_name"]
    if scheduler_name == "linear":
        scheduler = get_scheduler(
            "linear",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps
        )
    elif scheduler_name == "cosine":
        scheduler = get_scheduler(
            "cosine",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps
        )
    elif scheduler_name == "cosine_with_restarts":
        scheduler = get_scheduler(
            "cosine_with_restarts",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps,
        )
    elif scheduler_name == "polynomial":
        scheduler = get_scheduler(
            "polynomial",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps,
            # power=config.get("poly_power", 1.0),
        )
    elif scheduler_name == "constant":
        scheduler = get_scheduler(
            "constant",
            optimizer=optimizer,
        )
    elif scheduler_name == "constant_with_warmup":
        scheduler = get_scheduler(
            "constant_with_warmup",
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
        )
    # add more experiments.


    else:
        raise ValueError(f"Scheduler {scheduler_name} not supported")

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        optimizers=(optimizer, scheduler)
    )

    # Hook into the training loop to log metrics to W&B
    for epoch in range(config["num_epochs"]):
        print(f"Epoch {epoch + 1}/{config['num_epochs']}")

        # Train for one epoch
        trainer.train()

        # Evaluate on validation set
        eval_results = trainer.evaluate(val_dataset)

        print(trainer.state.log_history)
        if trainer.state.log_history and "loss" in trainer.state.log_history[-1]:
            train_loss = trainer.state.log_history[-1]["loss"]
        else:
            train_loss = None

        # Log training and validation metrics to W&B
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": trainer.state.log_history[-1].get("loss", None),
            "val_loss": eval_results["eval_loss"],
            "val_accuracy": eval_results["eval_accuracy"],
        })

    # Train the model
    print(f"Starting training for {config['experiment_name']}...")
    trainer.train()

    # Evaluate the model
    print(f"Evaluating {config['experiment_name']}...")
    eval_results = trainer.evaluate(test_dataset)


    # Log final metrics
    wandb.log({
        "final_accuracy": eval_results["eval_accuracy"],
        "final_f1": eval_results["eval_f1"],
        "final_precision": eval_results["eval_precision"],
        "final_recall": eval_results["eval_recall"],
    })

    # Compute confusion matrix for test set
    predictions, labels, _ = trainer.predict(test_dataset)
    predictions = np.argmax(predictions, axis=1)

    # Convert to lists
    labels = labels.tolist()
    predictions = predictions.tolist()

    # Log confusion matrix to W&B
    wandb.log({
        "confusion_matrix_test": wandb.plot.confusion_matrix(
            probs=None,
            y_true=labels,
            preds=predictions,
            class_names=[str(i) for i in range(len(np.unique(labels)))]
        )
    })



    # Save the model
    trainer.save_model(f"./saved_models/{config['experiment_name']}")

    # Finish wandb run
    wandb.finish()

    return eval_results

# Get experiment configurations for challenging datasets
def get_experiment_configs():
    # We'll use a more complex dataset from Hugging Face
    base_config = {
        "model_name": "google/vit-base-patch16-224-in21k",
        "dataset_name": "jbarat/plant_species",  # Any challenging dataset.
        "batch_size": 16,
        "num_epochs": 3, # let's keep smaller number to begin with.
        "weight_decay": 0.01,
        "optimizer_name": "AdamW",
    }

    # Different learning rate scheduler configurations
    configs = []

    # Constant learning rate (baseline)
    configs.append({
        **base_config,
        "experiment_name": "vit_constant_lr",
        "learning_rate": 5e-5,
        "scheduler_name": "constant",
    })

    # Linear decay
    configs.append({
        **base_config,
        "experiment_name": "vit_linear_decay",
        "learning_rate": 5e-5,
        "scheduler_name": "linear",
        "warmup_ratio": 0.1,
    })

    # Cosine decay (commonly used with ViT)
    configs.append({
        **base_config,
        "experiment_name": "vit_cosine_decay",
        "learning_rate": 5e-5,
        "scheduler_name": "cosine",
        "warmup_ratio": 0.1,
    })

    # Cosine with restarts
    configs.append({
        **base_config,
        "experiment_name": "vit_cosine_restarts",
        "learning_rate": 5e-5,
        "scheduler_name": "cosine_with_restarts",
        "warmup_ratio": 0.1,
    })

    # Polynomial decay
    configs.append({
        **base_config,
        "experiment_name": "vit_polynomial",
        "learning_rate": 5e-5,
        "scheduler_name": "polynomial",
        "warmup_ratio": 0.1,
        "poly_power": 2.0,
    })

    # Constant with warmup
    configs.append({
        **base_config,
        "experiment_name": "vit_constant_warmup",
        "learning_rate": 5e-5,
        "scheduler_name": "constant_with_warmup",
        "warmup_ratio": 0.1,
    })

    # Different learning rate experiments
    for lr in [1e-5, 3e-5, 1e-4]:
        configs.append({
            **base_config,
            "experiment_name": f"vit_cosine_lr_{lr}",
            "learning_rate": lr,
            "scheduler_name": "cosine",
            "warmup_ratio": 0.1,
        })

    # Different optimizer experiments
    configs.append({
        **base_config,
        "experiment_name": "vit_sgd_cosine",
        "learning_rate": 0.01,  # Higher LR for SGD
        "scheduler_name": "cosine",
        "warmup_ratio": 0.1,
        "optimizer_name": "SGD",
    })

    # here we can make changes to add new datasets to experiment.
    # or change batch_size to see the impact.
    # Other datasets to try (uncomment to use)
    #   Erik: We can use a data set as a strech. Maybe something less similar than plants for better contrasting comparison?
    # flowers dataset
    # configs.append({
    #     **base_config,
    #     "dataset_name": "huggan/flowers",
    #     "experiment_name": "vit_flowers_cosine",
    #     "learning_rate": 5e-5,
    #     "scheduler_name": "cosine",
    #     "warmup_ratio": 0.1,
    # })

    return configs

# Run experiments and visualize results
def run_all_experiments():
    configs = get_experiment_configs()
    results = []

    for config in configs:
        print(f"\n{'='*50}")
        print(f"Running experiment: {config['experiment_name']}")
        print(f"{'='*50}\n")

        eval_results = run_vit_experiment(config)
        results.append({
            "experiment": config['experiment_name'],
            "accuracy": eval_results["eval_accuracy"],
            "f1": eval_results["eval_f1"],
            "precision": eval_results["eval_precision"],
            "recall": eval_results["eval_recall"],
            "config": config
        })

    return results

# Visualize and compare results
def visualize_results(results):
    # Create DataFrame for easier plotting
    df = pd.DataFrame([
        {
            "Experiment": result["experiment"],
            "Accuracy": result["accuracy"],
            "F1 Score": result["f1"],
            "Precision": result["precision"],
            "Recall": result["recall"],
            "Learning Rate": result["config"]["learning_rate"],
            "Scheduler": result["config"]["scheduler_name"],
            "Optimizer": result["config"]["optimizer_name"],
            "Dataset": result["config"]["dataset_name"]
        }
        for result in results
    ])

    # Plot accuracy comparison
    plt.figure(figsize=(14, 8))
    ax = plt.bar(df["Experiment"], df["Accuracy"], color='skyblue')
    plt.xlabel('Experiment')
    plt.ylabel('Accuracy')
    plt.title('Comparison of Model Accuracy Across Experiments')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig("accuracy_comparison.png")
    plt.close()

    # Plot all metrics for a more comprehensive comparison
    plt.figure(figsize=(16, 10))
    metrics = ["Accuracy", "F1 Score", "Precision", "Recall"]
    x = np.arange(len(df["Experiment"]))
    width = 0.2

    for i, metric in enumerate(metrics):
        plt.bar(x + i*width, df[metric], width=width, label=metric)

    plt.xlabel('Experiment')
    plt.ylabel('Score')
    plt.title('Comparison of Metrics Across Experiments')
    plt.xticks(x + width*1.5, df["Experiment"], rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    plt.savefig("metrics_comparison.png")
    plt.close()

    # Plot results by scheduler type
    plt.figure(figsize=(14, 8))
    schedulers = df["Scheduler"].unique()
    for scheduler in schedulers:
        scheduler_data = df[df["Scheduler"] == scheduler]
        plt.plot(scheduler_data["Learning Rate"], scheduler_data["Accuracy"], 'o-', label=scheduler)

    plt.xlabel('Learning Rate')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs. Learning Rate by Scheduler Type')
    plt.xscale('log')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("scheduler_comparison.png")
    plt.close()

    # Create a table with results
    print("Results Summary:")
    print(df[["Experiment", "Accuracy", "F1 Score", "Precision", "Recall", "Scheduler", "Learning Rate", "Optimizer", "Dataset"]])

    # Save results to CSV
    df.to_csv("experiment_results.csv", index=False)

    return df

# Function to run a single experiment (useful for testing)
def run_single_experiment(experiment_index=0):
    configs = get_experiment_configs()
    if experiment_index >= len(configs):
        print(f"Invalid experiment index. Choose between 0 and {len(configs)-1}")
        return

    config = configs[experiment_index]
    print(f"Running single experiment: {config['experiment_name']}")
    eval_results = run_vit_experiment(config)

    print(f"\nResults for {config['experiment_name']}:")
    print(f"Accuracy: {eval_results['eval_accuracy']:.4f}")
    print(f"F1 Score: {eval_results['eval_f1']:.4f}")
    print(f"Precision: {eval_results['eval_precision']:.4f}")
    print(f"Recall: {eval_results['eval_recall']:.4f}")

    return eval_results

# Main execution
if __name__ == "__main__":
    print("Starting ViT experiments with different learning rate schedulers...")
    os.environ["WANDB_PROJECT"] = "ViT-LR-Schedulers"

    # Option 1: Run all experiments (time-consuming)
    # results = run_all_experiments()
    # results_df = visualize_results(results)

    # Option 2: Run a single experiment for testing
    run_single_experiment(0)  # Try the baseline experiment first

    print("Experiments completed!")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Starting ViT experiments with different learning rate schedulers...
Running single experiment: vit_constant_lr


[34m[1mwandb[0m: Currently logged in as: [33mewg[0m ([33mewg-georgia-institute-of-technology[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


Loading dataset: jbarat/plant_species
Dataset prepared with 720 training, 80 validation, and 80 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.5536,1.310789,0.875,0.871815,0.890956,0.875
2,0.8641,0.806348,0.925,0.925496,0.931786,0.925
3,0.5756,0.579779,0.9,0.900452,0.906364,0.9


[{'loss': 2.0216, 'grad_norm': 1.8287012577056885, 'learning_rate': 5e-05, 'epoch': 0.2222222222222222, 'step': 10}, {'loss': 1.8506, 'grad_norm': 1.8933261632919312, 'learning_rate': 5e-05, 'epoch': 0.4444444444444444, 'step': 20}, {'loss': 1.7043, 'grad_norm': 2.1105501651763916, 'learning_rate': 5e-05, 'epoch': 0.6666666666666666, 'step': 30}, {'loss': 1.5536, 'grad_norm': 1.9867291450500488, 'learning_rate': 5e-05, 'epoch': 0.8888888888888888, 'step': 40}, {'eval_loss': 1.310788869857788, 'eval_accuracy': 0.875, 'eval_f1': 0.8718154761904762, 'eval_precision': 0.8909558823529412, 'eval_recall': 0.875, 'eval_runtime': 4.2443, 'eval_samples_per_second': 18.849, 'eval_steps_per_second': 1.178, 'epoch': 1.0, 'step': 45}, {'loss': 1.3298, 'grad_norm': 1.7887920141220093, 'learning_rate': 5e-05, 'epoch': 1.1111111111111112, 'step': 50}, {'loss': 1.172, 'grad_norm': 1.8255621194839478, 'learning_rate': 5e-05, 'epoch': 1.3333333333333333, 'step': 60}, {'loss': 1.0532, 'grad_norm': 1.814474

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5381,0.616426,0.8875,0.882586,0.908857,0.8875
2,0.2618,0.483564,0.9,0.898191,0.915524,0.9
3,0.1622,0.425731,0.8875,0.886888,0.900455,0.8875


[{'loss': 0.7048, 'grad_norm': 1.5014983415603638, 'learning_rate': 5e-05, 'epoch': 0.2222222222222222, 'step': 10}, {'loss': 0.5867, 'grad_norm': 1.229650855064392, 'learning_rate': 5e-05, 'epoch': 0.4444444444444444, 'step': 20}, {'loss': 0.5645, 'grad_norm': 2.949812650680542, 'learning_rate': 5e-05, 'epoch': 0.6666666666666666, 'step': 30}, {'loss': 0.5381, 'grad_norm': 2.3821589946746826, 'learning_rate': 5e-05, 'epoch': 0.8888888888888888, 'step': 40}, {'eval_loss': 0.6164258718490601, 'eval_accuracy': 0.8875, 'eval_f1': 0.8825857831206628, 'eval_precision': 0.9088568376068377, 'eval_recall': 0.8875, 'eval_runtime': 4.0002, 'eval_samples_per_second': 19.999, 'eval_steps_per_second': 1.25, 'epoch': 1.0, 'step': 45}, {'loss': 0.4243, 'grad_norm': 1.3097175359725952, 'learning_rate': 5e-05, 'epoch': 1.1111111111111112, 'step': 50}, {'loss': 0.343, 'grad_norm': 1.226622462272644, 'learning_rate': 5e-05, 'epoch': 1.3333333333333333, 'step': 60}, {'loss': 0.3161, 'grad_norm': 1.3184051

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1728,0.367297,0.9125,0.911208,0.918889,0.9125
2,0.1213,0.340902,0.9125,0.911619,0.922445,0.9125
3,0.082,0.336166,0.9,0.899527,0.909722,0.9


[{'loss': 0.213, 'grad_norm': 0.6806967854499817, 'learning_rate': 5e-05, 'epoch': 0.2222222222222222, 'step': 10}, {'loss': 0.1776, 'grad_norm': 0.5066623091697693, 'learning_rate': 5e-05, 'epoch': 0.4444444444444444, 'step': 20}, {'loss': 0.1651, 'grad_norm': 0.49694040417671204, 'learning_rate': 5e-05, 'epoch': 0.6666666666666666, 'step': 30}, {'loss': 0.1728, 'grad_norm': 0.4016141891479492, 'learning_rate': 5e-05, 'epoch': 0.8888888888888888, 'step': 40}, {'eval_loss': 0.36729711294174194, 'eval_accuracy': 0.9125, 'eval_f1': 0.9112084163671519, 'eval_precision': 0.918888888888889, 'eval_recall': 0.9125, 'eval_runtime': 4.721, 'eval_samples_per_second': 16.946, 'eval_steps_per_second': 1.059, 'epoch': 1.0, 'step': 45}, {'loss': 0.1361, 'grad_norm': 0.38184911012649536, 'learning_rate': 5e-05, 'epoch': 1.1111111111111112, 'step': 50}, {'loss': 0.129, 'grad_norm': 0.5108432769775391, 'learning_rate': 5e-05, 'epoch': 1.3333333333333333, 'step': 60}, {'loss': 0.117, 'grad_norm': 0.4481

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1066,0.36822,0.9125,0.911619,0.922445,0.9125
2,0.0776,0.336151,0.925,0.923416,0.930675,0.925
3,0.0645,0.342475,0.9125,0.911208,0.918889,0.9125


Evaluating vit_constant_lr...


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
epoch,▁▅█
eval/accuracy,▁█▄█▃▄▃▄▆▆▄▆▆█▆█
eval/f1,▁█▅█▂▄▃▄▆▆▅▆▆█▆█
eval/loss,█▄▃▄▃▂▂▂▁▁▁▁▁▁▁▁
eval/precision,▁█▄█▄▅▃▅▆▆▄▆▆█▆█
eval/recall,▁█▄█▃▄▃▄▆▆▄▆▆█▆█
eval/runtime,▃▄▁▃▁▁▆▂▇▂▂▄▂▂▃█
eval/samples_per_second,▅▅█▅██▃▇▁▇▇▅▆▇▅▁
eval/steps_per_second,▅▅█▅██▃▇▁▇▇▅▆▇▆▁
final_accuracy,▁

0,1
epoch,3.0
eval/accuracy,0.925
eval/f1,0.92342
eval/loss,0.33615
eval/precision,0.93067
eval/recall,0.925
eval/runtime,4.782
eval/samples_per_second,16.729
eval/steps_per_second,1.046
final_accuracy,0.925



Results for vit_constant_lr:
Accuracy: 0.9250
F1 Score: 0.9234
Precision: 0.9307
Recall: 0.9250
Experiments completed!
