In [None]:
# !pip install --upgrade pip
# !pip install tokenizers
# !pip install datasets --upgrade evaluate
# !pip install transformers
# !pip install numpy torch matplotlib pandas scikit-learn tqdm pillow
# !pip install datasets evaluate transformers
# !pip install torchvision
# !pip install setuptools
# !pip install wandb
# !pip show wandb
# !pip install schedulefree
# !pip install nbformat

Collecting nbformat
  Downloading nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)
Collecting fastjsonschema>=2.15 (from nbformat)
  Downloading fastjsonschema-2.21.1-py3-none-any.whl.metadata (2.2 kB)
Collecting jsonschema>=2.6 (from nbformat)
  Using cached jsonschema-4.23.0-py3-none-any.whl.metadata (7.9 kB)
Collecting jsonschema-specifications>=2023.03.6 (from jsonschema>=2.6->nbformat)
  Using cached jsonschema_specifications-2024.10.1-py3-none-any.whl.metadata (3.0 kB)
Collecting referencing>=0.28.4 (from jsonschema>=2.6->nbformat)
  Using cached referencing-0.36.2-py3-none-any.whl.metadata (2.8 kB)
Collecting rpds-py>=0.7.1 (from jsonschema>=2.6->nbformat)
  Using cached rpds_py-0.24.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (4.1 kB)
Downloading nbformat-5.10.4-py3-none-any.whl (78 kB)
Downloading fastjsonschema-2.21.1-py3-none-any.whl (23 kB)
Using cached jsonschema-4.23.0-py3-none-any.whl (88 kB)
Using cached jsonschema_specifications-2024.10.1-py3-none-any.whl (18 kB)
Us

In [None]:
import os
import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd
from datasets import load_dataset
from evaluate import load
from transformers import (
    ViTFeatureExtractor,
    ViTForImageClassification,
    TrainingArguments,
    Trainer,
    get_scheduler,
    AutoImageProcessor
)

from torch.optim import AdamW, SGD
import wandb
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torchvision.transforms as transforms
from PIL import Image
import random
from tqdm.auto import tqdm
from sklearn.metrics import confusion_matrix

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# W&B sweep definition. It is passed to wandb.sweep() in the main cell, and each
# sampled parameter combination is read back through wandb.config inside
# run_optimizer_sweep().
sweep_config = {
    "method": "grid",  # Search strategy; W&B accepts "grid", "random", or "bayes"
    "metric": {
        "name": "val_accuracy",  # Metric to optimize; must match a key logged with wandb.log
        "goal": "maximize"       # Goal: maximize or minimize
    },
    "parameters": {
        "optimizer_name": {
            "values": ["AdamW", "SGD", "RMSProp", "AdaGrad", "schedule_free_adamw"]  # Optimizers to test; names must match those handled in run_vit_experiment
        },
        "learning_rate": {
            "values": [2e-5, 2e-4, 2e-3, 2e-2, 2e-1]  # Learning rates to sweep (one decade apart)
        },
        "batch_size": {
            "values": [16]  # Fixed batch size
        },
        "num_epochs": {
            "values": [3]  # Fixed number of epochs
        },
        "scheduler_name": {
            "values": ["cosine"]  # Fixed scheduler for simplicity
        }
    }
}

In [None]:
# Set seed for reproducibility
def set_seed(seed=42):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices).

    Args:
        seed: Integer seed handed unchanged to every generator. Defaults to 42.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

set_seed()

# Initialize experiment tracking
def init_wandb(project_name, experiment_name, config):
    """Start a Weights & Biases run for one experiment and return it.

    Args:
        project_name: W&B project the run is filed under.
        experiment_name: Display name given to the run.
        config: Experiment configuration stored with the run.

    Returns:
        Whatever ``wandb.init`` returns for the newly started run.
    """
    init_kwargs = {
        # entity="dl_project_sp25",
        "project": project_name,
        "name": experiment_name,
        "config": config,
        # reinit=True lets this be called repeatedly from the same process
        "reinit": True,
    }
    return wandb.init(**init_kwargs)

# Load and prepare dataset
def prepare_dataset(dataset_name, image_processor):
    """
    Load and prepare a dataset from Hugging Face for ViT fine-tuning

    The raw data is split into train / validation / test *before* any image
    transform is applied, so that only the training rows receive the random
    training augmentation and validation/test rows always get the
    deterministic resize + center-crop pipeline.

    Args:
        dataset_name: Name passed to ``datasets.load_dataset``. The dataset must
            have a "train" split with an "image" column and a "label" (or
            "labels") column.
        image_processor: Processor providing ``image_mean``, ``image_std`` and
            ``size`` (either a "shortest_edge" entry or "height"/"width").

    Returns:
        Tuple ``(train_dataset, val_dataset, test_dataset, id2label, label2id)``.
        Each dataset has the "image" column replaced by "pixel_values".

    Raises:
        ValueError: If the train split has neither a "label" nor a "labels" column.
    """
    # Load the dataset
    print(f"Loading dataset: {dataset_name}")
    dataset = load_dataset(dataset_name)

    # Get label information
    train_features = dataset["train"].features
    label_column = next(
        (name for name in ("label", "labels") if name in train_features), None
    )
    if label_column is None:
        raise ValueError(
            f"Dataset {dataset_name} has no 'label' or 'labels' column in its train split"
        )

    label_feature = train_features[label_column]
    if hasattr(label_feature, "names"):
        # ClassLabel-style feature: use the human-readable class names
        labels = label_feature.names
    else:
        # Plain value column: count the distinct labels and name them "0".."n-1"
        num_labels = len(dataset["train"].unique(label_column))
        labels = [str(i) for i in range(num_labels)]

    # Create label mappings
    label2id = {label: i for i, label in enumerate(labels)}
    id2label = {i: label for i, label in enumerate(labels)}

    # Set up image transformations based on the model's requirements
    normalize = transforms.Normalize(
        mean=image_processor.image_mean,
        std=image_processor.image_std
    )

    # Get the expected image size
    if "shortest_edge" in image_processor.size:
        size = image_processor.size["shortest_edge"]
    else:
        size = (image_processor.size["height"], image_processor.size["width"])

    # Define transforms for training data
    train_transforms = transforms.Compose([
        transforms.RandomResizedCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    # Define transforms for validation/test data
    val_transforms = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        normalize,
    ])

    # Apply transformations to the dataset
    def preprocess_train(examples):
        examples["pixel_values"] = [
            train_transforms(image.convert("RGB"))
            for image in examples["image"]
        ]
        return examples

    def preprocess_val(examples):
        examples["pixel_values"] = [
            val_transforms(image.convert("RGB"))
            for image in examples["image"]
        ]
        return examples

    # Decide the raw splits first; transforms are applied afterwards
    raw_train = dataset["train"]

    if "validation" in dataset:
        raw_val = dataset["validation"]
    else:
        # Create a validation split if none exists
        splits = raw_train.train_test_split(test_size=0.2, seed=42)
        raw_train = splits["train"]
        raw_val = splits["test"]

    if "test" in dataset:
        raw_test = dataset["test"]
    else:
        # Further split validation dataset to create a test dataset
        # (rather than reusing the validation rows as the test set)
        test_split = raw_val.train_test_split(test_size=0.2, seed=42)
        raw_val = test_split["train"]
        raw_test = test_split["test"]

    # Apply preprocessing to each split.
    # NOTE: map() evaluates the transform once, so the random training
    # augmentation is fixed at preparation time rather than re-drawn per epoch.
    train_dataset = raw_train.map(
        preprocess_train, batched=True, remove_columns=["image"]
    )
    val_dataset = raw_val.map(
        preprocess_val, batched=True, remove_columns=["image"]
    )
    test_dataset = raw_test.map(
        preprocess_val, batched=True, remove_columns=["image"]
    )

    print(f"Dataset prepared with {len(train_dataset)} training, {len(val_dataset)} validation, and {len(test_dataset)} test examples")

    return train_dataset, val_dataset, test_dataset, id2label, label2id

# Define compute_metrics function for evaluation
def compute_metrics(eval_pred):
    """Turn raw model outputs into classification metrics.

    Args:
        eval_pred: Pair ``(predictions, labels)``; the predicted class is the
            argmax of ``predictions`` along axis 1.

    Returns:
        Dict with 'accuracy', 'f1', 'precision' and 'recall'. The last three
        are computed with ``average='weighted'``.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)

    # Result tuple order is (precision, recall, f1, support)
    weighted = precision_recall_fscore_support(
        references, predicted_classes, average='weighted'
    )

    return {
        'accuracy': accuracy_score(references, predicted_classes),
        'f1': weighted[2],
        'precision': weighted[0],
        'recall': weighted[1],
    }

# Main experiment pipeline
def _build_optimizer(optimizer_name, parameters, learning_rate):
    """Create the torch optimizer selected by ``optimizer_name``.

    Unknown names raise instead of silently falling back to SGD, so a run is
    never reported under the name of an optimizer it did not actually use.

    Raises:
        ValueError: If ``optimizer_name`` is not one of the handled names.
    """
    if optimizer_name == "AdamW":
        return AdamW(parameters, lr=learning_rate)
    if optimizer_name == "SGD":
        return SGD(parameters, lr=learning_rate, momentum=0.9)
    if optimizer_name == "RMSProp":
        return torch.optim.RMSprop(parameters, lr=learning_rate)
    if optimizer_name == "AdaGrad":
        return torch.optim.Adagrad(parameters, lr=learning_rate)
    raise ValueError(f"Optimizer {optimizer_name} not supported")


def _build_scheduler(scheduler_name, optimizer, num_warmup_steps, num_training_steps):
    """Create the learning-rate scheduler selected by ``scheduler_name``.

    Raises:
        ValueError: If ``scheduler_name`` is not one of the supported names.
    """
    supported = (
        "linear",
        "cosine",
        "cosine_with_restarts",
        "polynomial",  # config["poly_power"] is not applied; the default power is used
        "constant",
        "constant_with_warmup",
    )
    if scheduler_name not in supported:
        raise ValueError(f"Scheduler {scheduler_name} not supported")

    # get_scheduler ignores the step arguments a given schedule does not need
    return get_scheduler(
        scheduler_name,
        optimizer=optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
    )


def _latest_train_loss(log_history):
    """Return the most recent training "loss" in ``log_history``, or None if absent."""
    for entry in reversed(log_history):
        if "loss" in entry:
            return entry["loss"]
    return None


def run_vit_experiment(config):
    """
    Run a ViT experiment with the specified configuration

    The model is trained exactly once (``config["num_epochs"]`` epochs), then
    evaluated on the validation split (logged as ``val_loss`` / ``val_accuracy``)
    and on the test split (logged as ``final_*`` plus a confusion matrix).
    The trained model is saved under ``./saved_models/<experiment_name>``.

    Args:
        config: Dict with keys "experiment_name", "model_name", "dataset_name",
            "batch_size", "num_epochs", "weight_decay", "learning_rate",
            "optimizer_name", "scheduler_name" and optionally "warmup_ratio".

    Returns:
        The metrics dict from ``trainer.evaluate(test_dataset)`` (keys such as
        "eval_accuracy", "eval_f1", "eval_precision", "eval_recall").

    Raises:
        ValueError: For an unsupported optimizer or scheduler name.
    """
    # Initialize wandb for experiment tracking
    init_wandb("ViT-LR-Schedulers", config["experiment_name"], config)

    try:
        # Load the image processor for the model
        image_processor = AutoImageProcessor.from_pretrained(config["model_name"], use_fast=True)

        # Prepare the dataset
        train_dataset, val_dataset, test_dataset, id2label, label2id = prepare_dataset(
            config["dataset_name"], image_processor
        )

        # # Visualize some images from the training dataset (do this w/o the remove_columns=["image"])
        # # Initialize a set to keep track of shown labels
        # shown_labels = set()

        # # Initialize the figure for plotting
        # plt.figure(figsize=(15, 10))

        # # Loop through the dataset and plot the first image of each label
        # for i, sample in enumerate(train_dataset):
        #     label = sample["label"]
        #     image = sample["image"]

        #     # Check if the label has already been shown
        #     if label not in shown_labels:
        #         plt.subplot(1, len(id2label), len(shown_labels) + 1)
        #         plt.imshow(image.convert("RGB"))  # Convert to RGB if necessary
        #         plt.title(id2label[label])  # Get label name
        #         plt.axis("off")
        #         shown_labels.add(label)

        #         # Stop if all labels have been shown
        #         if len(shown_labels) == len(id2label):
        #             break

        # plt.show()

        # Load the ViT model
        model = ViTForImageClassification.from_pretrained(
            config["model_name"],
            num_labels=len(id2label),
            id2label=id2label,
            label2id=label2id,
            ignore_mismatched_sizes=True
        )

        # Define training arguments
        # NOTE(review): an explicit optimizer is passed to Trainer below, so the
        # weight_decay / learning_rate given here presumably do not reach that
        # optimizer -- confirm against the Trainer documentation.
        training_args = TrainingArguments(
            output_dir=f"./results/{config['experiment_name']}",
            per_device_train_batch_size=config["batch_size"],
            per_device_eval_batch_size=config["batch_size"],
            num_train_epochs=config["num_epochs"],
            weight_decay=config["weight_decay"],
            eval_strategy="steps",
            save_strategy="steps",
            logging_strategy="steps",  # Ensure logging is enabled
            logging_steps=10,          # Log every 10 steps (adjust as needed)
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            push_to_hub=False,
            report_to="wandb",
            remove_unused_columns=False,
            learning_rate=config["learning_rate"],
        )

        # Setup optimizer
        optimizer = _build_optimizer(
            config["optimizer_name"], model.parameters(), config["learning_rate"]
        )

        # Setup scheduler. Round the steps per epoch up so that a final partial
        # batch is counted as a step as well.
        steps_per_epoch = -(-len(train_dataset) // config["batch_size"])
        num_training_steps = steps_per_epoch * config["num_epochs"]
        num_warmup_steps = int(num_training_steps * config.get("warmup_ratio", 0))
        scheduler = _build_scheduler(
            config["scheduler_name"], optimizer, num_warmup_steps, num_training_steps
        )

        # Initialize Trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics,
            optimizers=(optimizer, scheduler)
        )

        # Train the model once. A single train() call already covers all
        # num_epochs epochs; calling it repeatedly would keep training with a
        # scheduler that has run past its planned number of steps.
        print(f"Starting training for {config['experiment_name']}...")
        trainer.train()

        # Validation metrics (val_accuracy is the metric the sweep optimizes)
        val_results = trainer.evaluate(val_dataset)
        wandb.log({
            "epoch": config["num_epochs"],
            "train_loss": _latest_train_loss(trainer.state.log_history),
            "val_loss": val_results["eval_loss"],
            "val_accuracy": val_results["eval_accuracy"],
        })

        # Evaluate the model
        print(f"Evaluating {config['experiment_name']}...")
        eval_results = trainer.evaluate(test_dataset)

        # Log final metrics
        wandb.log({
            "final_accuracy": eval_results["eval_accuracy"],
            "final_f1": eval_results["eval_f1"],
            "final_precision": eval_results["eval_precision"],
            "final_recall": eval_results["eval_recall"],
        })

        # Compute confusion matrix for test set
        predictions, labels, _ = trainer.predict(test_dataset)
        predictions = np.argmax(predictions, axis=1)

        # Convert to lists
        labels = labels.tolist()
        predictions = predictions.tolist()

        # Log confusion matrix to W&B. Class names come from id2label so the
        # axes stay correct even when a class is missing from the test split.
        wandb.log({
            "confusion_matrix_test": wandb.plot.confusion_matrix(
                probs=None,
                y_true=labels,
                preds=predictions,
                class_names=[id2label[i] for i in range(len(id2label))]
            )
        })

        # Save the model
        trainer.save_model(f"./saved_models/{config['experiment_name']}")
    except BaseException:
        # Close the run as failed so it is not left open in this process
        wandb.finish(exit_code=1)
        raise

    # Finish wandb run
    wandb.finish()

    return eval_results

# Get experiment configurations for challenging datasets
def get_experiment_configs():
    """Build the list of experiment configurations to run.

    Every configuration starts from the same base settings (model, dataset,
    batch size, epochs, weight decay, AdamW) and adds an experiment name, a
    learning rate, a scheduler name and any experiment-specific extras.

    Returns:
        List of config dicts, in this order: six scheduler comparisons at
        lr=5e-5, three cosine runs at different learning rates, and one
        SGD + cosine run.
    """
    # We'll use a more complex dataset from Hugging Face
    shared = {
        "model_name": "google/vit-base-patch16-224-in21k",
        "dataset_name": "jbarat/plant_species",  # Any challenging dataset.
        "batch_size": 16,
        "num_epochs": 3, # let's keep smaller number to begin with.
        "weight_decay": 0.01,
        "optimizer_name": "AdamW",
    }

    def make(experiment_name, learning_rate, scheduler_name, **extras):
        # Later keys win, so extras may override shared entries (e.g. the optimizer)
        return {
            **shared,
            "experiment_name": experiment_name,
            "learning_rate": learning_rate,
            "scheduler_name": scheduler_name,
            **extras,
        }

    with_warmup = {"warmup_ratio": 0.1}

    # Different learning rate scheduler configurations
    experiments = [
        # Constant learning rate (baseline)
        make("vit_constant_lr", 5e-5, "constant"),
        # Linear decay
        make("vit_linear_decay", 5e-5, "linear", **with_warmup),
        # Cosine decay (commonly used with ViT)
        make("vit_cosine_decay", 5e-5, "cosine", **with_warmup),
        # Cosine with restarts
        make("vit_cosine_restarts", 5e-5, "cosine_with_restarts", **with_warmup),
        # Polynomial decay
        make("vit_polynomial", 5e-5, "polynomial", **with_warmup, poly_power=2.0),
        # Constant with warmup
        make("vit_constant_warmup", 5e-5, "constant_with_warmup", **with_warmup),
    ]

    # Different learning rate experiments
    experiments.extend(
        make(f"vit_cosine_lr_{lr}", lr, "cosine", **with_warmup)
        for lr in [1e-5, 3e-5, 1e-4]
    )

    # Different optimizer experiments (higher LR for SGD)
    experiments.append(
        make("vit_sgd_cosine", 0.01, "cosine", **with_warmup, optimizer_name="SGD")
    )

    # here we can make changes to add new datasets to experiment.
    # or change batch_size to see the impact.
    # Other datasets to try (uncomment to use)
    #   Erik: We can use a data set as a stretch. Maybe something less similar than plants for better contrasting comparison?
    # flowers dataset
    # experiments.append(
    #     make("vit_flowers_cosine", 5e-5, "cosine", **with_warmup, dataset_name="huggan/flowers")
    # )

    return experiments

# Run experiments and visualize results
def run_all_experiments():
    """Run every configuration from ``get_experiment_configs`` in order.

    Returns:
        List with one dict per experiment holding "experiment", "accuracy",
        "f1", "precision", "recall" (taken from the eval_* metrics returned by
        ``run_vit_experiment``) and the "config" that produced them.
    """
    # Summary key -> key in the metrics dict returned by run_vit_experiment
    metric_keys = {
        "accuracy": "eval_accuracy",
        "f1": "eval_f1",
        "precision": "eval_precision",
        "recall": "eval_recall",
    }

    summaries = []
    for experiment_config in get_experiment_configs():
        print(f"\n{'='*50}")
        print(f"Running experiment: {experiment_config['experiment_name']}")
        print(f"{'='*50}\n")

        metrics = run_vit_experiment(experiment_config)

        summary = {"experiment": experiment_config['experiment_name']}
        for summary_key, metric_key in metric_keys.items():
            summary[summary_key] = metrics[metric_key]
        summary["config"] = experiment_config
        summaries.append(summary)

    return summaries

# Visualize and compare results
def visualize_results(results):
    """Plot and tabulate the outcome of a batch of experiments.

    Writes three figures to the working directory (accuracy_comparison.png,
    metrics_comparison.png, scheduler_comparison.png), prints a summary table
    and saves it as experiment_results.csv.

    Args:
        results: List of dicts as produced by ``run_all_experiments`` (keys
            "experiment", "accuracy", "f1", "precision", "recall", "config").

    Returns:
        pandas DataFrame with one row per experiment. For an empty ``results``
        list an empty frame with the expected columns is returned and no files
        are written.
    """
    columns = [
        "Experiment", "Accuracy", "F1 Score", "Precision", "Recall",
        "Learning Rate", "Scheduler", "Optimizer", "Dataset",
    ]

    # Create DataFrame for easier plotting
    df = pd.DataFrame(
        [
            {
                "Experiment": result["experiment"],
                "Accuracy": result["accuracy"],
                "F1 Score": result["f1"],
                "Precision": result["precision"],
                "Recall": result["recall"],
                "Learning Rate": result["config"]["learning_rate"],
                "Scheduler": result["config"]["scheduler_name"],
                "Optimizer": result["config"]["optimizer_name"],
                "Dataset": result["config"]["dataset_name"]
            }
            for result in results
        ],
        columns=columns,
    )

    # Nothing to plot: avoid failing on missing columns / empty axes
    if df.empty:
        print("No experiment results to visualize.")
        return df

    # Plot accuracy comparison
    plt.figure(figsize=(14, 8))
    plt.bar(df["Experiment"], df["Accuracy"], color='skyblue')
    plt.xlabel('Experiment')
    plt.ylabel('Accuracy')
    plt.title('Comparison of Model Accuracy Across Experiments')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig("accuracy_comparison.png")
    plt.close()

    # Plot all metrics for a more comprehensive comparison
    plt.figure(figsize=(16, 10))
    metrics = ["Accuracy", "F1 Score", "Precision", "Recall"]
    x = np.arange(len(df["Experiment"]))
    width = 0.2

    for i, metric in enumerate(metrics):
        plt.bar(x + i*width, df[metric], width=width, label=metric)

    plt.xlabel('Experiment')
    plt.ylabel('Score')
    plt.title('Comparison of Metrics Across Experiments')
    # Center the tick under the group of four bars
    plt.xticks(x + width*1.5, df["Experiment"], rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    plt.savefig("metrics_comparison.png")
    plt.close()

    # Plot results by scheduler type
    plt.figure(figsize=(14, 8))
    schedulers = df["Scheduler"].unique()
    for scheduler in schedulers:
        # Sort by learning rate so the connecting line runs left to right
        scheduler_data = df[df["Scheduler"] == scheduler].sort_values("Learning Rate")
        plt.plot(scheduler_data["Learning Rate"], scheduler_data["Accuracy"], 'o-', label=scheduler)

    plt.xlabel('Learning Rate')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs. Learning Rate by Scheduler Type')
    plt.xscale('log')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("scheduler_comparison.png")
    plt.close()

    # Create a table with results
    print("Results Summary:")
    print(df[["Experiment", "Accuracy", "F1 Score", "Precision", "Recall", "Scheduler", "Learning Rate", "Optimizer", "Dataset"]])

    # Save results to CSV
    df.to_csv("experiment_results.csv", index=False)

    return df

# Function to run a single experiment (useful for testing)
def run_single_experiment(experiment_index=0):
    """Run one configuration from ``get_experiment_configs`` (useful for testing).

    Args:
        experiment_index: Zero-based position of the configuration to run.

    Returns:
        The metrics dict returned by ``run_vit_experiment``, or None when
        ``experiment_index`` is outside ``0 .. len(configs) - 1``.
    """
    configs = get_experiment_configs()
    # Reject negative indices too: they would otherwise wrap around to the end
    # of the list (or raise IndexError) instead of hitting this message.
    if not 0 <= experiment_index < len(configs):
        print(f"Invalid experiment index. Choose between 0 and {len(configs)-1}")
        return

    config = configs[experiment_index]
    print(f"Running single experiment: {config['experiment_name']}")
    eval_results = run_vit_experiment(config)

    print(f"\nResults for {config['experiment_name']}:")
    print(f"Accuracy: {eval_results['eval_accuracy']:.4f}")
    print(f"F1 Score: {eval_results['eval_f1']:.4f}")
    print(f"Precision: {eval_results['eval_precision']:.4f}")
    print(f"Recall: {eval_results['eval_recall']:.4f}")

    return eval_results

def run_optimizer_sweep():
    """Entry point executed by the W&B sweep agent for one parameter combination.

    Reads the sampled hyperparameters from ``wandb.config``, builds an
    experiment configuration from them and runs it with ``run_vit_experiment``.

    Returns:
        The metrics dict returned by ``run_vit_experiment``.
    """
    # Initialize W&B run
    run = wandb.init()
    print("W&B initialized:", run)

    config = wandb.config

    # Update the experiment configuration with sweep parameters
    experiment_config = {
        "model_name": "google/vit-base-patch16-224-in21k",
        "dataset_name": "jbarat/plant_species",
        "batch_size": config.batch_size,
        "num_epochs": config.num_epochs,
        "learning_rate": config.learning_rate,
        "weight_decay": 0.01,
        "optimizer_name": config.optimizer_name,
        "experiment_name": f"vit_{config.optimizer_name}_lr_{config.learning_rate}",
        "scheduler_name": config.scheduler_name,
    }

    # Run the experiment
    eval_results = run_vit_experiment(experiment_config)

    # run_vit_experiment logs the final_* metrics itself and may already have
    # closed the run; logging without an active run raises
    # "You must call wandb.init() before wandb.log()". Only log when a run is
    # still open.
    if wandb.run is not None:
        wandb.log({
            "final_accuracy": eval_results["eval_accuracy"],
            "final_f1": eval_results["eval_f1"],
            "final_precision": eval_results["eval_precision"],
            "final_recall": eval_results["eval_recall"],
        })

    # Finish the W&B run
    # wandb.finish()

    return eval_results

In [None]:

# Main execution: exactly one of the three options below should be active.
if __name__ == "__main__":
    print("Starting ViT experiments with different learning rate schedulers...")
    # NOTE(review): presumably the default W&B project for runs that do not pass
    # project= explicitly -- confirm. The sweep below is created in a different
    # project ("ViT-Optimizer-Sweep").
    os.environ["WANDB_PROJECT"] = "ViT-LR-Schedulers"

    # Option 1: Run all experiments (time-consuming)
    # results = run_all_experiments()
    # results_df = visualize_results(results)

    # Option 2: Run a single experiment for testing
    # run_single_experiment(0)  # Try the baseline experiment first

    # Option 3: Optimizer sweep. Registers sweep_config with W&B, then starts an
    # agent that calls run_optimizer_sweep for the parameter combinations.
    sweep_id = wandb.sweep(sweep_config, project="ViT-Optimizer-Sweep")
    wandb.agent(sweep_id, function=run_optimizer_sweep)

    print("Experiments completed!")

Starting ViT experiments with different learning rate schedulers...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: tm2oh2c4
Sweep URL: https://wandb.ai/dl_project_sp25/ViT-Optimizer-Sweep/sweeps/tm2oh2c4


[34m[1mwandb[0m: Agent Starting Run: a3aipvvg with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	learning_rate: 2e-05
[34m[1mwandb[0m: 	num_epochs: 3
[34m[1mwandb[0m: 	optimizer_name: AdamW
[34m[1mwandb[0m: 	scheduler_name: cosine
[34m[1mwandb[0m: Currently logged in as: [33mewg[0m ([33mdl_project_sp25[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


W&B initialized: <wandb.sdk.wandb_run.Run object at 0x106a1c990>




Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.0476,2.001338,0.382812,0.350318,0.346596,0.382812
20,1.9855,1.930775,0.59375,0.530631,0.498367,0.59375
30,1.9388,1.867357,0.640625,0.587718,0.566239,0.640625
40,1.8646,1.810676,0.65625,0.609868,0.586438,0.65625
50,1.7252,1.759613,0.695312,0.677438,0.693231,0.695312
60,1.6993,1.714998,0.710938,0.699407,0.717201,0.710938
70,1.6492,1.681201,0.71875,0.706872,0.725735,0.71875
80,1.6428,1.657151,0.703125,0.690678,0.709404,0.703125
90,1.5584,1.642181,0.710938,0.697061,0.728322,0.710938
100,1.5663,1.634265,0.710938,0.698229,0.71692,0.710938


[{'loss': 2.0476, 'grad_norm': 1.7465901374816895, 'learning_rate': 1.9723699203976768e-05, 'epoch': 0.25, 'step': 10}, {'eval_loss': 2.001338481903076, 'eval_accuracy': 0.3828125, 'eval_f1': 0.35031790525325623, 'eval_precision': 0.3465958656379852, 'eval_recall': 0.3828125, 'eval_runtime': 7.3739, 'eval_samples_per_second': 17.359, 'eval_steps_per_second': 1.085, 'epoch': 0.25, 'step': 10}, {'loss': 1.9855, 'grad_norm': 1.8043400049209595, 'learning_rate': 1.8788171126619653e-05, 'epoch': 0.5, 'step': 20}, {'eval_loss': 1.9307751655578613, 'eval_accuracy': 0.59375, 'eval_f1': 0.5306306573601949, 'eval_precision': 0.4983674518327068, 'eval_recall': 0.59375, 'eval_runtime': 7.7362, 'eval_samples_per_second': 16.546, 'eval_steps_per_second': 1.034, 'epoch': 0.5, 'step': 20}, {'loss': 1.9388, 'grad_norm': 1.820662498474121, 'learning_rate': 1.725374371012288e-05, 'epoch': 0.75, 'step': 30}, {'eval_loss': 1.8673568964004517, 'eval_accuracy': 0.640625, 'eval_f1': 0.5877184098691453, 'eval_

Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,1.5078,1.630374,0.710938,0.698229,0.71692,0.710938
20,1.5307,1.627552,0.710938,0.698229,0.71692,0.710938
30,1.5629,1.620325,0.710938,0.698507,0.71748,0.710938
40,1.5407,1.607143,0.710938,0.698507,0.71748,0.710938
50,1.469,1.584902,0.71875,0.705401,0.726865,0.71875
60,1.4741,1.552525,0.734375,0.721099,0.740344,0.734375
70,1.4323,1.513136,0.742188,0.733439,0.753108,0.742188
80,1.4387,1.465119,0.757812,0.749984,0.768109,0.757812
90,1.2945,1.411643,0.765625,0.762043,0.773445,0.765625
100,1.2605,1.355242,0.796875,0.793735,0.801687,0.796875


[{'loss': 1.5078, 'grad_norm': 1.7949110269546509, 'learning_rate': 2.7630079602323335e-07, 'epoch': 0.25, 'step': 10}, {'eval_loss': 1.6303739547729492, 'eval_accuracy': 0.7109375, 'eval_f1': 0.6982291695523868, 'eval_precision': 0.7169196975665266, 'eval_recall': 0.7109375, 'eval_runtime': 6.9634, 'eval_samples_per_second': 18.382, 'eval_steps_per_second': 1.149, 'epoch': 0.25, 'step': 10}, {'loss': 1.5307, 'grad_norm': 1.8965873718261719, 'learning_rate': 1.2118288733803462e-06, 'epoch': 0.5, 'step': 20}, {'eval_loss': 1.6275523900985718, 'eval_accuracy': 0.7109375, 'eval_f1': 0.6982291695523868, 'eval_precision': 0.7169196975665266, 'eval_recall': 0.7109375, 'eval_runtime': 7.4969, 'eval_samples_per_second': 17.074, 'eval_steps_per_second': 1.067, 'epoch': 0.5, 'step': 20}, {'loss': 1.5629, 'grad_norm': 1.8974571228027344, 'learning_rate': 2.746256289877125e-06, 'epoch': 0.75, 'step': 30}, {'eval_loss': 1.6203253269195557, 'eval_accuracy': 0.7109375, 'eval_f1': 0.6985071142639252, 

Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,0.9934,1.180543,0.828125,0.82645,0.830715,0.828125
20,0.9493,1.132684,0.828125,0.826029,0.828742,0.828125
30,0.9608,1.086893,0.820312,0.817134,0.819652,0.820312
40,0.8818,1.051632,0.835938,0.834565,0.837333,0.835938
50,0.7881,1.02403,0.835938,0.835251,0.841686,0.835938
60,0.7643,1.001675,0.828125,0.825265,0.828876,0.828125
70,0.7191,0.98694,0.828125,0.826694,0.830595,0.828125
80,0.7343,0.971487,0.828125,0.826495,0.83042,0.828125
90,0.6886,0.960844,0.828125,0.826694,0.830595,0.828125
100,0.674,0.957579,0.820312,0.818713,0.823575,0.820312


[{'loss': 0.9934, 'grad_norm': 1.6726435422897339, 'learning_rate': 1.9723699203976768e-05, 'epoch': 0.25, 'step': 10}, {'eval_loss': 1.1805428266525269, 'eval_accuracy': 0.828125, 'eval_f1': 0.8264503844467079, 'eval_precision': 0.8307146697874495, 'eval_recall': 0.828125, 'eval_runtime': 7.0293, 'eval_samples_per_second': 18.209, 'eval_steps_per_second': 1.138, 'epoch': 0.25, 'step': 10}, {'loss': 0.9493, 'grad_norm': 1.8174797296524048, 'learning_rate': 1.878817112661966e-05, 'epoch': 0.5, 'step': 20}, {'eval_loss': 1.1326837539672852, 'eval_accuracy': 0.828125, 'eval_f1': 0.8260285638550091, 'eval_precision': 0.8287417010073259, 'eval_recall': 0.828125, 'eval_runtime': 6.7346, 'eval_samples_per_second': 19.006, 'eval_steps_per_second': 1.188, 'epoch': 0.5, 'step': 20}, {'loss': 0.9608, 'grad_norm': 1.8577589988708496, 'learning_rate': 1.725374371012288e-05, 'epoch': 0.75, 'step': 30}, {'eval_loss': 1.08689284324646, 'eval_accuracy': 0.8203125, 'eval_f1': 0.8171338127727581, 'eval_p

Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,0.5832,0.911223,0.820312,0.818998,0.824111,0.820312
20,0.5728,0.910086,0.820312,0.818998,0.824111,0.820312
30,0.6201,0.90926,0.828125,0.826446,0.833516,0.828125
40,0.6242,0.904044,0.835938,0.834228,0.840361,0.835938
50,0.5782,0.892165,0.851562,0.851183,0.862903,0.851562
60,0.5666,0.877373,0.84375,0.842794,0.850532,0.84375
70,0.5362,0.867887,0.835938,0.83446,0.840488,0.835938
80,0.5369,0.836221,0.84375,0.841893,0.847906,0.84375
90,0.4962,0.822638,0.851562,0.851183,0.862903,0.851562
100,0.4587,0.799615,0.828125,0.826983,0.83204,0.828125


Epoch 2/3


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,0.3521,0.721406,0.835938,0.835042,0.841122,0.835938
20,0.3241,0.713091,0.835938,0.834889,0.840861,0.835938
30,0.3233,0.694079,0.835938,0.83446,0.840488,0.835938
40,0.3037,0.668224,0.835938,0.835042,0.841122,0.835938
50,0.2683,0.667552,0.84375,0.843772,0.852064,0.84375
60,0.2586,0.656514,0.835938,0.835042,0.841122,0.835938
70,0.2499,0.650288,0.835938,0.835042,0.841122,0.835938
80,0.2592,0.639067,0.835938,0.835042,0.841122,0.835938
90,0.2418,0.63768,0.835938,0.835042,0.841122,0.835938
100,0.24,0.641208,0.835938,0.835042,0.841122,0.835938


Epoch 3/3


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,0.2316,0.641601,0.835938,0.835042,0.841122,0.835938
20,0.2316,0.640871,0.835938,0.835042,0.841122,0.835938
30,0.2372,0.640662,0.835938,0.835042,0.841122,0.835938
40,0.2327,0.638954,0.835938,0.835042,0.841122,0.835938
50,0.225,0.637151,0.835938,0.835042,0.841122,0.835938
60,0.2214,0.635446,0.828125,0.827499,0.832276,0.828125
70,0.2165,0.629218,0.835938,0.835042,0.841122,0.835938
80,0.2218,0.611397,0.835938,0.835042,0.841122,0.835938
90,0.2044,0.619512,0.851562,0.851315,0.86091,0.851562
100,0.1977,0.626304,0.828125,0.82748,0.832592,0.828125


Starting training for vit_AdamW_lr_2e-05...


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,0.1708,0.603775,0.835938,0.835438,0.842019,0.835938
20,0.1654,0.588906,0.828125,0.827894,0.832222,0.828125
30,0.1629,0.599685,0.820312,0.821361,0.826671,0.820312
40,0.1569,0.58306,0.820312,0.820412,0.824643,0.820312
50,0.1509,0.585128,0.820312,0.820412,0.824643,0.820312
60,0.1473,0.587598,0.8125,0.813359,0.818169,0.8125
70,0.1445,0.58452,0.820312,0.820412,0.824643,0.820312
80,0.1469,0.578535,0.828125,0.827894,0.832222,0.828125
90,0.1425,0.5761,0.820312,0.820412,0.824643,0.820312
100,0.1427,0.579333,0.820312,0.820412,0.824643,0.820312


Evaluating vit_AdamW_lr_2e-05...


0,1
epoch,▁▅█▁▅█
eval/accuracy,▁▅▅▆▆▆▆▆▆▆▇▇▇██████████████████████████▇
eval/f1,▁▄▆▆▆▆▆▆▆▆▇▇▇▇██████████████████████████
eval/loss,█▇▇▇▆▆▆▆▆▆▆▅▅▅▄▃▃▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/precision,▁▄▆▆▆▆▆▆▆▆▇█▇██▇▇██████████████████▇▇█▇█
eval/recall,▁▂▂▂▂▂▂▂▂▂▃▆▆▇▇▇▇▆▆▇▇▇███▇▇█▇▇▇▇▇▇█▇▇▇▆▇
eval/runtime,▅▇▅▅▅▅▅▅▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█▅▅▅▅▅▅▅▅▅▅▅▆▅▁
eval/samples_per_second,▆▅▁▇█▇▆▇█▆▇██▇▇██▇█▆█▆█▇▇█▆█▇▇███▆██▇▆▇▇
eval/steps_per_second,▆▆█▆▇▇█▇▅██▇█▇▇██▆▇█▇▇█▇█▃▁▇▇█▇▇███▇▇▇▅▆
final_accuracy,▁

0,1
epoch,3.0
eval/accuracy,0.8125
eval/f1,0.80871
eval/loss,0.6893
eval/precision,0.84449
eval/recall,0.8125
eval/runtime,1.8313
eval/samples_per_second,17.474
eval/steps_per_second,1.092
final_accuracy,0.8125


[34m[1mwandb[0m: [32m[41mERROR[0m Run a3aipvvg errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/Users/egoh02/Github/CS7643-Project-SP25/.venv/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/var/folders/3x/ww_4869j0gq7501tpsbdxh0m0000gq/T/ipykernel_3005/3738181047.py", line 606, in run_optimizer_sweep
[34m[1mwandb[0m: [32m[41mERROR[0m     wandb.log({
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/Users/egoh02/Github/CS7643-Project-SP25/.venv/lib/python3.11/site-packages/wandb/sdk/lib/preinit.py", line 36, in preinit_wrapper
[34m[1mwandb[0m: [32m[41mERROR[0m     raise wandb.Error(f"You must call wandb.init() before {name}()")
[34m[1mwandb[0m: [32m[41mERROR[0m wandb.errors.errors.Error: You must call wandb.init() before wandb.log()
[34m[1mwan

W&B initialized: <wandb.sdk.wandb_run.Run object at 0x1695a0550>


Loading dataset: jbarat/plant_species
Dataset prepared with 640 training, 128 validation, and 32 test examples


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
10,2.1135,2.109639,0.070312,0.069318,0.070427,0.070312
