In [1]:
import torch
print(torch.cuda.is_available())   # Should return True
print(torch.cuda.get_device_name(0))  # Should show RTX 4060

True
NVIDIA GeForce RTX 4060 Laptop GPU


In [2]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


# Stage 2 - Paths
stage2_train_dir = "../Dataset/stage2/train"
stage2_val_dir = "../Dataset/stage2/val"
stage2_test_dir = "../Dataset/stage2/test"

# Transforms (same as stage 1)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Datasets and loaders
train_dataset2 = datasets.ImageFolder(stage2_train_dir, transform=transform)
val_dataset2 = datasets.ImageFolder(stage2_val_dir, transform=transform)
test_dataset2 = datasets.ImageFolder(stage2_test_dir, transform=transform)

train_loader2 = DataLoader(train_dataset2, batch_size=32, shuffle=True)
val_loader2 = DataLoader(val_dataset2, batch_size=32)
test_loader2 = DataLoader(test_dataset2, batch_size=32)

# Class names
class_names2 = train_dataset2.classes
print("Stage 2 Classes:", class_names2)



Stage 2 Classes: ['Bacterial Pneumonia', 'Viral Pneumonia']


In [3]:
from transformers import ViTForImageClassification
# Define model
model2 = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=2  # Binary classification
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model2.to(device)


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermed

In [4]:
from tqdm import tqdm
# Optimizer and loss
optimizer2 = torch.optim.Adam(model2.parameters(), lr=2e-5)
criterion2 = torch.nn.CrossEntropyLoss()

# Early stopping params
best_val_loss2 = float('inf')
patience = 3
trigger_times = 0

# Training loop
for epoch in range(1, 16):
    model2.train()
    train_loss = 0.0

    loop = tqdm(train_loader2, desc=f"Epoch {epoch}/15")
    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)

        optimizer2.zero_grad()
        outputs = model2(images).logits
        loss = criterion2(outputs, labels)
        loss.backward()
        optimizer2.step()

        train_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    avg_train_loss = train_loss / len(train_loader2)

    # Validation
    model2.eval()
    val_loss = 0.0
    correct, total = 0, 0

    with torch.no_grad():
        for images, labels in val_loader2:
            images, labels = images.to(device), labels.to(device)
            outputs = model2(images).logits
            loss = criterion2(outputs, labels)
            val_loss += loss.item()

            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    avg_val_loss = val_loss / len(val_loader2)
    val_accuracy = 100 * correct / total

    print(f"\nEpoch {epoch} finished. Avg Train Loss: {avg_train_loss:.4f}")
    print(f"Validation Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy:.2f}%")

    # Check for improvement
    if avg_val_loss < best_val_loss2:
        best_val_loss2 = avg_val_loss
        torch.save(model2.state_dict(), "../Model/best_model_stage2.pth")
        print(f" Best model saved at epoch {epoch}.")
        trigger_times = 0
    else:
        trigger_times += 1
        if trigger_times >= patience:
            print(f" Early stopping triggered after {epoch} epochs.")
            break


Epoch 1/15: 100%|██████████| 76/76 [01:51<00:00,  1.46s/it, loss=0.381]



Epoch 1 finished. Avg Train Loss: 0.5821
Validation Loss: 0.5492, Accuracy: 71.70%
 Best model saved at epoch 1.


Epoch 2/15: 100%|██████████| 76/76 [01:24<00:00,  1.11s/it, loss=0.497]



Epoch 2 finished. Avg Train Loss: 0.4956
Validation Loss: 0.4839, Accuracy: 77.68%
 Best model saved at epoch 2.


Epoch 3/15: 100%|██████████| 76/76 [01:45<00:00,  1.39s/it, loss=0.496]



Epoch 3 finished. Avg Train Loss: 0.3874
Validation Loss: 0.4528, Accuracy: 81.67%
 Best model saved at epoch 3.


Epoch 4/15: 100%|██████████| 76/76 [01:47<00:00,  1.41s/it, loss=0.389] 



Epoch 4 finished. Avg Train Loss: 0.2918
Validation Loss: 0.4831, Accuracy: 80.05%


Epoch 5/15: 100%|██████████| 76/76 [01:42<00:00,  1.34s/it, loss=0.179] 



Epoch 5 finished. Avg Train Loss: 0.2403
Validation Loss: 0.4751, Accuracy: 79.18%


Epoch 6/15: 100%|██████████| 76/76 [01:40<00:00,  1.33s/it, loss=0.717] 



Epoch 6 finished. Avg Train Loss: 0.1283
Validation Loss: 0.4951, Accuracy: 81.30%
 Early stopping triggered after 6 epochs.


In [28]:
import gc
import torch

# Remove all non-private global variables
for obj in list(globals().keys()):
    if not obj.startswith("_") and obj not in ["gc", "torch"]:
        del globals()[obj]

# Run garbage collector
gc.collect()

# Clear GPU memory
torch.cuda.empty_cache()
torch.cuda.ipc_collect()


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [6]:
# Load best model
model2.load_state_dict(torch.load("../Model/best_model_stage2.pth",weights_only=True))
model2.eval()

correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader2:
        images, labels = images.to(device), labels.to(device)
        outputs = model2(images).logits
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

test_accuracy = 100 * correct / total
print(f" Final Test Accuracy (Stage 2): {test_accuracy:.2f}%")


 Final Test Accuracy (Stage 2): 81.14%


In [22]:

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Set model to evaluation mode
model2.eval()

true_labels = []
pred_labels = []

# Disable gradient calculation for inference
with torch.no_grad():
    for images, labels in val_loader2:
        images, labels = images.to(device), labels.to(device)
        outputs = model2(images)
        _, preds = torch.max(outputs.logits, 1)

        true_labels.extend(labels.cpu().numpy())
        pred_labels.extend(preds.cpu().numpy())



# Generate and plot confusion matrix
cm = confusion_matrix(true_labels, pred_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names2)

fig, ax = plt.subplots(figsize=(8, 6))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=45)
plt.title("Confusion Matrix - Stage 2")
plt.grid(False)
plt.show()


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


HyperParameter Tuning

In [32]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification
from tqdm import tqdm
import os
import copy
from torch.utils.tensorboard import SummaryWriter
from torch.amp import GradScaler, autocast
import csv

def objective(trial):
    # Hyperparameters
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16,32,64])
    num_epochs = trial.suggest_int('num_epochs', 5, 15)
    hidden_size = trial.suggest_int('hidden_size', 128, 512, step=64)
    patience = 3

    print(f"\n Trial {trial.number} | LR: {learning_rate:.1e} | BS: {batch_size} | Epochs: {num_epochs} | Hidden: {hidden_size}")

    # Model setup
    model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=2)
    model.classifier = nn.Sequential(
        nn.Linear(model.classifier.in_features, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, 2)
    )
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, verbose=True)
    scaler = GradScaler()

    train_loader = DataLoader(train_dataset2, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset2, batch_size=batch_size)

    writer = SummaryWriter(log_dir=f"../logs/stage2_trial{trial.number}")
    csv_path = f"../logs/stage2_trial{trial.number}.csv"
    with open(csv_path, mode='w', newline='') as f:
        writer_csv = csv.writer(f)
        writer_csv.writerow(["epoch", "accuracy", "f1", "precision", "recall"])

    best_accuracy = 0
    best_model_state = None
    trigger_times = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        model.train()
        running_loss = 0

        for images, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast(device_type='cuda'):
                outputs = model(images)
                loss = criterion(outputs.logits, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            running_loss += loss.item()

        # Validation
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                with autocast(device_type='cuda'):
                    outputs = model(images)
                _, preds = torch.max(outputs.logits, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        accuracy = accuracy_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds)
        precision = precision_score(all_labels, all_preds)
        recall = recall_score(all_labels, all_preds)

        writer.add_scalar("Loss/train", running_loss / len(train_loader), epoch)
        writer.add_scalar("Accuracy/val", accuracy, epoch)
        writer.add_scalar("F1/val", f1, epoch)
        writer.add_scalar("Precision/val", precision, epoch)
        writer.add_scalar("Recall/val", recall, epoch)

        with open(csv_path, mode='a', newline='') as f:
            writer_csv = csv.writer(f)
            writer_csv.writerow([epoch, accuracy, f1, precision, recall])

        print(f"Acc: {accuracy:.4f} | F1: {f1:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

        scheduler.step(accuracy)
        trial.report(accuracy, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model_state = copy.deepcopy(model.state_dict())
            trigger_times = 0
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Save best model and hyperparameters
    os.makedirs("../checkpoints", exist_ok=True)
    model_path = f"../checkpoints/stage2_best_model_trial{trial.number}.pth"
    torch.save({
        'model_state_dict': best_model_state,
        'hyperparameters': {
            'learning_rate': learning_rate,
            'batch_size': batch_size,
            'num_epochs': num_epochs,
            'hidden_size': hidden_size,
        }
    }, model_path)
    print(f"Best model saved to: {model_path}")

    writer.close()
    return best_accuracy


In [18]:
print("Starting tuning...")
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=5)
print(f"Best trial: {study.best_trial.number} | Accuracy: {study.best_value:.4f}")
print(study.best_params)

[I 2025-04-18 11:18:23,665] A new study created in memory with name: no-name-931d4e37-8f3e-41bc-aa14-f3cc76568c12


Starting tuning...

 Trial 0 | LR: 2.5e-05 | BS: 64 | Epochs: 15 | Hidden: 320


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



📘 Epoch 1/15


Training Epoch 1: 100%|██████████| 38/38 [10:30<00:00, 16.60s/it]


 Acc: 0.7556 | F1: 0.7401 | Precision: 0.7904 | Recall: 0.6958

📘 Epoch 2/15


Training Epoch 2: 100%|██████████| 38/38 [11:07<00:00, 17.56s/it]


 Acc: 0.7506 | F1: 0.7763 | Precision: 0.7039 | Recall: 0.8653

📘 Epoch 3/15


Training Epoch 3: 100%|██████████| 38/38 [10:52<00:00, 17.17s/it]


 Acc: 0.7781 | F1: 0.7973 | Precision: 0.7338 | Recall: 0.8728

📘 Epoch 4/15


Training Epoch 4: 100%|██████████| 38/38 [09:00<00:00, 14.23s/it]


 Acc: 0.8117 | F1: 0.8192 | Precision: 0.7880 | Recall: 0.8529

📘 Epoch 5/15


Training Epoch 5: 100%|██████████| 38/38 [09:08<00:00, 14.42s/it]


 Acc: 0.8267 | F1: 0.8335 | Precision: 0.8018 | Recall: 0.8678

📘 Epoch 6/15


Training Epoch 6: 100%|██████████| 38/38 [10:05<00:00, 15.93s/it]


 Acc: 0.7930 | F1: 0.8052 | Precision: 0.7605 | Recall: 0.8554

📘 Epoch 7/15


Training Epoch 7: 100%|██████████| 38/38 [10:48<00:00, 17.07s/it]


 Acc: 0.8142 | F1: 0.8198 | Precision: 0.7958 | Recall: 0.8454

📘 Epoch 8/15


Training Epoch 8: 100%|██████████| 38/38 [10:06<00:00, 15.95s/it]


 Acc: 0.7930 | F1: 0.7707 | Precision: 0.8638 | Recall: 0.6958
⏹️ Early stopping at epoch 8


[I 2025-04-18 12:42:09,931] Trial 0 finished with value: 0.8266832917705735 and parameters: {'learning_rate': 2.5441701240034406e-05, 'batch_size': 64, 'num_epochs': 15, 'hidden_size': 320}. Best is trial 0 with value: 0.8266832917705735.


💾 Best model saved to: ../checkpoints/stage2_best_model_trial0.pth

 Trial 1 | LR: 4.1e-05 | BS: 32 | Epochs: 6 | Hidden: 512


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



📘 Epoch 1/6


Training Epoch 1: 100%|██████████| 76/76 [07:41<00:00,  6.08s/it]


 Acc: 0.7618 | F1: 0.7516 | Precision: 0.7853 | Recall: 0.7207

📘 Epoch 2/6


Training Epoch 2: 100%|██████████| 76/76 [09:06<00:00,  7.19s/it]


 Acc: 0.7756 | F1: 0.7810 | Precision: 0.7625 | Recall: 0.8005

📘 Epoch 3/6


Training Epoch 3: 100%|██████████| 76/76 [07:01<00:00,  5.54s/it]


 Acc: 0.8005 | F1: 0.8122 | Precision: 0.7672 | Recall: 0.8628

📘 Epoch 4/6


Training Epoch 4: 100%|██████████| 76/76 [08:32<00:00,  6.75s/it]


 Acc: 0.7606 | F1: 0.7318 | Precision: 0.8317 | Recall: 0.6534

📘 Epoch 5/6


Training Epoch 5: 100%|██████████| 76/76 [08:32<00:00,  6.74s/it]


 Acc: 0.8005 | F1: 0.7878 | Precision: 0.8414 | Recall: 0.7406

📘 Epoch 6/6


Training Epoch 6: 100%|██████████| 76/76 [07:19<00:00,  5.79s/it]


 Acc: 0.7805 | F1: 0.7939 | Precision: 0.7483 | Recall: 0.8454
⏹️ Early stopping at epoch 6


[I 2025-04-18 13:33:36,027] Trial 1 finished with value: 0.800498753117207 and parameters: {'learning_rate': 4.1171696049897376e-05, 'batch_size': 32, 'num_epochs': 6, 'hidden_size': 512}. Best is trial 0 with value: 0.8266832917705735.


💾 Best model saved to: ../checkpoints/stage2_best_model_trial1.pth

 Trial 2 | LR: 1.9e-05 | BS: 64 | Epochs: 10 | Hidden: 512


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



📘 Epoch 1/10


Training Epoch 1: 100%|██████████| 38/38 [07:10<00:00, 11.33s/it]


 Acc: 0.7070 | F1: 0.7296 | Precision: 0.6774 | Recall: 0.7905

📘 Epoch 2/10


Training Epoch 2: 100%|██████████| 38/38 [09:59<00:00, 15.77s/it]


 Acc: 0.7456 | F1: 0.7086 | Precision: 0.8294 | Recall: 0.6185

📘 Epoch 3/10


Training Epoch 3: 100%|██████████| 38/38 [15:12<00:00, 24.03s/it]


 Acc: 0.7556 | F1: 0.7586 | Precision: 0.7494 | Recall: 0.7681

📘 Epoch 4/10


Training Epoch 4: 100%|██████████| 38/38 [13:03<00:00, 20.61s/it]


 Acc: 0.7693 | F1: 0.7895 | Precision: 0.7259 | Recall: 0.8653

📘 Epoch 5/10


Training Epoch 5: 100%|██████████| 38/38 [11:35<00:00, 18.31s/it]


 Acc: 0.7930 | F1: 0.8042 | Precision: 0.7629 | Recall: 0.8504

📘 Epoch 6/10


Training Epoch 6: 100%|██████████| 38/38 [13:05<00:00, 20.67s/it]


 Acc: 0.7656 | F1: 0.7227 | Precision: 0.8845 | Recall: 0.6110

📘 Epoch 7/10


Training Epoch 7: 100%|██████████| 38/38 [13:46<00:00, 21.75s/it]


 Acc: 0.8092 | F1: 0.8176 | Precision: 0.7831 | Recall: 0.8554

📘 Epoch 8/10


Training Epoch 8: 100%|██████████| 38/38 [09:11<00:00, 14.51s/it]


 Acc: 0.8155 | F1: 0.8173 | Precision: 0.8093 | Recall: 0.8254

📘 Epoch 9/10


Training Epoch 9: 100%|██████████| 38/38 [08:44<00:00, 13.80s/it]


 Acc: 0.8130 | F1: 0.8193 | Precision: 0.7925 | Recall: 0.8479

📘 Epoch 10/10


Training Epoch 10: 100%|██████████| 38/38 [10:57<00:00, 17.31s/it]


 Acc: 0.8005 | F1: 0.8086 | Precision: 0.7770 | Recall: 0.8429


[I 2025-04-18 15:29:16,655] Trial 2 finished with value: 0.8154613466334164 and parameters: {'learning_rate': 1.883397917600412e-05, 'batch_size': 64, 'num_epochs': 10, 'hidden_size': 512}. Best is trial 0 with value: 0.8266832917705735.


💾 Best model saved to: ../checkpoints/stage2_best_model_trial2.pth

 Trial 3 | LR: 8.5e-05 | BS: 32 | Epochs: 9 | Hidden: 448


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



📘 Epoch 1/9


Training Epoch 1: 100%|██████████| 76/76 [11:10<00:00,  8.83s/it]


 Acc: 0.7045 | F1: 0.6146 | Precision: 0.8832 | Recall: 0.4713

📘 Epoch 2/9


Training Epoch 2: 100%|██████████| 76/76 [14:03<00:00, 11.10s/it]


 Acc: 0.7506 | F1: 0.7333 | Precision: 0.7880 | Recall: 0.6858

📘 Epoch 3/9


Training Epoch 3: 100%|██████████| 76/76 [09:02<00:00,  7.14s/it]


 Acc: 0.7706 | F1: 0.7579 | Precision: 0.8022 | Recall: 0.7182

📘 Epoch 4/9


Training Epoch 4: 100%|██████████| 76/76 [13:45<00:00, 10.86s/it]


 Acc: 0.7519 | F1: 0.7872 | Precision: 0.6891 | Recall: 0.9177

📘 Epoch 5/9


Training Epoch 5: 100%|██████████| 76/76 [09:03<00:00,  7.15s/it]


 Acc: 0.7756 | F1: 0.7716 | Precision: 0.7855 | Recall: 0.7581

📘 Epoch 6/9


Training Epoch 6: 100%|██████████| 76/76 [07:32<00:00,  5.95s/it]


 Acc: 0.7768 | F1: 0.7591 | Precision: 0.8246 | Recall: 0.7032

📘 Epoch 7/9


Training Epoch 7: 100%|██████████| 76/76 [08:22<00:00,  6.61s/it]


 Acc: 0.7793 | F1: 0.7662 | Precision: 0.8146 | Recall: 0.7232

📘 Epoch 8/9


Training Epoch 8: 100%|██████████| 76/76 [07:41<00:00,  6.07s/it]


 Acc: 0.7506 | F1: 0.7863 | Precision: 0.6879 | Recall: 0.9177

📘 Epoch 9/9


Training Epoch 9: 100%|██████████| 76/76 [08:00<00:00,  6.32s/it]


 Acc: 0.7968 | F1: 0.7985 | Precision: 0.7917 | Recall: 0.8055


[I 2025-04-18 17:05:06,354] Trial 3 finished with value: 0.7967581047381546 and parameters: {'learning_rate': 8.494287124746428e-05, 'batch_size': 32, 'num_epochs': 9, 'hidden_size': 448}. Best is trial 0 with value: 0.8266832917705735.


💾 Best model saved to: ../checkpoints/stage2_best_model_trial3.pth

 Trial 4 | LR: 1.9e-04 | BS: 128 | Epochs: 5 | Hidden: 192


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



📘 Epoch 1/5


Training Epoch 1:   0%|          | 0/19 [00:11<?, ?it/s]
[W 2025-04-18 17:05:19,205] Trial 4 failed with parameters: {'learning_rate': 0.00018916225997461672, 'batch_size': 128, 'num_epochs': 5, 'hidden_size': 192} because of the following error: RuntimeError('CUDA error: out of memory\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n').
Traceback (most recent call last):
  File "E:\Lungdisease\.venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Prasanna Arjun\AppData\Local\Temp\ipykernel_11952\597607420.py", line 62, in objective
    outputs = model(images)
              ^^^^^^^^^^^^^
  File "E:\Lungdisease\.venv\Lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_ca

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [20]:
print(f"Best trial: {study.best_trial} | Accuracy: {study.best_value:.4f}")

Best trial: FrozenTrial(number=0, state=1, values=[0.8266832917705735], datetime_start=datetime.datetime(2025, 4, 18, 11, 18, 23, 666259), datetime_complete=datetime.datetime(2025, 4, 18, 12, 42, 9, 923254), params={'learning_rate': 2.5441701240034406e-05, 'batch_size': 64, 'num_epochs': 15, 'hidden_size': 320}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.7556109725685786, 1: 0.7506234413965087, 2: 0.7780548628428927, 3: 0.8117206982543641, 4: 0.8266832917705735, 5: 0.7930174563591023, 6: 0.814214463840399, 7: 0.7930174563591023}, distributions={'learning_rate': FloatDistribution(high=0.001, log=True, low=1e-05, step=None), 'batch_size': CategoricalDistribution(choices=(32, 64, 128)), 'num_epochs': IntDistribution(high=15, log=False, low=5, step=1), 'hidden_size': IntDistribution(high=512, log=False, low=128, step=64)}, trial_id=0, value=None) | Accuracy: 0.8267


In [31]:
checkpoint = torch.load('/checkpoints/stage2_best_model_trial0.pth', map_location='cpu')
print(checkpoint['hyperparameters'])  # If saved


NameError: name 'study' is not defined