In [1]:
import os
import json
import time
import torch
import torch.nn as nn
import torchvision
import numpy as np
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

# -----------------------------
# Settings
# -----------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✅ Using device: {device}")

DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/tb_data'
MODEL_SAVE_PATH = "vgg16_epoch5.pth"
METRICS_FILE = "vgg16_metrics.json"

BATCH_SIZE = 32
IMG_SIZE = 224
EPOCHS = 5

# -----------------------------
# Transforms
# -----------------------------
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# -----------------------------
# Load Dataset
# -----------------------------
dataset = datasets.ImageFolder(DATA_DIR, transform=transform)
class_names = dataset.classes

# Train-val-test split
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_ds, val_ds, test_ds = random_split(dataset, [train_size, val_size, test_size])

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

# -----------------------------
# Model Setup: VGG16
# -----------------------------
vgg = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
for param in vgg.features.parameters():
    param.requires_grad = False
vgg.classifier[6] = nn.Linear(4096, len(class_names))

model = vgg.to(device)

# -----------------------------
# Loss and Optimizer
# -----------------------------
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

# -----------------------------
# Training Loop
# -----------------------------
train_losses, val_losses, train_accs, val_accs, epoch_times = [], [], [], [], []

for epoch in range(EPOCHS):
    start_time = time.time()
    model.train()
    total_loss, correct, total = 0.0, 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} - Training"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    train_loss = total_loss / len(train_loader)
    train_accuracy = 100 * correct / total
    train_losses.append(train_loss)
    train_accs.append(train_accuracy)

    # -----------------------------
    # Validation
    # -----------------------------
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)

    val_loss = val_loss / len(val_loader)
    val_accuracy = 100 * val_correct / val_total
    val_losses.append(val_loss)
    val_accs.append(val_accuracy)

    end_time = time.time()
    epoch_duration = end_time - start_time
    epoch_times.append(epoch_duration)

    print(f"\nEpoch {epoch+1}/{EPOCHS} — "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%, "
          f"Time: {epoch_duration:.2f}s")

# -----------------------------
# Final Test Evaluation with Confidence
# -----------------------------
model.eval()
all_preds = []
all_targets = []
all_confidences = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = torch.softmax(outputs, dim=1)  # Get class probabilities
        confidences, preds = torch.max(probs, 1)

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(labels.numpy())
        all_confidences.extend(confidences.cpu().numpy())

# -----------------------------
# Save Model & Metrics
# -----------------------------
torch.save(model.state_dict(), MODEL_SAVE_PATH)
print(f"\n✅ Model saved to: {MODEL_SAVE_PATH}")

metrics = {
    "train_loss": train_losses,
    "val_loss": val_losses,
    "train_acc": train_accs,
    "val_acc": val_accs,
    "epoch_times": epoch_times,
    "predictions": all_preds,
    "targets": all_targets,
    "confidences": all_confidences,
    "class_names": class_names
}

with open(METRICS_FILE, "w") as f:
    json.dump(metrics, f)

print(f"✅ Metrics saved to: {METRICS_FILE}")


✅ Using device: cuda


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:07<00:00, 78.8MB/s]
Epoch 1/5 - Training: 100%|██████████| 53/53 [09:18<00:00, 10.54s/it]



Epoch 1/5 — Train Loss: 1.2150, Train Acc: 61.31%, Val Loss: 0.4019, Val Acc: 83.06%, Time: 666.83s


Epoch 2/5 - Training: 100%|██████████| 53/53 [00:35<00:00,  1.49it/s]



Epoch 2/5 — Train Loss: 0.4821, Train Acc: 81.49%, Val Loss: 0.3812, Val Acc: 85.56%, Time: 42.93s


Epoch 3/5 - Training: 100%|██████████| 53/53 [00:34<00:00,  1.54it/s]



Epoch 3/5 — Train Loss: 0.3710, Train Acc: 86.55%, Val Loss: 0.3663, Val Acc: 86.11%, Time: 41.12s


Epoch 4/5 - Training: 100%|██████████| 53/53 [00:34<00:00,  1.52it/s]



Epoch 4/5 — Train Loss: 0.3703, Train Acc: 86.73%, Val Loss: 0.2612, Val Acc: 91.39%, Time: 41.64s


Epoch 5/5 - Training: 100%|██████████| 53/53 [00:35<00:00,  1.50it/s]



Epoch 5/5 — Train Loss: 0.3020, Train Acc: 88.87%, Val Loss: 0.3173, Val Acc: 90.56%, Time: 41.80s

✅ Model saved to: vgg16_epoch5.pth


TypeError: Object of type int64 is not JSON serializable

In [1]:
import numpy as np

# Convert any numpy types to native Python types
def convert_to_native(obj):
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, (np.int64, np.int32, np.integer)):
        return int(obj)
    elif isinstance(obj, (np.float64, np.float32, np.floating)):
        return float(obj)
    elif isinstance(obj, list):
        return [convert_to_native(i) for i in obj]
    elif isinstance(obj, dict):
        return {k: convert_to_native(v) for k, v in obj.items()}
    else:
        return obj

# Apply conversion
metrics = convert_to_native(metrics)

# Save as JSON
with open(METRICS_FILE, "w") as f:
    json.dump(metrics, f)


NameError: name 'metrics' is not defined