Mounted at /content/drive


In [None]:
import os
import json
import time
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

# -----------------------------
# Device & Paths
# -----------------------------
# Training hyperparameters.
EPOCHS = 15
IMG_SIZE = 224
BATCH_SIZE = 32

# Input dataset location and the names of the artifacts this script writes.
# NOTE(review): the artifact names say "resnet16" / "epoch5", while this script
# fine-tunes a ResNet18 for EPOCHS = 15 — confirm before renaming, since other
# code may load these exact file names.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/tb_data'
MODEL_SAVE_PATH = "resnet16_epoch5.pth"
METRICS_FILE = "resnet16_metrics.json"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"✅ Using device: {device}")

# -----------------------------
# Data Transforms
# -----------------------------
# Training-time preprocessing: random augmentation followed by tensor
# conversion and normalization. The name `transform` is kept for any later
# cell that refers to it.
# NOTE(review): the ImageNet-pretrained weights used by this script were
# published with per-channel ImageNet mean/std normalization; [0.5]/[0.5] is
# kept here so that existing checkpoints and inference code stay compatible —
# confirm whether switching is desired.
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Evaluation-time preprocessing: same resize/normalization but NO random
# operations, so validation and test metrics are deterministic and are not
# measured on augmented images.
eval_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# -----------------------------
# Load Dataset
# -----------------------------
# Two views over the same folder: identical samples, different transforms.
dataset = datasets.ImageFolder(DATA_DIR, transform=transform)
eval_dataset = datasets.ImageFolder(DATA_DIR, transform=eval_transform)
class_names = dataset.classes

train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Seed the split so that the train/val/test membership (and therefore the
# reported test results) is reproducible from run to run.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_ds, val_split, test_split = random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

# Re-point the held-out index sets at the non-augmented view of the data.
val_ds = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_ds = torch.utils.data.Subset(eval_dataset, test_split.indices)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

# -----------------------------
# Model Setup: ResNet18
# -----------------------------
# Start from the ImageNet-pretrained ResNet18 and switch off gradient
# tracking on every pretrained weight.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
for pretrained_param in resnet.parameters():
    pretrained_param.requires_grad = False

# Swap the classification head for a fresh linear layer with one output per
# dataset class; the new layer is created after the freeze above.
head_in_features = resnet.fc.in_features
resnet.fc = nn.Linear(head_in_features, len(class_names))
model = resnet.to(device)

# -----------------------------
# Loss and Optimizer
# -----------------------------
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=1e-4,
)

# -----------------------------
# Training Loop
# -----------------------------
# Per-epoch history: mean loss, accuracy (in percent) and wall-clock seconds.
train_losses, val_losses, train_accs, val_accs, epoch_times = [], [], [], [], []

for epoch in range(EPOCHS):
    start_time = time.time()
    model.train()
    total_loss, correct, total = 0.0, 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} - Training"):
        images, labels = images.to(device), labels.to(device)
        n_samples = labels.size(0)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `criterion` returns the mean over the batch. Weight it by the batch
        # size so the epoch figure is a true per-sample mean even when the
        # last batch is smaller than BATCH_SIZE.
        total_loss += loss.item() * n_samples
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += n_samples

    train_loss = total_loss / total
    train_accuracy = 100 * correct / total
    train_losses.append(train_loss)
    train_accs.append(train_accuracy)

    # -----------------------------
    # Validation
    # -----------------------------
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            n_samples = labels.size(0)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Same sample-weighted accumulation as in the training pass.
            val_loss += loss.item() * n_samples
            _, preds = torch.max(outputs, 1)
            val_correct += (preds == labels).sum().item()
            val_total += n_samples

    val_loss = val_loss / val_total
    val_accuracy = 100 * val_correct / val_total
    val_losses.append(val_loss)
    val_accs.append(val_accuracy)

    # The recorded time covers both the training and the validation pass.
    end_time = time.time()
    epoch_times.append(end_time - start_time)

    print(f"\nEpoch {epoch+1}/{EPOCHS} — "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%, "
          f"Time: {epoch_times[-1]:.2f}s")

# -----------------------------
# Final Test Evaluation (with confidence scores)
# -----------------------------
# Run the trained model over the held-out test loader and collect, per sample,
# the predicted class index, the true class index and the confidence (the
# largest softmax probability).
model.eval()
all_preds = []
all_targets = []
all_confidences = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = torch.softmax(outputs, dim=1)  # Get class probabilities
        confidences, preds = torch.max(probs, 1)  # Confidence = max probability

        # Tensor.tolist() yields built-in int/float values. Extending with
        # `.numpy()` arrays instead fills the lists with numpy scalars, which
        # json.dump rejects ("Object of type int64 is not JSON serializable").
        all_preds.extend(preds.tolist())
        all_targets.extend(labels.tolist())
        all_confidences.extend(confidences.tolist())

# -----------------------------
# Save Model & Metrics
# -----------------------------
# Persist the trained weights (state_dict only) under MODEL_SAVE_PATH.
torch.save(model.state_dict(), MODEL_SAVE_PATH)
print(f"\n✅ Model saved to: {MODEL_SAVE_PATH}")

# The per-sample lists may hold numpy scalars (int64 / float32), which the
# json module cannot encode; coerce them to built-in types explicitly.
metrics = {
    "train_loss": train_losses,
    "val_loss": val_losses,
    "train_acc": train_accs,
    "val_acc": val_accs,
    "epoch_times": epoch_times,
    "predictions": [int(p) for p in all_preds],
    "targets": [int(t) for t in all_targets],
    "confidences": [float(c) for c in all_confidences],
    "class_names": list(class_names)
}

# Serialize before opening the file: if encoding fails, no empty or
# half-written metrics file is left behind.
metrics_json = json.dumps(metrics)
with open(METRICS_FILE, "w") as f:
    f.write(metrics_json)

print(f"✅ Metrics saved to: {METRICS_FILE}")


✅ Using device: cpu


Epoch 1/15 - Training: 100%|██████████| 53/53 [03:23<00:00,  3.84s/it]



Epoch 1/15 — Train Loss: 0.8664, Train Acc: 61.19%, Val Loss: 0.8201, Val Acc: 63.06%, Time: 242.21s


Epoch 2/15 - Training: 100%|██████████| 53/53 [03:19<00:00,  3.76s/it]



Epoch 2/15 — Train Loss: 0.6330, Train Acc: 75.95%, Val Loss: 0.6307, Val Acc: 75.28%, Time: 237.80s


Epoch 3/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.62s/it]



Epoch 3/15 — Train Loss: 0.5582, Train Acc: 79.76%, Val Loss: 0.4971, Val Acc: 82.50%, Time: 230.83s


Epoch 4/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.62s/it]



Epoch 4/15 — Train Loss: 0.5145, Train Acc: 81.07%, Val Loss: 0.4763, Val Acc: 82.22%, Time: 235.00s


Epoch 5/15 - Training: 100%|██████████| 53/53 [03:12<00:00,  3.63s/it]



Epoch 5/15 — Train Loss: 0.5017, Train Acc: 80.95%, Val Loss: 0.4529, Val Acc: 82.50%, Time: 230.90s


Epoch 6/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.61s/it]



Epoch 6/15 — Train Loss: 0.4670, Train Acc: 82.68%, Val Loss: 0.4749, Val Acc: 82.22%, Time: 230.10s


Epoch 7/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.62s/it]



Epoch 7/15 — Train Loss: 0.4457, Train Acc: 83.10%, Val Loss: 0.4503, Val Acc: 82.78%, Time: 230.06s


Epoch 8/15 - Training: 100%|██████████| 53/53 [03:12<00:00,  3.62s/it]



Epoch 8/15 — Train Loss: 0.4179, Train Acc: 84.88%, Val Loss: 0.4379, Val Acc: 83.06%, Time: 230.49s


Epoch 9/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.62s/it]



Epoch 9/15 — Train Loss: 0.4222, Train Acc: 84.40%, Val Loss: 0.4159, Val Acc: 85.28%, Time: 230.20s


Epoch 10/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.61s/it]



Epoch 10/15 — Train Loss: 0.4244, Train Acc: 83.63%, Val Loss: 0.3705, Val Acc: 87.78%, Time: 230.07s


Epoch 11/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.61s/it]



Epoch 11/15 — Train Loss: 0.4244, Train Acc: 84.52%, Val Loss: 0.4146, Val Acc: 83.33%, Time: 229.93s


Epoch 12/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.61s/it]



Epoch 12/15 — Train Loss: 0.4165, Train Acc: 84.11%, Val Loss: 0.4009, Val Acc: 84.17%, Time: 229.81s


Epoch 13/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.61s/it]



Epoch 13/15 — Train Loss: 0.3873, Train Acc: 86.49%, Val Loss: 0.3852, Val Acc: 84.72%, Time: 228.78s


Epoch 14/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.62s/it]



Epoch 14/15 — Train Loss: 0.3877, Train Acc: 85.12%, Val Loss: 0.3888, Val Acc: 83.89%, Time: 229.23s


Epoch 15/15 - Training: 100%|██████████| 53/53 [03:11<00:00,  3.62s/it]



Epoch 15/15 — Train Loss: 0.3950, Train Acc: 84.58%, Val Loss: 0.3683, Val Acc: 85.28%, Time: 229.25s

✅ Model saved to: resnet16_epoch5.pth


TypeError: Object of type int64 is not JSON serializable

In [None]:
import numpy as np

# Convert any numpy types to native Python types
def convert_to_native(obj):
    """Recursively replace numpy values with built-in Python equivalents.

    Arrays become (nested) lists, numpy booleans/integers/floats become
    ``bool``/``int``/``float``, and lists, tuples and dicts are traversed
    (dict keys are converted as well as values). Any other object is
    returned unchanged.

    Args:
        obj: Arbitrary value, possibly a nested container of numpy values.

    Returns:
        A structure of the same shape with numpy arrays and numeric/boolean
        numpy scalars replaced by built-in types.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.bool_):
        return bool(obj)
    # np.integer / np.floating are the abstract bases of every sized numpy
    # integer / float type, so the concrete types need not be listed.
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, list):
        return [convert_to_native(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_to_native(item) for item in obj)
    if isinstance(obj, dict):
        return {
            convert_to_native(key): convert_to_native(value)
            for key, value in obj.items()
        }
    return obj

# Swap every numpy value inside the collected metrics for a built-in type.
metrics = convert_to_native(metrics)

# Write the cleaned metrics out as JSON text.
with open(METRICS_FILE, "w") as f:
    f.write(json.dumps(metrics))
