In [None]:
# ===================================================
# Task A: Gender Classification - Load & Evaluate
# ===================================================

# 1. Mount Google Drive to access the saved model
# The google.colab package is provided by the Colab runtime. The checkpoint
# and dataset paths used later in this script all sit under /content/drive,
# so this mount must succeed before anything is loaded.
from google.colab import drive
drive.mount('/content/drive')

# 2. Import libraries
import torch
import torchvision
from torch import nn
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 3. Set device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 4. Recreate the model architecture (must match training code)
# Build the network without ImageNet weights: load_state_dict() below
# overwrites every parameter, so asking for pretrained weights only caused a
# needless download (and relied on the deprecated `pretrained` flag).
model = torchvision.models.resnet18()
model.fc = nn.Linear(model.fc.in_features, 2)  # two-class output head
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU still loads on a CPU-only runtime.
model.load_state_dict(
    torch.load(
        "/content/drive/MyDrive/model_resnet18_task_a.pth",
        map_location=device,
    )
)
model = model.to(device)
model.eval()  # Set to evaluation mode

# 5. Define transforms (same as training/validation)
# Pipeline: resize to 224x224, convert to a tensor, then normalize each
# channel with the mean/std below (the usual ImageNet channel statistics).
val_transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# 6. Load datasets
# Both splits use val_transform and are read in a fixed order
# (shuffle=False).
train_data = datasets.ImageFolder(
    root="/content/drive/MyDrive/Task_A/train",
    transform=val_transform,
)
val_data = datasets.ImageFolder(
    root="/content/drive/MyDrive/Task_A/val",
    transform=val_transform,
)

train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=False)
val_loader = DataLoader(dataset=val_data, batch_size=32, shuffle=False)

# Class names as exposed by the validation dataset's `classes` attribute.
class_names = val_data.classes

# 7. Evaluation function
def evaluate_model(model, loader, split_name="Validation", *,
                   label_names=None, show_confusion_matrix=None):
    """Run ``model`` over ``loader`` and print classification metrics.

    Prints accuracy plus macro-averaged precision, recall and F1, followed by
    scikit-learn's per-class classification report. Optionally draws the
    confusion matrix as a seaborn heatmap. The model is left in eval mode.

    Args:
        model: Network to evaluate; called as ``model(inputs)`` and expected
            to return one score per class along dimension 1.
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        split_name: Label used in the printed header and the plot title.
        label_names: Display names for the classes, ordered by class index.
            Defaults to the module-level ``class_names``.
        show_confusion_matrix: Whether to plot the confusion matrix. When
            ``None`` (default) it is plotted only if ``split_name`` is
            ``"Validation"``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()

    if label_names is None:
        label_names = class_names
    label_names = list(label_names)
    # Explicit class ids keep the report and the confusion matrix aligned with
    # label_names even when a class never occurs in this split; without them
    # classification_report raises on the size mismatch.
    label_ids = list(range(len(label_names)))

    if show_confusion_matrix is None:
        show_confusion_matrix = split_name == "Validation"

    # Send inputs to wherever the model's weights live; a model without
    # parameters falls back to the module-level device.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs.to(run_device))
            all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if not all_labels:
        raise ValueError(f"{split_name} loader produced no samples to evaluate.")

    acc = accuracy_score(all_labels, all_preds)
    prec = precision_score(all_labels, all_preds, average='macro')
    rec = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f"\n {split_name} Evaluation Metrics:")
    print(f"Accuracy:  {acc * 100:.2f}%")
    print(f"Precision: {prec * 100:.2f}%")
    print(f"Recall:    {rec * 100:.2f}%")
    print(f"F1 Score:  {f1 * 100:.2f}%")

    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds,
                                labels=label_ids, target_names=label_names))

    if show_confusion_matrix:
        cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
        sns.heatmap(cm, annot=True, fmt="d",
                    xticklabels=label_names, yticklabels=label_names)
        plt.xlabel("Predicted")
        plt.ylabel("True")
        plt.title(f"Confusion Matrix - {split_name}")
        plt.show()

# 8. Run evaluation
# Report the training split first, then the validation split.
for split_label, split_loader in (
    ("Training", train_loader),
    ("Validation", val_loader),
):
    evaluate_model(model, split_loader, split_name=split_label)
