# Inference and Evaluation Program

In [7]:

import os
import shutil
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import gc
from torchvision import models, transforms
from PIL import Image, UnidentifiedImageError
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cpu


## 1. Data Preparation
Checks 'Test' folder, populates it from 'Train' if needed, and generates 'test.csv'.

In [8]:

def prepare_test_data(train_dir='Train', test_dir='Test', train_csv='dataset.csv', test_csv='test.csv'):
    print("Preparing Test Data...")
    
    # 1. Load Train CSV to get class mapping (to ensure consistency)
    if not os.path.exists(train_csv):
        print(f"Error: {train_csv} not found. Cannot ensure class consistency.")
        return None, None
    
    df_train = pd.read_csv(train_csv)
    unique_labels = sorted(df_train['label'].unique()) # Sort to ensure deterministic order if not provided
    # However, app.py uses: unique_labels = df['label'].unique() (order of appearance)
    # To be safe, we should use the exact logic from app.py or load the mapping if saved.
    # Let's stick to the order in dataset.csv for consistency with trained models.
    unique_labels_original = df_train['label'].unique()
    label_to_idx = {label: idx for idx, label in enumerate(unique_labels_original)}
    idx_to_label = {idx: label for idx, label in enumerate(unique_labels_original)}
    
    print(f"Found {len(unique_labels_original)} classes in Train dataset.")

    # 2. Populate Test Directory if empty
    if not os.path.exists(test_dir):
        os.makedirs(test_dir)
        
    # Check if Test folder is populated
    test_classes = os.listdir(test_dir)
    if len(test_classes) < len(unique_labels_original):
        print("Populating Test folder by copying 1 image from each Train class...")
        for label in unique_labels_original:
            src_class_dir = os.path.join(train_dir, label)
            dst_class_dir = os.path.join(test_dir, label)
            
            if not os.path.exists(dst_class_dir):
                os.makedirs(dst_class_dir)
                
            # Check if destination has images
            if len(os.listdir(dst_class_dir)) == 0:
                if os.path.exists(src_class_dir):
                    images = [f for f in os.listdir(src_class_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
                    if images:
                        # Pick the last one to minimize impact on training if we were moving, but we are copying
                        img_to_copy = images[-1] 
                        shutil.copy(os.path.join(src_class_dir, img_to_copy), os.path.join(dst_class_dir, img_to_copy))
    
    # 3. Create Test CSV
    print("Generating test.csv...")
    test_data = []
    for root, dirs, files in os.walk(test_dir):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                label = os.path.basename(root)
                if label in label_to_idx:
                    test_data.append({'gambar': file, 'label': label, 'label_idx': label_to_idx[label]})
    
    df_test = pd.DataFrame(test_data)
    df_test.to_csv(test_csv, index=False)
    print(f"Test CSV created with {len(df_test)} samples.")
    
    return df_test, idx_to_label

def verify_images(directory):
    print(f"Verifying images in {directory}...")
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                path = os.path.join(root, file)
                try:
                    with Image.open(path) as img:
                        img.verify()
                except (IOError, SyntaxError, UnidentifiedImageError) as e:
                    print(f"Corrupted image found and removed: {path} ({e})")
                    os.remove(path)

# Run Data Prep
df_test, idx_to_label = prepare_test_data()
verify_images('Test')
num_classes = len(idx_to_label)


Preparing Test Data...
Found 68 classes in Train dataset.
Generating test.csv...
Test CSV created with 67 samples.
Verifying images in Test...


## 2. Dataset Class

In [9]:

class FaceTestDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        img_name = row['gambar']
        label_name = row['label']
        label_idx = row['label_idx']
        
        img_path = os.path.join(self.root_dir, label_name, img_name)
        
        try:
            image = Image.open(img_path).convert('RGB')
        except Exception as e:
            print(f"Error loading {img_path}: {e}")
            image = Image.new('RGB', (224, 224)) # Dummy
            
        if self.transform:
            image = self.transform(image)

        return image, label_idx


## 3. Model Definitions

In [10]:

# 1. EfficientNet V2 M (from app.py)
def get_efficientnet_model(num_classes):
    print("Creating EfficientNet V2 M model...")
    weights = models.EfficientNet_V2_M_Weights.DEFAULT
    model = models.efficientnet_v2_m(weights=weights)
    
    # Replicate the modification logic
    in_features = model.classifier[1].in_features
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features, num_classes)
    )
    return model

# 2. Swin Transformer V2 Tiny (from notebook)
def get_swin_model(num_classes):
    print("Creating Swin Transformer V2 Tiny model...")
    # Note: Ensure torchvision version supports swin_v2_t
    try:
        weights = models.Swin_V2_T_Weights.DEFAULT
        model = models.swin_v2_t(weights=weights)
    except AttributeError:
        # Fallback if V2 not available or naming differs, try V1 or check version
        print("Warning: Swin_V2_T not found, trying Swin_T")
        weights = models.Swin_T_Weights.DEFAULT
        model = models.swin_t(weights=weights)

    model.head = nn.Sequential(
        nn.Dropout(0.3),
        nn.Linear(model.head.in_features, num_classes)
    )
    return model


## 4. Evaluation Function

In [11]:

def evaluate_model(model, dataloader, device, model_name="Model"):
    model.eval()
    all_preds = []
    all_labels = []
    
    print(f"Evaluating {model_name}...")
    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc=f"Evaluating {model_name}"):
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
    # Calculate Metrics
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    
    print(f"\n--- Results for {model_name} ---")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    
    return all_labels, all_preds


## 5. Main Execution
Loads models, runs inference, and prints metrics.

In [12]:

# Configuration for models
models_config = [
    {
        "name": "EfficientNet V2 M",
        "path": "face_recognition_project/efficientnet_v2_m_finetuned.pth",
        "img_size": 480,
        "get_model": get_efficientnet_model
    },
    {
        "name": "Swin Transformer V2 Tiny",
        "path": "face_recognition_project/swin_v2_t_finetuned.pth",
        "img_size": 256,
        "get_model": get_swin_model
    }
]

results = {}

for config in models_config:
    print(f"\nProcessing {config['name']}...")
    
    # Clear memory
    torch.cuda.empty_cache()
    gc.collect()
    
    # 1. Setup Transform
    transform = transforms.Compose([
        transforms.Resize((config['img_size'], config['img_size'])),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # 2. Setup DataLoader
    test_dataset = FaceTestDataset(df_test, 'Test', transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    
    # 3. Load Model
    model = config['get_model'](num_classes)
    
    if os.path.exists(config['path']):
        try:
            state_dict = torch.load(config['path'], map_location=device)
            model.load_state_dict(state_dict)
            print("Weights loaded successfully.")
        except Exception as e:
            print(f"Error loading weights: {e}")
            continue
    else:
        print(f"Weight file not found at {config['path']}")
        continue
        
    model.to(device)
    
    # 4. Evaluate
    labels, preds = evaluate_model(model, test_loader, device, config['name'])
    results[config['name']] = {'labels': labels, 'preds': preds}

print("\nDone.")



Processing EfficientNet V2 M...
Creating EfficientNet V2 M model...
Weights loaded successfully.
Evaluating EfficientNet V2 M...


Evaluating EfficientNet V2 M: 100%|██████████| 3/3 [00:35<00:00, 11.90s/it]



--- Results for EfficientNet V2 M ---
Precision: 1.0000
Recall:    1.0000
F1 Score:  1.0000

Processing Swin Transformer V2 Tiny...
Creating Swin Transformer V2 Tiny model...
Weights loaded successfully.
Evaluating Swin Transformer V2 Tiny...


Evaluating Swin Transformer V2 Tiny: 100%|██████████| 3/3 [00:12<00:00,  4.05s/it]


--- Results for Swin Transformer V2 Tiny ---
Precision: 1.0000
Recall:    1.0000
F1 Score:  1.0000

Done.



