# Evaluating the Multi-Modal Neural Network

This notebook shows how to load a trained model from a checkpoint, run it on the evaluation dataset, and report the resulting metrics.

## Import Libraries

In [None]:
import sys
# Make the parent directory importable so the `src` package used below can be
# found. The relative path is resolved against the kernel's current working
# directory, so this assumes the notebook is run from a folder directly under
# the project root — TODO confirm.
sys.path.append('..')

import torch
import yaml
from src.evaluation.metrics import compute_metrics
from src.models import load_model
from src.data.dataset import load_eval_dataset

# Only reached when every import above resolved without raising.
print("Libraries imported successfully")

## Load Model

In [None]:
# Load trained model
# Checkpoint path is relative to the kernel's working directory.
model_path = '../checkpoints/best_model.pth'
# `load_model` is a project helper; what it restores (architecture, weights,
# device placement) is not visible from this notebook — verify in src.models.
model = load_model(model_path)
# Switch the model to evaluation mode before any inference is run.
# NOTE(review): presumably a torch.nn.Module, where eval() changes the
# behaviour of layers such as dropout and batch norm — confirm.
model.eval()
print("Model loaded and set to evaluation mode")

## Load Evaluation Data

In [None]:
# Evaluation data settings, named so they are easy to find and adjust.
EVAL_CONFIG_PATH = '../configs/default.yaml'
EVAL_BATCH_SIZE = 32

# Build the evaluation dataset from the project config file.
eval_dataset = load_eval_dataset(EVAL_CONFIG_PATH)

# Batches are served in dataset order (no shuffling), so predictions collected
# later line up with the labels from the same batches.
eval_loader = torch.utils.data.DataLoader(
    eval_dataset,
    batch_size=EVAL_BATCH_SIZE,
    shuffle=False,
)
print(f"Evaluation dataset loaded with {len(eval_dataset)} samples")

## Run Evaluation

In [None]:
# Run evaluation
# Collect the predicted class index and the reference label for every sample,
# then hand both lists to `compute_metrics` and print what it returns.
all_preds = []
all_labels = []

# Find out which device the model's weights are on so inputs can be moved
# there. If that cannot be determined (no `parameters()` method, or no
# parameters at all), inputs are left wherever the loader produced them,
# which is what this cell did before.
try:
    model_device = next(model.parameters()).device
except (AttributeError, StopIteration):
    model_device = None

# Gradients are not needed for evaluation; disabling them saves memory.
with torch.no_grad():
    for batch in eval_loader:
        inputs = batch['input']
        # Only plain tensors are moved; any other container is passed through
        # untouched so the model receives exactly what the dataset yields.
        if model_device is not None and torch.is_tensor(inputs):
            inputs = inputs.to(model_device)
        outputs = model(inputs)
        # Take the index of the largest value along dim 1 as the prediction
        # (assumes outputs are per-class scores shaped (batch, classes) — TODO confirm).
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch['label'].cpu().numpy())

# Compute metrics
metrics = compute_metrics(all_preds, all_labels)
print("Evaluation Results:")
for key, value in metrics.items():
    try:
        # Scalar numeric metrics keep the original four-decimal formatting.
        print(f"{key}: {value:.4f}")
    except (TypeError, ValueError):
        # Non-scalar or non-numeric entries (arrays, strings, nested dicts)
        # reject the float format spec; print them as-is instead of aborting.
        print(f"{key}: {value}")