# HerdNet Model Evaluation

This notebook evaluates the latest trained HerdNet model on the test dataset.

## Setup

In [5]:
import sys
import os
import glob
import albumentations as A
from torch.utils.data import DataLoader
from animaloc.datasets import CSVDataset
from animaloc.data.transforms import DownSample
from animaloc.models import HerdNet, load_model
from animaloc.eval import PointsMetrics, HerdNetStitcher, HerdNetEvaluator
from animaloc.utils.seed import set_seed
from animaloc.utils.useful_funcs import mkdir

# Set seed for reproducibility
set_seed(1)

## Configuration

In [6]:
# Model configuration (matching training config)
num_classes = 7
down_ratio = 2
patch_size = 512

# Data paths
test_csv = '/home/lmanrique/Do/HerdNet/data/groundtruth/csv/test_big_size_A_B_E_K_WH_WB_points.csv'
test_root = '/home/lmanrique/Do/HerdNet/data/test'

# Find the latest model
output_dirs = glob.glob('/home/lmanrique/Do/HerdNet/outputs/2025-09-13/*/')
output_dirs.sort(key=lambda x: os.path.basename(x.rstrip('/')))
latest_dir = output_dirs[-1]

# Look for best_model.pth, fallback to latest_model.pth
best_model_path = os.path.join(latest_dir, 'best_model.pth')
latest_model_path = os.path.join(latest_dir, 'latest_model.pth')

if os.path.exists(best_model_path):
  model_path = best_model_path
  print(f"Using best model: {model_path}")
elif os.path.exists(latest_model_path):
  model_path = latest_model_path
  print(f"Using latest model: {model_path}")
else:
  raise FileNotFoundError(f"No model found in {latest_dir}")

# Output directory for evaluation results
eval_dir = '/home/lmanrique/Do/HerdNet/evaluation_output'
mkdir(eval_dir)

print(f"Test CSV: {test_csv}")
print(f"Test images: {test_root}")
print(f"Model path: {model_path}")
print(f"Output directory: {eval_dir}")


Using latest model: /home/lmanrique/Do/HerdNet/outputs/2025-09-13/22-52-06/latest_model.pth
Test CSV: /home/lmanrique/Do/HerdNet/data/groundtruth/csv/test_big_size_A_B_E_K_WH_WB_points.csv
Test images: /home/lmanrique/Do/HerdNet/data/test
Model path: /home/lmanrique/Do/HerdNet/outputs/2025-09-13/22-52-06/latest_model.pth
Output directory: /home/lmanrique/Do/HerdNet/evaluation_output


## Load Test Dataset

In [7]:
# Test dataset with normalization and downsampling transforms
test_dataset = CSVDataset(
    csv_file=test_csv,
    root_dir=test_root,
    albu_transforms=[
        A.Resize(height=patch_size, width=patch_size, p=1.0),
        A.Normalize(p=1.0)
    ],
    end_transforms=[DownSample(down_ratio=down_ratio, anno_type='point')]
)

# Test dataloader
test_dataloader = DataLoader(
    dataset=test_dataset, 
    batch_size=1, 
    shuffle=False,
    num_workers=4
)

print(f"Test dataset size: {len(test_dataset)}")

Test dataset size: 258


## Load Model

In [8]:
import torch

# Initialize HerdNet model
herdnet = HerdNet(
    num_classes=num_classes,
    down_ratio=down_ratio,
    num_layers=34,
    pretrained=False,  # Set to False since we're loading trained weights
    head_conv=64
).cuda()

# Load trained model weights with custom handling for LossWrapper
checkpoint = torch.load(model_path, map_location='cuda')

# Check if the model was saved with LossWrapper (keys prefixed with 'model.')
if 'model_state_dict' in checkpoint:
    state_dict = checkpoint['model_state_dict']
else:
    state_dict = checkpoint

# Remove 'model.' prefix if present
if any(key.startswith('model.') for key in state_dict.keys()):
    new_state_dict = {}
    for key, value in state_dict.items():
        if key.startswith('model.'):
            new_key = key[6:]  # Remove 'model.' prefix
            new_state_dict[new_key] = value
        else:
            new_state_dict[key] = value
    state_dict = new_state_dict

# Load the corrected state dict
herdnet.load_state_dict(state_dict, strict=False)  # Use strict=False to handle minor mismatches
print(f"Model loaded successfully from {model_path}")

Model loaded successfully from /home/lmanrique/Do/HerdNet/outputs/2025-09-13/22-52-06/latest_model.pth


## Setup Evaluation Components

In [9]:
# Evaluation metrics (radius=5 to match config threshold)
metrics = PointsMetrics(radius=5, num_classes=num_classes)

# Stitcher for handling full-size images
stitcher = HerdNetStitcher(
    model=herdnet,
    size=(patch_size, patch_size),
    overlap=0,  # No overlap as per config
    down_ratio=down_ratio,
    reduction='mean'
)

# Evaluator
evaluator = HerdNetEvaluator(
    model=herdnet,
    dataloader=test_dataloader,
    metrics=metrics,
    stitcher=stitcher,
    work_dir=eval_dir,
    header='test_evaluation'
)

print("Evaluation components initialized")

Evaluation components initialized


## Run Evaluation

In [10]:
# Run evaluation and get F1 score
print("Starting evaluation...")
test_f1_score = evaluator.evaluate(returns='f1_score')

# Print results
print(f"\n=== EVALUATION RESULTS ===")
print(f"Global F1 Score: {test_f1_score * 100:.2f}%")
print(f"Model: {os.path.basename(model_path)}")
print(f"Test dataset: {len(test_dataset)} images")
print(f"Results saved to: {eval_dir}")

Starting evaluation...


IndexError: index 1 is out of bounds for dimension 0 with size 1

## Additional Metrics

In [11]:
# Get detailed evaluation results
if hasattr(evaluator, 'results'):
    results = evaluator.results
    print("\n=== DETAILED RESULTS ===")
    print(results.head() if hasattr(results, 'head') else results)
else:
    print("\nDetailed results not available")

# Additional metrics if available
try:
    precision = evaluator.evaluate(returns='precision')
    recall = evaluator.evaluate(returns='recall')
    print(f"\nPrecision: {precision * 100:.2f}%")
    print(f"Recall: {recall * 100:.2f}%")
except:
    print("\nAdditional metrics (precision/recall) not available")

AssertionError: No metrics have been stored, please use the evaluate method first.