## DF vs DF_FineTune

- Use validation set to report results
- Note these are two different experiments with two different datasets

- With the MegaDetector-created val/test set, test the fine-tuned DeepFaune (DF) model:
    - Dataset where MegaDetector detected an animal
    - Pass all non-empty detections through the classifier. If MegaDetector flags human/vehicle, should the classification confidence threshold be set higher?
    - Precision/recall/F1 for all species, plus total
    - 5 epochs, lr1e-5
    - 5 epochs, lr1e-5 & 5 epochs, wb loss

- Steps
    - load val and test set into data loader/transform
    - initialise models:
        - DeepFaune
        - FineTuned
    - Get predictions, put in excel table
    - Analyse results
        - Precision/recall/F1
        - by species

In [None]:
import os
import socket
import getpass
import torch
import timm
from PIL import Image
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Species labels. A label's position in this list is its integer class id:
# the dataset maps folder names through ANIMAL_CLASSES.index and the model
# head is sized with len(ANIMAL_CLASSES), so the order must not change.
ANIMAL_CLASSES = [
    "badger",
    "ibex",
    "red deer",
    "chamois",
    "cat",
    "goat",
    "roe deer",
    "dog",
    "squirrel",
    "equid",
    "genet",
    "hedgehog",
    "lagomorph",
    "wolf",
    "lynx",
    "marmot",
    "micromammal",
    "mouflon",
    "sheep",
    "mustelid",
    "bird",
    "bear",
    "nutria",
    "fox",
    "wild boar",
    "cow",
]

# Checkpoints to evaluate, in the order they are run and reported
MODEL_PATHS = [
    "../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt",
    "../models/2024-06-14-00-25-13-deepfaune-finetuned-epochs-30-lr-1e-06-wbpenalty-0.0.pt",
    "../models/2024-06-14-03-34-55-deepfaune-finetuned-epochs-30-lr-1e-06-wbpenalty-10.0.pt",
]

# Root folders of the validation and test splits (one sub-folder per class)
VAL_DIR = "../data/split_data/val"
TEST_DIR = "../data/split_data/test"

# Choose the PyTorch device: fall back to the CPU when CUDA is unavailable,
# otherwise report the machine details and settle on a GPU.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("No GPU available.")
else:
    device = torch.device("cuda")
    gpu_name = torch.cuda.get_device_name()
    print(f"GPU name: {gpu_name} ({torch.cuda.device_count()} available)")
    print("Host name: ", socket.gethostname())
    print("User name: ", getpass.getuser())

    if socket.gethostname() != "gpuhost001.jc.rl.ac.uk":
        # Any other host: just report the second value of mem_get_info() in GiB
        max_memory = torch.cuda.mem_get_info()[1] / (1024 ** 3)
        print(f"GPU memory: {max_memory} GiB")
    else:
        # On this specific host, pick whichever GPU currently has the most free memory
        def select_gpu_with_most_free_memory():
            """Return the index of the GPU reporting the most free memory.

            Each GPU is made current in turn so mem_get_info() reports on it.
            Ties go to the highest index because the comparison is ``>=``.
            """
            best_index = 0
            best_free_gib = 0
            for gpu_index in range(torch.cuda.device_count()):
                torch.cuda.set_device(gpu_index)
                free_bytes, _total_bytes = torch.cuda.mem_get_info()
                free_gib = free_bytes / (1024 ** 3)
                print(f"GPU {gpu_index} free memory: {round(free_gib, 2)} GiB")
                if free_gib >= best_free_gib:
                    best_free_gib, best_index = free_gib, gpu_index
            return best_index

        best_gpu = select_gpu_with_most_free_memory()
        torch.cuda.set_device(best_gpu)
        print(f"Using GPU: {best_gpu}")

# Manual escape hatch: flip to True to force GPU 3 regardless of the choice above
gpu_override = False
if gpu_override:
    torch.cuda.set_device(3)
    print(f"OVERRIDE: Using GPU: {3}")

# Preprocessing pipeline applied to every image before it reaches the model:
# bicubic resize to 182x182, conversion to a tensor, then per-channel normalisation.
# NOTE: this assignment rebinds the name `transforms`, shadowing the imported
# torchvision module from here on; later code passes it as `transform=transforms`.
transforms = transforms.Compose(
    [
        transforms.Resize(size=(182, 182), interpolation=InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Define Dataset class
class AnimalDataset(Dataset):
    """Image-folder dataset laid out as ``directory/<class name>/<image file>``.

    Each sub-folder name must be an entry of ``ANIMAL_CLASSES``; its position in
    that list becomes the integer label of every file inside the folder.

    Args:
        directory: Root folder containing one sub-folder per class.
        transform: Optional callable applied to each RGB PIL image.
        preload_to_gpu: When True, every image is loaded, transformed and moved
            to ``device`` up front, and labels become a tensor on ``device``.
            The transform must then return something with a ``.to()`` method
            (e.g. a tensor), because the result is moved with ``image.to(device)``.

    Raises:
        ValueError: If a class sub-folder is not listed in ``ANIMAL_CLASSES``.
    """

    def __init__(self, directory, transform=None, preload_to_gpu=False):
        self.directory = directory
        self.transform = transform
        self.images = []  # image file paths, index-aligned with self.labels
        self.labels = []  # integer class ids (replaced by a tensor when preloading)
        self.preload_to_gpu = preload_to_gpu

        # os.listdir returns entries in arbitrary order; sort so that sample
        # indices (and therefore result tables) are reproducible across runs.
        for label in sorted(os.listdir(directory)):
            label_dir = os.path.join(directory, label)
            # Skip hidden entries such as .ipynb_checkpoints and stray files
            if label.startswith('.') or not os.path.isdir(label_dir):
                continue
            try:
                class_index = ANIMAL_CLASSES.index(label)
            except ValueError:
                raise ValueError(
                    f"Class folder '{label}' in '{directory}' is not listed in ANIMAL_CLASSES"
                ) from None
            for image in sorted(os.listdir(label_dir)):
                # Hidden files (.DS_Store, ...) are never dataset images
                if image.startswith('.'):
                    continue
                self.images.append(os.path.join(label_dir, image))
                self.labels.append(class_index)

        if self.preload_to_gpu:
            self.preload_images()

    def preload_images(self):
        """Load and transform every image, keeping the results on ``device``."""
        self.loaded_images = []
        for image_path in tqdm(self.images, desc="Preloading images to GPU"):
            image = Image.open(image_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            self.loaded_images.append(image.to(device))
        # Labels move to the same device so __getitem__ returns matching pairs
        self.labels = torch.tensor(self.labels, device=device)

    def __len__(self):
        """Return the number of image files found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample *idx*.

        Preloaded samples come straight from memory; otherwise the image is
        opened from disk, converted to RGB and transformed on every access.
        """
        if self.preload_to_gpu:
            return self.loaded_images[idx], self.labels[idx]

        image = Image.open(self.images[idx]).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, self.labels[idx]

# Define Classifier class
class Classifier:
    """ViT-L/14 DINOv2 classifier whose weights are restored from a checkpoint file.

    Args:
        model_path: Path to a checkpoint that is either a state dict or a dict
            holding one under the ``'state_dict'`` key. Keys may carry a leading
            ``model.`` and/or ``base_model.`` wrapper prefix.
    """

    # Wrapper prefixes that may precede the timm parameter names, outermost first
    _WRAPPER_PREFIXES = ('model.', 'base_model.')

    def __init__(self, model_path):
        self.model = timm.create_model('vit_large_patch14_dinov2', pretrained=False, num_classes=len(ANIMAL_CLASSES), dynamic_img_size=True)
        # NOTE(security): torch.load unpickles the file, which can execute
        # arbitrary code. Only load checkpoints from a trusted source.
        state_dict = torch.load(model_path, map_location=device)
        if 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']
        self.model.load_state_dict(self._strip_wrapper_prefixes(state_dict))
        self.model.to(device)
        self.model.eval()

    @staticmethod
    def _strip_wrapper_prefixes(state_dict):
        """Return a copy of *state_dict* with leading wrapper prefixes removed.

        Only a prefix at the very start of a key is dropped. A plain
        ``str.replace`` would also delete matches inside the key, turning
        ``model.base_model.x`` into ``base_x``.
        """
        cleaned = {}
        for key, value in state_dict.items():
            for prefix in Classifier._WRAPPER_PREFIXES:
                if key.startswith(prefix):
                    key = key[len(prefix):]
            cleaned[key] = value
        return cleaned

    def predict(self, image):
        """Classify a single image.

        Args:
            image: One image without a batch dimension; ``unsqueeze(0)`` adds it
                before the forward pass. It is not moved here, so the caller
                must supply it on the model's device.

        Returns:
            ``(class_index, probability)`` of the top-1 softmax prediction as
            plain Python numbers.
        """
        with torch.no_grad():
            output = self.model(image.unsqueeze(0))
            probabilities = torch.nn.functional.softmax(output, dim=1)
            top_p, top_class = probabilities.topk(1, dim=1)
            return top_class.item(), top_p.item()

# Build both splits up front; preload_to_gpu=True makes each dataset load,
# transform and move every image to the device during construction.
print("Loading Validation Set...")
val_dataset = AnimalDataset(directory=VAL_DIR, transform=transforms, preload_to_gpu=True)
print("Loading Test Set...")
test_dataset = AnimalDataset(directory=TEST_DIR, transform=transforms, preload_to_gpu=True)

# Wrap each split in a loader: one sample per batch, original order kept
val_loader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)


In [10]:

# Function to run inference and store predictions
def run_inference_and_store_predictions(model_path, data_loader, dataset_type):
    """Run one checkpoint over a data loader and collect per-sample predictions.

    Args:
        model_path: Checkpoint path handed to ``Classifier``.
        data_loader: Loader yielding ``(images, labels)`` batches. Its dataset
            must expose an ``images`` list of file paths, and the loader must
            iterate in dataset order (``shuffle=False``, no custom sampler);
            otherwise the recorded paths will not match the predictions.
        dataset_type: Label used only in the progress-bar description.

    Returns:
        Dict of index-aligned lists: ``'preds'`` (predicted class ids),
        ``'probs'`` (top-1 probabilities), ``'labels'`` (true class ids) and
        ``'images'`` (source file paths).
    """
    results = {'preds': [], 'probs': [], 'labels': [], 'images': []}
    classifier = Classifier(model_path)
    image_paths = data_loader.dataset.images
    # Dataset position of the next sample. The within-batch index must not be
    # used for the path lookup: it restarts at 0 for every batch.
    sample_index = 0

    for images, labels in tqdm(data_loader, desc=f"Running inference on {dataset_type} set with model {model_path}"):
        images = images.to(device)
        # Labels are only read back as Python ints, so no device transfer is needed
        for image, label in zip(images, labels.tolist()):
            pred_class, pred_prob = classifier.predict(image)
            results['preds'].append(pred_class)
            results['probs'].append(pred_prob)
            results['labels'].append(label)
            results['images'].append(image_paths[sample_index])
            sample_index += 1

    return results

def calculate_metrics(results):
    """Compute per-class and summary precision/recall/F1 from inference results.

    Args:
        results: Dict with ``'labels'`` (true class ids) and ``'preds'``
            (predicted class ids), as integer sequences of equal length.

    Returns:
        Dict of equal-length columns ``'class'``, ``'precision'``, ``'recall'``,
        ``'f1'`` and ``'support'``. There is one row per class that occurs in the
        true labels, followed by ``'macro'``, ``'micro'`` and ``'weighted'``
        summary rows whose support is the total sample count.

    Notes:
        Every score uses ``zero_division=0``, so a class that is never predicted
        counts as precision 0 in the summaries just as in its own row.
        Previously the micro and weighted rows used ``np.nan``, which drops such
        classes from the weighted average and needs a recent scikit-learn.
        The macro row is the mean over classes present in the true labels only.
    """
    y_true = results['labels']
    y_pred = results['preds']

    # Number of true samples per class id; classes without samples are not reported
    support = np.bincount(y_true, minlength=len(ANIMAL_CLASSES))
    present_classes = [i for i, count in enumerate(support) if count > 0]
    n_samples = len(y_true)

    metrics = {
        'class': [ANIMAL_CLASSES[i] for i in present_classes] + ['macro', 'micro', 'weighted'],
    }
    # Insertion order fixes the column order: precision, recall, f1
    scorers = {'precision': precision_score, 'recall': recall_score, 'f1': f1_score}
    for column, scorer in scorers.items():
        per_class = scorer(y_true, y_pred, average=None, labels=present_classes, zero_division=0)
        macro = np.mean(per_class)
        micro = scorer(y_true, y_pred, average='micro', zero_division=0)
        weighted = scorer(y_true, y_pred, average='weighted', zero_division=0)
        metrics[column] = np.append(per_class, [macro, micro, weighted])

    metrics['support'] = np.append(support[present_classes], [n_samples] * 3)

    return metrics



In [11]:

# Metrics tables per model, keyed by checkpoint path
val_metrics_dfs = {}
test_metrics_dfs = {}

# Iterate over each model and run inference on both validation and test sets
for model_path in MODEL_PATHS:
    print(f"Running inference for model: {model_path}")

    # Run inference on validation set
    print("Running inference on validation set...")
    val_results = run_inference_and_store_predictions(model_path, val_loader, 'validation')
    # Run inference on test set
    print("Running inference on test set...")
    test_results = run_inference_and_store_predictions(model_path, test_loader, 'test')

    # Calculate and print metrics
    print("Calculating metrics for validation set...")
    val_metrics = calculate_metrics(val_results)
    print("Calculating metrics for test set...")
    test_metrics = calculate_metrics(test_results)

    print(f"\nMetrics for model {model_path} on validation set:")
    val_metrics_df = pd.DataFrame(val_metrics)
    print(val_metrics_df)
    print(f"\nMetrics for model {model_path} on test set:")
    test_metrics_df = pd.DataFrame(test_metrics)
    print(test_metrics_df)

    # Store results in DataFrames for further analysis
    val_metrics_dfs[model_path] = val_metrics_df
    test_metrics_dfs[model_path] = test_metrics_df

# Excel rejects worksheet names longer than this many characters
EXCEL_MAX_SHEET_NAME_LEN = 31


def _excel_sheet_name(model_path, taken):
    """Return a worksheet name for *model_path* that Excel will accept.

    The checkpoint file name is truncated to Excel's length limit. If that
    collides (case-insensitively) with a name in *taken*, a ``~N`` suffix is
    added. The chosen name is recorded in *taken*.
    """
    base = os.path.basename(model_path)
    name = base[:EXCEL_MAX_SHEET_NAME_LEN]
    counter = 1
    while name.lower() in taken:
        suffix = f"~{counter}"
        name = base[:EXCEL_MAX_SHEET_NAME_LEN - len(suffix)] + suffix
        counter += 1
    taken.add(name.lower())
    return name


# Save all results to a new Excel file for further analysis
all_results_file = "../data/all_model_metrics.xlsx"
print(f"Saving all results to {all_results_file}...")
used_sheet_names = set()
with pd.ExcelWriter(all_results_file) as writer:
    for model_path in MODEL_PATHS:
        val_metrics_df = val_metrics_dfs[model_path]
        test_metrics_df = test_metrics_dfs[model_path]
        # Validation columns keep their names; test columns get a "_test" suffix.
        # NOTE(review): this is a left join on the validation rows, so a class
        # that only occurs in the test set is not written. Confirm this is intended.
        combined_metrics_df = val_metrics_df.join(test_metrics_df.set_index('class'), on='class', rsuffix='_test')
        # The raw checkpoint file names exceed Excel's sheet-name limit
        sheet_name = _excel_sheet_name(model_path, used_sheet_names)
        print(f"  sheet '{sheet_name}' <- {model_path}")
        combined_metrics_df.to_excel(writer, sheet_name=sheet_name, index=False)
print("All results saved.")


Running inference for model: ../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt
Running inference on validation set...


Running inference on validation set with model ../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt: 100%|██████████| 3669/3669 [01:17<00:00, 47.11it/s]


Running inference on test set...


Running inference on test set with model ../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt: 100%|██████████| 3557/3557 [01:15<00:00, 47.11it/s]


Calculating metrics for validation set...
Calculating metrics for test set...

Metrics for model ../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt on validation set:
        class  precision    recall        f1  support
0      badger   0.950000  0.966102  0.957983       59
1    red deer   0.976705  0.928797  0.952149      632
2     chamois   0.181818  1.000000  0.307692        4
3         cat   0.750000  0.960000  0.842105       25
4        goat   0.942623  0.696970  0.801394      165
5    roe deer   0.947494  0.894144  0.920046      444
6         dog   0.789474  0.913043  0.846774      115
7    squirrel   0.129032  1.000000  0.228571        8
8       equid   0.671875  0.860000  0.754386       50
9   lagomorph   0.652174  1.000000  0.789474       15
10       wolf   0.824742  0.930233  0.874317       86
11       lynx   0.990521  0.933036  0.960920      224
12      sheep   0.920000  0.958333  0.938776      240
13   mustelid   0.833333  0.967742  0.895522       31
14       bird   0.

Running inference on validation set with model ../models/2024-06-14-00-25-13-deepfaune-finetuned-epochs-30-lr-1e-06-wbpenalty-0.0.pt: 100%|██████████| 3669/3669 [01:17<00:00, 47.13it/s]


Running inference on test set...


Running inference on test set with model ../models/2024-06-14-00-25-13-deepfaune-finetuned-epochs-30-lr-1e-06-wbpenalty-0.0.pt: 100%|██████████| 3557/3557 [01:15<00:00, 47.24it/s]


Calculating metrics for validation set...
Calculating metrics for test set...

Metrics for model ../models/2024-06-14-00-25-13-deepfaune-finetuned-epochs-30-lr-1e-06-wbpenalty-0.0.pt on validation set:
        class  precision    recall        f1  support
0      badger   0.966102  0.966102  0.966102       59
1    red deer   0.958665  0.954114  0.956384      632
2     chamois   1.000000  1.000000  1.000000        4
3         cat   0.958333  0.920000  0.938776       25
4        goat   0.818182  0.927273  0.869318      165
5    roe deer   0.948546  0.954955  0.951740      444
6         dog   0.875000  0.973913  0.921811      115
7    squirrel   0.888889  1.000000  0.941176        8
8       equid   0.877551  0.860000  0.868687       50
9   lagomorph   1.000000  1.000000  1.000000       15
10       wolf   0.929412  0.918605  0.923977       86
11       lynx   0.990868  0.968750  0.979684      224
12      sheep   1.000000  0.962500  0.980892      240
13   mustelid   1.000000  0.967742  0.9836

Running inference on validation set with model ../models/2024-06-14-03-34-55-deepfaune-finetuned-epochs-30-lr-1e-06-wbpenalty-10.0.pt: 100%|██████████| 3669/3669 [01:17<00:00, 47.10it/s]


Running inference on test set...


Running inference on test set with model ../models/2024-06-14-03-34-55-deepfaune-finetuned-epochs-30-lr-1e-06-wbpenalty-10.0.pt: 100%|██████████| 3557/3557 [01:15<00:00, 47.09it/s]


Calculating metrics for validation set...
Calculating metrics for test set...

Metrics for model ../models/2024-06-14-03-34-55-deepfaune-finetuned-epochs-30-lr-1e-06-wbpenalty-10.0.pt on validation set:
        class  precision    recall        f1  support
0      badger   0.982143  0.932203  0.956522       59
1    red deer   0.973641  0.935127  0.953995      632
2     chamois   1.000000  1.000000  1.000000        4
3         cat   1.000000  0.880000  0.936170       25
4        goat   0.803191  0.915152  0.855524      165
5    roe deer   0.941704  0.945946  0.943820      444
6         dog   0.800000  0.939130  0.864000      115
7    squirrel   0.666667  1.000000  0.800000        8
8       equid   0.877551  0.860000  0.868687       50
9   lagomorph   1.000000  1.000000  1.000000       15
10       wolf   0.897727  0.918605  0.908046       86
11       lynx   0.981567  0.950893  0.965986      224
12      sheep   1.000000  0.908333  0.951965      240
13   mustelid   1.000000  0.903226  0.949



In [None]:
# val_metrics_dfs is keyed by checkpoint path, not by position, so index it
# with the first model's path to display that model's validation table.
val_metrics_dfs[MODEL_PATHS[0]]