## DF vs DF_FineTune

- Use validation set to report results
- Note these are two different experiments with two different datasets

- With the MegaDetector-created val/test sets, test the fine-tuned DeepFaune (DF) model:
    - Dataset contains only images where MegaDetector detected an animal
    - Pass through all non-empty images. If MegaDetector flags a human/vehicle, set the confidence threshold for classification higher?
    - Precision/recall/F1 for all species, plus total
    - 5 epochs, lr1e-5
    - 5 epochs, lr1e-5 & 5 epochs, wb loss

- Steps
    - load val and test set into data loader/transform
    - initialise models:
        - DeepFaune
        - FineTuned
    - Get predictions, put in excel table
    - Analyse results
        - Precision/recall/F1
        - by species

In [None]:
import os
import socket
import getpass
import torch
import timm
from PIL import Image
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Species names; the position of a name in this list is the integer class
# index used for dataset labels and model outputs.
ANIMAL_CLASSES = [
    "badger",
    "ibex",
    "red deer",
    "chamois",
    "cat",
    "goat",
    "roe deer",
    "dog",
    "squirrel",
    "equid",
    "genet",
    "hedgehog",
    "lagomorph",
    "wolf",
    "lynx",
    "marmot",
    "micromammal",
    "mouflon",
    "sheep",
    "mustelid",
    "bird",
    "bear",
    "nutria",
    "fox",
    "wild boar",
    "cow",
]

# Checkpoints to evaluate, in evaluation order
MODEL_PATHS = [
    "../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt",
    "../models/Boar Balanced PrecisionRecall - 96.8-98.7-deepfaune-finetuned-epochs-15-lr-1e-05-wbpenalty-0for5,10for5,0for5.pt",
    "../models/Best Oveall Loss Model - deepfaune-finetuned-3epochs-lr1e-5.pt",
]

# Root folders of the validation and test splits (one sub-folder per class)
VAL_DIR = "../data/split_data/val"
TEST_DIR = "../data/split_data/test"

# Choose the PyTorch device: CUDA when it is available, otherwise the CPU
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("No GPU available.")
else:
    device = torch.device("cuda")
    gpu_name = torch.cuda.get_device_name()
    print(f"GPU name: {gpu_name} ({torch.cuda.device_count()} available)")
    print("Host name: ", socket.gethostname())
    print("User name: ", getpass.getuser())

    if socket.gethostname() != "gpuhost001.jc.rl.ac.uk":
        # Any other host: only report the total memory of the current GPU
        _, max_memory = torch.cuda.mem_get_info()
        max_memory /= (1024 ** 3)
        print(f"GPU memory: {max_memory} GiB")
    else:
        # On this specific host, pick the GPU that currently has the most free memory
        def select_gpu_with_most_free_memory():
            """Print the free memory of every visible GPU and return the index of the freest one.

            Each GPU is made the current device in turn before it is queried.
            On a tie the highest index wins, because the comparison is inclusive.
            """
            winner = 0
            winner_free_gib = 0
            for gpu_index in range(torch.cuda.device_count()):
                torch.cuda.set_device(gpu_index)
                free_bytes, _total_bytes = torch.cuda.mem_get_info()
                free_gib = free_bytes / (1024 ** 3)
                print(f"GPU {gpu_index} free memory: {round(free_gib, 2)} GiB")
                if free_gib < winner_free_gib:
                    continue
                winner_free_gib = free_gib
                winner = gpu_index
            return winner

        best_gpu = select_gpu_with_most_free_memory()
        torch.cuda.set_device(best_gpu)
        print(f"Using GPU: {best_gpu}")

# Manual escape hatch: flip to True to force GPU index 3 regardless of the selection above
gpu_override = False
if gpu_override:
    torch.cuda.set_device(3)
    print(f"OVERRIDE: Using GPU: {3}")

# Preprocessing applied to every image: bicubic resize to 182x182, conversion
# to a tensor, then per-channel normalisation.
# NOTE: this assignment rebinds the name `transforms`, so from here on it
# refers to the composed pipeline rather than the torchvision module.
transforms = transforms.Compose(
    [
        transforms.Resize(size=(182, 182), interpolation=InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Define Dataset class
class AnimalDataset(Dataset):
    """Image-classification dataset read from a ``<directory>/<label>/<image>`` tree.

    Every sub-directory name must be an entry of ``ANIMAL_CLASSES``; its
    position in that list is the integer label of each image inside it.
    Directory listings are sorted so the sample order is reproducible
    (``os.listdir`` makes no ordering guarantee). Hidden entries (names
    starting with ``.``, e.g. ``.ipynb_checkpoints`` or ``.DS_Store``) and
    anything that is not a regular file inside a class folder are skipped.

    Args:
        directory: Root folder holding one sub-directory per class.
        transform: Optional callable applied to each RGB ``PIL.Image``.
        preload_to_gpu: When true, every image is decoded and transformed at
            construction time and kept on ``device``; the labels become a
            tensor on ``device`` and ``__getitem__`` is a plain lookup.

    Raises:
        ValueError: A non-hidden sub-directory is not a known class name.
        TypeError: ``preload_to_gpu`` is set but the transform does not
            produce tensors (a ``PIL.Image`` cannot be moved to a device).
    """

    def __init__(self, directory, transform=None, preload_to_gpu=False):
        self.directory = directory
        self.transform = transform
        self.images = []
        self.labels = []
        self.preload_to_gpu = preload_to_gpu

        for label in sorted(os.listdir(directory)):
            label_dir = os.path.join(directory, label)
            if label.startswith('.') or not os.path.isdir(label_dir):
                continue
            if label not in ANIMAL_CLASSES:
                raise ValueError(
                    f"Unknown class directory {label_dir!r}: "
                    f"{label!r} is not listed in ANIMAL_CLASSES"
                )
            class_index = ANIMAL_CLASSES.index(label)
            for image in sorted(os.listdir(label_dir)):
                image_path = os.path.join(label_dir, image)
                if image.startswith('.') or not os.path.isfile(image_path):
                    continue
                self.images.append(image_path)
                self.labels.append(class_index)

        if self.preload_to_gpu:
            self.preload_images()

    def _load_image(self, image_path):
        """Open one image as RGB, apply the transform if any, and return the result."""
        # The context manager closes the file handle as soon as the pixels are
        # decoded; convert() returns an independent in-memory image.
        with Image.open(image_path) as handle:
            image = handle.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image

    def preload_images(self):
        """Decode and transform every image once and keep the results on ``device``."""
        self.loaded_images = []
        for image_path in tqdm(self.images, desc="Preloading images to GPU"):
            image = self._load_image(image_path)
            if not torch.is_tensor(image):
                raise TypeError(
                    "preload_to_gpu=True requires a transform that returns a "
                    f"torch.Tensor, got {type(image).__name__} for {image_path!r}"
                )
            self.loaded_images.append(image.to(device))
        self.labels = torch.tensor(self.labels, device=device)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``."""
        if self.preload_to_gpu:
            return self.loaded_images[idx], self.labels[idx]
        return self._load_image(self.images[idx]), self.labels[idx]

# Define Classifier class
class Classifier:
    """A ``vit_large_patch14_dinov2`` timm model loaded from a checkpoint, in eval mode on ``device``.

    Args:
        model_path: Path to a checkpoint that is either a state dict or a
            dict holding one under the ``'state_dict'`` key. Leading
            ``model.`` and ``base_model.`` wrappers on parameter names are
            removed before loading.
    """

    def __init__(self, model_path):
        self.model = timm.create_model('vit_large_patch14_dinov2', pretrained=False, num_classes=len(ANIMAL_CLASSES), dynamic_img_size=True)
        # NOTE(review): torch.load unpickles the file, which can execute
        # arbitrary code; only load checkpoints from a trusted source
        # (consider weights_only=True if the checkpoints allow it).
        state_dict = torch.load(model_path, map_location=device)
        if 'state_dict' in state_dict:
            state_dict = state_dict['state_dict']
        # Strip wrapper prefixes from the START of each key only. A plain
        # str.replace would also delete the text in the middle of a key
        # (e.g. 'model.base_model.x' would collapse to 'base_x').
        for prefix in ('model.', 'base_model.'):
            state_dict = self._strip_prefix(state_dict, prefix)
        self.model.load_state_dict(state_dict)
        self.model.to(device)
        self.model.eval()

    @staticmethod
    def _strip_prefix(state_dict, prefix):
        """Return a copy of *state_dict* with a leading *prefix* removed from each key that has it."""
        cut = len(prefix)
        return {
            (key[cut:] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }

    def predict(self, image):
        """Classify one image tensor.

        Args:
            image: A single image tensor without a batch axis (one is added
                here). Assumed to already be on the model's device; verify
                against the caller.

        Returns:
            ``(class_index, probability)`` of the highest-scoring class, as
            plain Python numbers.
        """
        with torch.no_grad():
            output = self.model(image.unsqueeze(0))
            probabilities = torch.nn.functional.softmax(output, dim=1)
            top_p, top_class = probabilities.topk(1, dim=1)
            return top_class.item(), top_p.item()

# Build each split's dataset (images preloaded onto the device) together with
# its loader; batches hold a single image and keep the dataset order.
print("Loading Validation Set...")
val_dataset = AnimalDataset(directory=VAL_DIR, transform=transforms, preload_to_gpu=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False)

print("Loading Test Set...")
test_dataset = AnimalDataset(directory=TEST_DIR, transform=transforms, preload_to_gpu=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

# Function to run inference and store predictions
def run_inference_and_store_predictions(model_path, data_loader, dataset_type):
    """Load the checkpoint at *model_path* and classify every sample of *data_loader*.

    Args:
        model_path: Checkpoint path handed to ``Classifier``.
        data_loader: Loader yielding ``(images, labels)`` batches. Its dataset
            must expose an ``images`` list of file paths, and the loader must
            iterate the dataset in order (``shuffle=False``, no custom
            sampler) for the recorded paths to match the predictions.
        dataset_type: Split name, used only in the progress-bar text.

    Returns:
        Dict of four parallel lists: ``'preds'`` (predicted class index),
        ``'probs'`` (probability of that class), ``'labels'`` (true class
        index) and ``'images'`` (source file path).
    """
    results = {'preds': [], 'probs': [], 'labels': [], 'images': []}
    classifier = Classifier(model_path)
    image_paths = data_loader.dataset.images
    # Position in the dataset of the next sample. The index inside a batch
    # restarts at 0 for every batch, so it cannot be used to look up paths.
    sample_index = 0

    for images, labels in tqdm(data_loader, desc=f"Running inference on {dataset_type} set with model {model_path}"):
        images = images.to(device)
        # One conversion per batch instead of one .item() call per sample
        for image, label in zip(images, labels.tolist()):
            pred_class, pred_prob = classifier.predict(image)
            results['preds'].append(pred_class)
            results['probs'].append(pred_prob)
            results['labels'].append(label)
            results['images'].append(image_paths[sample_index])
            sample_index += 1

    return results

# Function to calculate metrics
def calculate_metrics(results):
    """Compute per-class and macro-averaged precision, recall and F1.

    ``results`` supplies integer class indices under ``'labels'`` (ground
    truth) and ``'preds'`` (predictions). A class gets a row when it occurs
    in either sequence, so a class that is only ever predicted appears with a
    support of 0. The final ``'overall'`` row holds the macro averages and
    the total number of samples.

    Returns:
        Dict of equal-length columns: ``'class'``, ``'precision'``,
        ``'recall'``, ``'f1'`` and ``'support'``.
    """
    truth = results['labels']
    predicted = results['preds']

    # Class indices seen as a true label or as a prediction, in ANIMAL_CLASSES order
    observed = set(truth) | set(predicted)
    present_classes = [idx for idx, _ in enumerate(ANIMAL_CLASSES) if idx in observed]

    scorers = (precision_score, recall_score, f1_score)
    per_class = [
        scorer(truth, predicted, average=None, labels=present_classes, zero_division=0)
        for scorer in scorers
    ]
    support = np.bincount(truth, minlength=len(ANIMAL_CLASSES))[present_classes]
    macro = [
        scorer(truth, predicted, average='macro', zero_division=0)
        for scorer in scorers
    ]

    columns = {'class': [ANIMAL_CLASSES[idx] for idx in present_classes] + ['overall']}
    for column_name, class_scores, macro_score in zip(('precision', 'recall', 'f1'), per_class, macro):
        columns[column_name] = np.append(class_scores, macro_score)
    columns['support'] = np.append(support, len(truth))

    return columns

# Per-model metric tables, keyed by model path
val_metrics_dfs, test_metrics_dfs = {}, {}

# Evaluate every checkpoint on the validation split, then on the test split
for model_path in MODEL_PATHS:
    print(f"Running inference for model: {model_path}")

    print("Running inference on validation set...")
    val_results = run_inference_and_store_predictions(
        model_path, data_loader=val_loader, dataset_type='validation'
    )
    print("Running inference on test set...")
    test_results = run_inference_and_store_predictions(
        model_path, data_loader=test_loader, dataset_type='test'
    )

    print("Calculating metrics for validation set...")
    val_metrics = calculate_metrics(val_results)
    print("Calculating metrics for test set...")
    test_metrics = calculate_metrics(test_results)

    # Tabulate both splits, show them, and keep them for the export step
    val_metrics_df = pd.DataFrame(val_metrics)
    test_metrics_df = pd.DataFrame(test_metrics)
    print(f"\nMetrics for model {model_path} on validation set:", val_metrics_df, sep="\n")
    print(f"\nMetrics for model {model_path} on test set:", test_metrics_df, sep="\n")

    val_metrics_dfs[model_path] = val_metrics_df
    test_metrics_dfs[model_path] = test_metrics_df

# Save all results to a new Excel file (one sheet per model) for further analysis
def _excel_sheet_name(model_path, used_names):
    """Return a unique, Excel-safe sheet name derived from the file name of *model_path*.

    Excel limits sheet names to 31 characters, forbids the characters
    ``[ ] : * ? / \\`` and compares names case-insensitively. *used_names* is
    a set of lower-cased names already taken; it is updated in place.
    """
    forbidden = str.maketrans({ch: '_' for ch in '[]:*?/\\'})
    base = os.path.basename(model_path).translate(forbidden).strip("'") or 'Sheet'
    candidate = base[:31]
    counter = 1
    while candidate.lower() in used_names:
        # Truncated names collided: replace the tail with a numeric suffix
        suffix = f"~{counter}"
        candidate = base[:31 - len(suffix)] + suffix
        counter += 1
    used_names.add(candidate.lower())
    return candidate


all_results_file = "../data/all_model_metrics.xlsx"
print(f"Saving all results to {all_results_file}...")
with pd.ExcelWriter(all_results_file) as writer:
    used_sheet_names = set()
    row_order = ANIMAL_CLASSES + ['overall']
    for model_path in MODEL_PATHS:
        val_metrics_df = val_metrics_dfs[model_path]
        test_metrics_df = test_metrics_dfs[model_path]
        # Outer join: a class present in only one split still gets a row
        # (a left join would silently drop test-only classes). Validation
        # columns keep their names; test columns get the '_test' suffix.
        combined = val_metrics_df.set_index('class').join(
            test_metrics_df.set_index('class'), how='outer', rsuffix='_test'
        )
        # The outer join sorts rows alphabetically; restore ANIMAL_CLASSES
        # order with the 'overall' summary row last.
        ordered_rows = [name for name in row_order if name in combined.index]
        combined_metrics_df = combined.reindex(ordered_rows).rename_axis('class').reset_index()
        sheet_name = _excel_sheet_name(model_path, used_sheet_names)
        combined_metrics_df.to_excel(writer, sheet_name=sheet_name, index=False)
print("All results saved.")


In [11]:
# Function to run inference and store predictions
def run_inference_and_store_predictions(model_path, data_loader, dataset_type):
    """Load the checkpoint at *model_path* and classify every sample of *data_loader*.

    Args:
        model_path: Checkpoint path handed to ``Classifier``.
        data_loader: Loader yielding ``(images, labels)`` batches. Its dataset
            must expose an ``images`` list of file paths, and the loader must
            iterate the dataset in order (``shuffle=False``, no custom
            sampler) for the recorded paths to match the predictions.
        dataset_type: Split name, used only in the progress-bar text.

    Returns:
        Dict of four parallel lists: ``'preds'`` (predicted class index),
        ``'probs'`` (probability of that class), ``'labels'`` (true class
        index) and ``'images'`` (source file path).
    """
    results = {'preds': [], 'probs': [], 'labels': [], 'images': []}
    classifier = Classifier(model_path)
    image_paths = data_loader.dataset.images
    # Position in the dataset of the next sample. The index inside a batch
    # restarts at 0 for every batch, so it cannot be used to look up paths.
    sample_index = 0

    for images, labels in tqdm(data_loader, desc=f"Running inference on {dataset_type} set with model {model_path}"):
        images = images.to(device)
        # One conversion per batch instead of one .item() call per sample
        for image, label in zip(images, labels.tolist()):
            pred_class, pred_prob = classifier.predict(image)
            results['preds'].append(pred_class)
            results['probs'].append(pred_prob)
            results['labels'].append(label)
            results['images'].append(image_paths[sample_index])
            sample_index += 1

    return results

# Function to calculate metrics
def calculate_metrics(results):
    """Compute per-class and macro-averaged precision, recall and F1.

    ``results`` supplies integer class indices under ``'labels'`` (ground
    truth) and ``'preds'`` (predictions). A class gets a row when it occurs
    in either sequence, so a class that is only ever predicted appears with a
    support of 0. The final ``'overall'`` row holds the macro averages and
    the total number of samples.

    Returns:
        Dict of equal-length columns: ``'class'``, ``'precision'``,
        ``'recall'``, ``'f1'`` and ``'support'``.
    """
    truth = results['labels']
    predicted = results['preds']

    # Class indices seen as a true label or as a prediction, in ANIMAL_CLASSES order
    observed = set(truth) | set(predicted)
    present_classes = [idx for idx, _ in enumerate(ANIMAL_CLASSES) if idx in observed]

    scorers = (precision_score, recall_score, f1_score)
    per_class = [
        scorer(truth, predicted, average=None, labels=present_classes, zero_division=0)
        for scorer in scorers
    ]
    support = np.bincount(truth, minlength=len(ANIMAL_CLASSES))[present_classes]
    macro = [
        scorer(truth, predicted, average='macro', zero_division=0)
        for scorer in scorers
    ]

    columns = {'class': [ANIMAL_CLASSES[idx] for idx in present_classes] + ['overall']}
    for column_name, class_scores, macro_score in zip(('precision', 'recall', 'f1'), per_class, macro):
        columns[column_name] = np.append(class_scores, macro_score)
    columns['support'] = np.append(support, len(truth))

    return columns

# Per-model metric tables, keyed by model path
val_metrics_dfs, test_metrics_dfs = {}, {}

# Evaluate every checkpoint on the validation split, then on the test split
for model_path in MODEL_PATHS:
    print(f"Running inference for model: {model_path}")

    print("Running inference on validation set...")
    val_results = run_inference_and_store_predictions(
        model_path, data_loader=val_loader, dataset_type='validation'
    )
    print("Running inference on test set...")
    test_results = run_inference_and_store_predictions(
        model_path, data_loader=test_loader, dataset_type='test'
    )

    print("Calculating metrics for validation set...")
    val_metrics = calculate_metrics(val_results)
    print("Calculating metrics for test set...")
    test_metrics = calculate_metrics(test_results)

    # Tabulate both splits, show them, and keep them for the export step
    val_metrics_df = pd.DataFrame(val_metrics)
    test_metrics_df = pd.DataFrame(test_metrics)
    print(f"\nMetrics for model {model_path} on validation set:", val_metrics_df, sep="\n")
    print(f"\nMetrics for model {model_path} on test set:", test_metrics_df, sep="\n")

    val_metrics_dfs[model_path] = val_metrics_df
    test_metrics_dfs[model_path] = test_metrics_df

# Save all results to a new Excel file (one sheet per model) for further analysis
def _excel_sheet_name(model_path, used_names):
    """Return a unique, Excel-safe sheet name derived from the file name of *model_path*.

    Excel limits sheet names to 31 characters, forbids the characters
    ``[ ] : * ? / \\`` and compares names case-insensitively. *used_names* is
    a set of lower-cased names already taken; it is updated in place.
    """
    forbidden = str.maketrans({ch: '_' for ch in '[]:*?/\\'})
    base = os.path.basename(model_path).translate(forbidden).strip("'") or 'Sheet'
    candidate = base[:31]
    counter = 1
    while candidate.lower() in used_names:
        # Truncated names collided: replace the tail with a numeric suffix
        suffix = f"~{counter}"
        candidate = base[:31 - len(suffix)] + suffix
        counter += 1
    used_names.add(candidate.lower())
    return candidate


all_results_file = "../data/all_model_metrics.xlsx"
print(f"Saving all results to {all_results_file}...")
with pd.ExcelWriter(all_results_file) as writer:
    used_sheet_names = set()
    row_order = ANIMAL_CLASSES + ['overall']
    for model_path in MODEL_PATHS:
        val_metrics_df = val_metrics_dfs[model_path]
        test_metrics_df = test_metrics_dfs[model_path]
        # Outer join: a class present in only one split still gets a row
        # (a left join would silently drop test-only classes). Validation
        # columns keep their names; test columns get the '_test' suffix.
        combined = val_metrics_df.set_index('class').join(
            test_metrics_df.set_index('class'), how='outer', rsuffix='_test'
        )
        # The outer join sorts rows alphabetically; restore ANIMAL_CLASSES
        # order with the 'overall' summary row last.
        ordered_rows = [name for name in row_order if name in combined.index]
        combined_metrics_df = combined.reindex(ordered_rows).rename_axis('class').reset_index()
        sheet_name = _excel_sheet_name(model_path, used_sheet_names)
        combined_metrics_df.to_excel(writer, sheet_name=sheet_name, index=False)
print("All results saved.")


Running inference for model: ../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt
Running inference on validation set...


Running inference on validation set with model ../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt: 100%|██████████| 3669/3669 [01:17<00:00, 47.28it/s]


Running inference on test set...


Running inference on test set with model ../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt: 100%|██████████| 3557/3557 [01:14<00:00, 47.43it/s]


Calculating metrics for validation set...
Calculating metrics for test set...

Metrics for model ../models/deepfaune-vit_large_patch14_dinov2.lvd142m.pt on validation set:
          class  precision    recall        f1  support
0        badger   0.950000  0.966102  0.957983       59
1          ibex   0.000000  0.000000  0.000000        0
2      red deer   0.976705  0.928797  0.952149      632
3       chamois   0.181818  1.000000  0.307692        4
4           cat   0.750000  0.960000  0.842105       25
5          goat   0.942623  0.696970  0.801394      165
6      roe deer   0.947494  0.894144  0.920046      444
7           dog   0.789474  0.913043  0.846774      115
8      squirrel   0.129032  1.000000  0.228571        8
9         equid   0.671875  0.860000  0.754386       50
10        genet   0.000000  0.000000  0.000000        0
11    lagomorph   0.652174  1.000000  0.789474       15
12         wolf   0.824742  0.930233  0.874317       86
13         lynx   0.990521  0.933036  0.9609

Running inference on validation set with model ../models/Boar Balanced PrecisionRecall - 96.8-98.7-deepfaune-finetuned-epochs-15-lr-1e-05-wbpenalty-0for5,10for5,0for5.pt: 100%|██████████| 3669/3669 [01:17<00:00, 47.35it/s]


Running inference on test set...


Running inference on test set with model ../models/Boar Balanced PrecisionRecall - 96.8-98.7-deepfaune-finetuned-epochs-15-lr-1e-05-wbpenalty-0for5,10for5,0for5.pt: 100%|██████████| 3557/3557 [01:15<00:00, 47.33it/s]


Calculating metrics for validation set...
Calculating metrics for test set...

Metrics for model ../models/Boar Balanced PrecisionRecall - 96.8-98.7-deepfaune-finetuned-epochs-15-lr-1e-05-wbpenalty-0for5,10for5,0for5.pt on validation set:
        class  precision    recall        f1  support
0      badger   0.966102  0.966102  0.966102       59
1    red deer   0.933642  0.957278  0.945312      632
2     chamois   0.666667  0.500000  0.571429        4
3         cat   0.956522  0.880000  0.916667       25
4        goat   0.777202  0.909091  0.837989      165
5    roe deer   0.954545  0.945946  0.950226      444
6         dog   0.935185  0.878261  0.905830      115
7    squirrel   0.888889  1.000000  0.941176        8
8       equid   0.871795  0.680000  0.764045       50
9   lagomorph   0.933333  0.933333  0.933333       15
10       wolf   0.861702  0.941860  0.900000       86
11       lynx   1.000000  0.946429  0.972477      224
12      sheep   0.971193  0.983333  0.977226      240
13   

Running inference on validation set with model ../models/Best Oveall Loss Model - deepfaune-finetuned-3epochs-lr1e-5.pt: 100%|██████████| 3669/3669 [01:17<00:00, 47.41it/s]


Running inference on test set...


Running inference on test set with model ../models/Best Oveall Loss Model - deepfaune-finetuned-3epochs-lr1e-5.pt: 100%|██████████| 3557/3557 [01:15<00:00, 47.32it/s]


Calculating metrics for validation set...
Calculating metrics for test set...

Metrics for model ../models/Best Oveall Loss Model - deepfaune-finetuned-3epochs-lr1e-5.pt on validation set:
        class  precision    recall        f1  support
0      badger   0.919355  0.966102  0.942149       59
1    red deer   0.968595  0.927215  0.947454      632
2     chamois   1.000000  1.000000  1.000000        4
3         cat   0.960000  0.960000  0.960000       25
4        goat   0.849162  0.921212  0.883721      165
5    roe deer   0.946067  0.948198  0.947132      444
6         dog   0.908333  0.947826  0.927660      115
7    squirrel   0.888889  1.000000  0.941176        8
8       equid   1.000000  0.760000  0.863636       50
9   lagomorph   1.000000  0.933333  0.965517       15
10       wolf   0.872340  0.953488  0.911111       86
11       lynx   0.986301  0.964286  0.975169      224
12      sheep   0.979253  0.983333  0.981289      240
13   mustelid   1.000000  0.870968  0.931034       31
1

