# Tests on dinov2 embeddings

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
!nvidia-smi

## Imports

In [None]:
import os
import torch

## Checking torch cache dir
Warning: make sure to set the torch hub cache dir so that previously downloaded weights are reused.

In [None]:
# Display the directory torch.hub currently uses as its cache.
torch.hub.get_dir()

## Setting torch cache dir for download

In [None]:
from lynx_id.utils import dinov2_utils

# Project helper; presumably points torch.hub at a shared cache directory and
# returns that path — TODO confirm against lynx_id.utils.dinov2_utils.
torch_hub_dir = dinov2_utils.set_torch_hub_dir()

## Download dinov2 weights

In [None]:
import torch
import gc  # Garbage collector interface
from tqdm import tqdm  # Import tqdm for progress bar functionality
from lynx_id.utils import dinov2_utils

# Hub entry-point names of the DINOv2 variants to download
model_names = [
    'dinov2_vits14_reg',
    'dinov2_vitb14_reg',
    'dinov2_vitl14_reg',
    # 'dinov2_vitg14_reg'  # Uncomment or add more model names as needed
]
# Safety switch: the download loop below only runs when this is set to True.
download_models = False

if download_models:
    # Iterate over the list of model names with a progress bar
    for name in tqdm(model_names, desc="Downloading DINOv2 models", unit="model"):
        # Project helper; by its name it downloads one model and then frees it — TODO confirm.
        dinov2_utils.download_and_clear_memory(name)

    print("All models are downloaded and cleared from active memory.")

## Checking xformers

In [None]:
from lynx_id.utils import dinov2_utils

# Project helper returning two flags; by their names, whether xFormers is enabled
# and whether it can be used — TODO confirm in lynx_id.utils.dinov2_utils.
# XFORMERS_AVAILABLE is read as a global by CustomMemEffAttention.forward.
XFORMERS_ENABLED, XFORMERS_AVAILABLE = dinov2_utils.check_xformers_status()


## Loading a dinov2

In [None]:
import torch

# Hub entry point to load (ViT-L/14 variant with register tokens, going by the name).
model_name = 'dinov2_vitl14_reg'
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#model = torch.hub.load('facebookresearch/dinov2', model_name, skip_validation=True, pretrained=False).to(device)
# Load from a local checkout of the dinov2 repository (source='local') instead of GitHub.
# NOTE(review): the path is specific to the compute cluster; adapt it on other machines.
model = torch.hub.load('/gpfswork/rech/ads/commun/models/facebookresearch_dinov2_main/', model_name, source='local').to(device)


### Copying last attention layer
Keep an unchanged copy of the layer we are about to modify, so that the model does not need to be reloaded.

In [None]:
import copy

# Reference (not a copy) to the attention module of the last transformer block.
original_attn_layer = model.blocks[-1].attn

# Independent deep copy, parameters included, so the original configuration and
# weights stay available after model.blocks[-1].attn is replaced further down.
cloned_attn_layer = copy.deepcopy(original_attn_layer)

### Model and last layer definition (methods and attributes) check

In [None]:
from lynx_id.utils import dinov2_utils

# Project helper; presumably prints the model structure and the last layer's
# methods/attributes, as the section title suggests — TODO confirm.
dinov2_utils.inspect_model(model)

### Inspecting output of last attention layer

In [None]:
from lynx_id.utils import dinov2_utils
# Run the project's attention-output check on the device the model was moved to
# when it was loaded, instead of a hard-coded 'cuda' that fails on CPU-only machines.
# The device is passed as a string, like the original literal.
dinov2_utils.test_attention_output(model, str(device))

### Modifying the MemEffAttention

In [None]:
import torch
from torch import nn, Tensor
import torch.nn.functional as F
from xformers.ops import memory_efficient_attention, unbind

# Retrieve the class of the current attention layer for accurate subclassing
actual_attention_class = cloned_attn_layer.__class__
class CustomMemEffAttention(actual_attention_class):
    """Attention layer that additionally records its softmax attention map.

    The layer output is still produced by xFormers' ``memory_efficient_attention``.
    Because that kernel does not expose the attention probabilities, they are
    recomputed explicitly from the same ``q`` and ``k`` and stored, without
    gradient, in ``self.last_attention_map`` with shape (B, num_heads, N, N).

    Constructor arguments are forwarded unchanged to the parent attention class.
    """

    def __init__(self, dim, num_heads=8, qkv_bias=False, proj_bias=True, attn_drop=0.0, proj_drop=0.0):
        super().__init__(
            dim=dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            proj_bias=proj_bias,
            attn_drop=attn_drop,
            proj_drop=proj_drop
        )
        # Defined up front so the attribute can be read (as None) before the
        # first forward pass instead of raising AttributeError.
        self.last_attention_map = None

    def forward(self, x: Tensor, attn_bias=None, return_attn=True) -> Tensor:
        """Compute the attention output and refresh ``self.last_attention_map``.

        Parameters:
        - x: input of shape (B, N, C).
        - attn_bias: optional bias passed to ``memory_efficient_attention`` and
          added to the explicit attention scores.
          NOTE(review): the explicit path assumes it is a tensor broadcastable to
          (B, num_heads, N, N) — confirm for xFormers bias objects.
        - return_attn: accepted for call compatibility but not used; the map is
          always stored on the module and only ``x`` is returned.

        Returns:
        - Tensor of shape (B, N, C).

        Raises:
        - AssertionError: if xFormers is unavailable and ``attn_bias`` is given.

        When ``XFORMERS_AVAILABLE`` is false the parent forward is used and
        ``last_attention_map`` is left untouched.
        """
        if not XFORMERS_AVAILABLE:
            if attn_bias is not None:
                raise AssertionError("xFormers is required for using nested tensors")
            return super().forward(x)

        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
        # Each of q, k, v: (B, N, num_heads, head_dim)
        q, k, v = torch.unbind(qkv, dim=2)

        x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)

        # The stored map is only inspected, never back-propagated through, so it
        # is computed without building an autograd graph.
        with torch.no_grad():
            # Default scaling used by memory_efficient_attention
            scale = 1.0 / q.shape[-1] ** 0.5

            # (B, num_heads, N, head_dim) @ (B, num_heads, head_dim, N) -> (B, num_heads, N, N)
            attn = torch.matmul((q * scale).transpose(1, 2), k.transpose(1, 2).transpose(-2, -1))

            # Apply attention bias if provided
            if attn_bias is not None:
                attn = attn + attn_bias

            # Softmax over the key axis gives the attention probabilities
            self.last_attention_map = F.softmax(attn, dim=-1)

        x = x.reshape([B, N, C])
        x = self.proj(x)
        x = self.proj_drop(x)
        return x

#if return_attn:
        #    print('got in final return')
        #    return x, self.last_attention_map
        #else:



# Rebuild the constructor arguments from the untouched copy of the original layer
num_heads = cloned_attn_layer.num_heads  # Number of attention heads
dim = cloned_attn_layer.qkv.out_features // 3  # qkv packs q, k and v, each of width `dim`

# Bias in qkv and projection can be inferred from the existence and not None check
qkv_bias = cloned_attn_layer.qkv.bias is not None
proj_bias = cloned_attn_layer.proj.bias is not None

# Dropout probabilities, falling back to 0.0 when the layer has no such submodule
attn_drop = cloned_attn_layer.attn_drop.p if hasattr(cloned_attn_layer, 'attn_drop') else 0.0
proj_drop = cloned_attn_layer.proj_drop.p if hasattr(cloned_attn_layer, 'proj_drop') else 0.0

custom_attn_layer = CustomMemEffAttention(
    dim=dim,
    num_heads=num_heads,
    qkv_bias=qkv_bias,
    proj_bias=proj_bias,
    attn_drop=attn_drop,
    proj_drop=proj_drop
)

# A freshly constructed layer carries randomly initialised weights. Copy the
# pretrained parameters over, otherwise the last block of the model is silently
# replaced by an untrained attention layer.
custom_attn_layer.load_state_dict(cloned_attn_layer.state_dict())
# Keep the same train/eval mode as the layer being replaced.
custom_attn_layer.train(cloned_attn_layer.training)

# Replace the existing attention layer in the last block, on the model's device
model.blocks[-1].attn = custom_attn_layer.to(device)

### Dynamically Override the forward Method of the model


In [None]:
from lynx_id.utils import dinov2_utils

# Assuming `model` is an instance of DinoVisionTransformer
# Shadow forward on this instance with the callable built by the project helper.
# Presumably the replacement accepts return_attn and returns (output, attentions),
# since the test-forward cell calls the model that way — TODO confirm in dinov2_utils.
model.forward = dinov2_utils.modified_forward(model)


In [None]:
# Project helper applied to the whole model; its exact effect is not visible here.
# NOTE(review): it may overlap with the manual attention-layer and forward
# replacement done in the previous cells — confirm whether both are needed.
model = dinov2_utils.dinov2_modifier(model)

## Test forward

In [None]:
# Side length of the square dummy image fed to the model.
image_size = 518

random_image = torch.randn(1, 3, image_size, image_size).to(device)
# Smoke test: only the output shapes are inspected, so run without autograd to
# avoid keeping activations for a backward pass that never happens.
with torch.no_grad():
    output, attentions = model(random_image, return_attn=True)
print("Output shape:", output.shape)
print("Attention Weights Shape:", attentions[0].shape)


## Visualisation of attention head

In [None]:
import torch
from lynx_id.data.dataset import LynxDataset
from pathlib import Path
import cv2
import numpy as np
import albumentations as A
import albumentations.pytorch as AP
import torch
from PIL import Image

from lynx_id.data.transformations_and_augmentations import transforms_dinov2, augments_dinov2


# Load the dataset
# NOTE(review): cluster-specific absolute path; adapt it on other machines.
dataset_csv = Path('/gpfsscratch/rech/ads/commun/datasets/extracted/lynx_dataset_full.csv')
dataset = LynxDataset(dataset_csv, mode='single', transform = transforms_dinov2, augmentation=augments_dinov2, probabilities=[0,0,1])  # Default mode

# Fetch one item. The names avoid `input`, which would shadow the builtin
# input() for the rest of the kernel session; the second element is not used here.
sample, sample_target = dataset[12]

#sample = np.array(Image.open("chat.png").convert('RGB'))

transformed_image = sample['image']
#transformed_image = sample

# Assuming 'model' is already defined and loaded elsewhere
# Make sure the model is in evaluation mode
model.eval()
with torch.no_grad():
    embeddings, attentions = model(transformed_image.unsqueeze(0).to(device), return_attn=True)  # Add batch dimension

attentions = attentions[0] # Keep only the first block
print(embeddings.shape)
print(attentions.shape)
nh = attentions.shape[1] # number of heads

In [None]:
# Batch element 0, all heads, query token 0, keys from index 5 onwards, flattened per head.
# Presumably token 0 is the CLS token and indices 1-4 are the four register tokens
# of the *_reg models, so only patch tokens remain — TODO confirm.
# NOTE: this rebinds `attentions`; re-running the cell on its own operates on the
# already-sliced tensor, so re-run the previous cell first.
attentions = attentions[0, :, 0, 5:].reshape(nh, -1)

attentions.shape

In [None]:
# Patch side length in pixels, hard-coded here (the upsampling cell reads model.patch_size instead).
model_patch_size = 14
transformed_image.shape
# Number of patches along the last two image axes.
# NOTE(review): if transformed_image is (C, H, W), shape[-2] is the height and
# shape[-1] the width, so the w_/h_ prefixes look swapped. The later reshape uses
# them in this same order, so the result is unaffected — confirm and rename.
w_featmap = transformed_image.shape[-2] // model_patch_size
h_featmap = transformed_image.shape[-1] // model_patch_size
print(nh, w_featmap, h_featmap, )

In [None]:
# Boolean mask of the values above the 90th percentile, computed over all heads jointly.
th_attn = attentions > np.quantile(attentions.cpu().numpy(),0.9)
# Zero everything at or below the threshold and rescale by the global maximum.
attentions = (th_attn*attentions)/attentions.max()

In [None]:
# One 2-D map per head, laid out on the patch grid.
attentions = attentions.reshape(nh, w_featmap, h_featmap).float()
# Upsample every map by the model's patch size (nearest neighbour) and convert to NumPy.
# unsqueeze(0) / [0] add and remove the temporary batch axis around interpolate.
# `nn` is torch.nn, imported in the attention-layer cell.
attentions = nn.functional.interpolate(attentions.unsqueeze(0), scale_factor=model.patch_size, mode="nearest")[0].cpu().numpy()

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

def plot_attentions(image, attentions, method='mean'):
    """
    Plot the image with the attention map next to it, and below all attention heads in a grid with two columns.

    The first grid row holds the input image and the aggregated attention map;
    the following rows hold one panel per head. Unused panels are blanked.

    Parameters:
    - image: image tensor; after squeeze() it must be channel-first (C, H, W).
    - attentions: numpy array or tensor of shape (num_heads, height, width).
    - method: 'mean' or 'max' to determine how to aggregate attention heads.

    Raises:
    - ValueError: if method is neither 'mean' nor 'max'.
    """
    # Ensure attentions is a numpy array
    if torch.is_tensor(attentions):
        attentions = attentions.detach().cpu().numpy()

    # Aggregate attention heads based on the method
    if method == 'mean':
        attention_avg = np.mean(attentions, axis=0)
    elif method == 'max':
        attention_avg = np.max(attentions, axis=0)
    else:
        raise ValueError("Method must be 'mean' or 'max'")

    # Two panels (image + aggregated map) plus one per head, rounded up to full rows.
    # Ceiling division keeps the grid large enough for an odd number of heads.
    num_heads = attentions.shape[0]
    num_cols = 2
    num_panels = num_heads + 2
    num_rows = (num_panels + num_cols - 1) // num_cols

    # squeeze=False keeps `axs` two-dimensional even for a single-row grid
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(15, num_rows * 5), squeeze=False)

    # Show image (channel-first tensor -> height x width x channel array)
    axs[0, 0].imshow(image.squeeze().permute(1,2,0).cpu().numpy())
    axs[0, 0].set_title('Input Image')
    axs[0, 0].axis('off')

    # Show aggregated attention map
    axs[0, 1].imshow(attention_avg)
    axs[0, 1].set_title(f'Attention Map ({method})')
    axs[0, 1].axis('off')

    # Plot each attention head in the grid, starting after the two header panels
    for i in range(num_heads):
        row, col = divmod(i + 2, num_cols)
        axs[row, col].imshow(attentions[i,:,:], cmap='inferno')
        axs[row, col].set_title(f'Head {i+1}')
        axs[row, col].axis('off')

    # Hide any unused subplots
    for i in range(num_panels, num_rows * num_cols):
        row, col = divmod(i, num_cols)
        axs[row, col].axis('off')

    plt.tight_layout()
    plt.show()

# Example usage:
# Assuming `transformed_image` is the input image tensor and `attentions` is the attention tensor
# (at this point `attentions` is the per-head NumPy array produced by the interpolation cell).
print(type(transformed_image))
plot_attentions(transformed_image, attentions, method='mean')


## Execution performance of dinov2

In [None]:
import os
import torch

def get_available_models(prefix, base_path=torch.hub.get_dir()):
    """Return the unique names of the `.pth` files found under `base_path`.

    Parameters:
    - prefix: only files whose name starts with this string are kept.
    - base_path: directory searched recursively. `~` is expanded. When None,
      the `checkpoints` folder of the current torch hub directory is used.
      NOTE: the default value is evaluated once, when this definition runs; a
      later change of the torch hub directory is not picked up unless
      `base_path` is passed explicitly or the definition is re-run.

    Returns:
    - Sorted list of file names without the `.pth` extension, duplicates removed.
    """
    if base_path is None:
        base_path = os.path.join(torch.hub.get_dir(), 'checkpoints')
    else:
        base_path = os.path.expanduser(base_path)

    # Walk through all directories and subdirectories. Only the trailing
    # extension is stripped, so a '.pth' elsewhere in the name is preserved.
    available_model_types = {
        os.path.splitext(file_name)[0]
        for _, _, files in os.walk(base_path)
        for file_name in files
        if file_name.startswith(prefix) and file_name.endswith('.pth')
    }

    # Sorted so the result does not depend on set iteration order
    return sorted(available_model_types)

# Call the function with a specific prefix and print available models
model_prefix = "dinov2_"  # Change this to the desired prefix
available_dinov2_models = get_available_models(model_prefix)
# Print the list bound just above; `available_models` only exists inside the
# function, so referencing it here raised NameError.
print(f"Available models with prefix '{model_prefix}':", available_dinov2_models)

In [None]:
import torch
import torch.cuda as cuda
import time




In [None]:
import os
import time
import torch

# Static dictionary mapping model names to file names:
# torch hub entry-point name -> checkpoint file name without the '.pth' extension.
# compare_models uses it in reverse, to find the entry point for a file found on disk.
model_name_to_file = {
    'dinov2_vitg14_reg': 'dinov2_vitg14_reg4_pretrain',
    'dinov2_vitl14_reg': 'dinov2_vitl14_reg4_pretrain',
    'dinov2_vitb14_reg': 'dinov2_vitb14_reg4_pretrain',
    'dinov2_vits14_reg': 'dinov2_vits14_reg4_pretrain',
    'dinov2_vits14': 'dinov2_vits14_pretrain'
}

# Define the order of model sizes for sorting
# (size letters as they appear in the model names; presumably small, base, large, giant).
model_order = ['s', 'b', 'l', 'g']

def get_available_models(prefix, base_path=torch.hub.get_dir()):
    """Return the unique names of the `.pth` files found under `base_path`.

    Parameters:
    - prefix: only files whose name starts with this string are kept.
    - base_path: directory searched recursively. `~` is expanded. When None,
      the `checkpoints` folder of the current torch hub directory is used.
      NOTE: the default value is evaluated once, when this definition runs; a
      later change of the torch hub directory is not picked up unless
      `base_path` is passed explicitly or the definition is re-run.

    Returns:
    - Sorted list of file names without the `.pth` extension, duplicates removed.
    """
    if base_path is None:
        base_path = os.path.join(torch.hub.get_dir(), 'checkpoints')
    else:
        base_path = os.path.expanduser(base_path)

    # Walk through all directories and subdirectories. Only the trailing
    # extension is stripped, so a '.pth' elsewhere in the name is preserved.
    available_model_files = {
        os.path.splitext(file_name)[0]
        for _, _, files in os.walk(base_path)
        for file_name in files
        if file_name.startswith(prefix) and file_name.endswith('.pth')
    }

    # Sorted so the result does not depend on set iteration order
    return sorted(available_model_files)

# Function to load models
def load_model(model_name):
    """Load `model_name` through torch.hub from 'facebookresearch/dinov2'.

    The model is moved to CUDA when available, otherwise to the CPU, and returned.
    """
    target = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.hub.load('facebookresearch/dinov2', model_name).to(torch.device(target))

# Function to measure inference metrics
def measure_inference_metrics(model, input_tensor):
    """Time one forward pass in eval mode and report its peak CUDA memory.

    Parameters:
    - model: module called as `model(input_tensor)`; it is switched to eval mode.
    - input_tensor: input passed to the model.

    Returns:
    - (inference_time, output_shape, peak_memory_mb): elapsed seconds, the
      `.shape` of the model output, and the peak CUDA memory allocated during
      the pass on top of what was allocated before it, in MB (0.0 without CUDA).
    """
    use_cuda = torch.cuda.is_available()

    model.eval()
    if use_cuda:
        torch.cuda.synchronize()
        # Reset the peak counter BEFORE the pass, so the reported peak belongs to
        # this forward pass and not to whatever ran earlier in the session.
        torch.cuda.reset_peak_memory_stats()
    start_mem = torch.cuda.memory_allocated() if use_cuda else 0

    # perf_counter is monotonic and meant for measuring short durations
    start_time = time.perf_counter()
    with torch.no_grad():
        output = model(input_tensor)
    if use_cuda:
        # CUDA kernels run asynchronously; wait for them before stopping the clock
        torch.cuda.synchronize()
    inference_time = time.perf_counter() - start_time

    peak_mem = torch.cuda.max_memory_allocated() if use_cuda else 0
    if use_cuda:
        torch.cuda.empty_cache()
    return inference_time, output.shape, (peak_mem - start_mem) / (1024 ** 2)  # MB

# Function to measure training metrics
def measure_training_metrics(model, input_tensor):
    """Time one SGD training step on dummy labels and report its peak CUDA memory.

    The model is switched to train mode and its parameters are updated by one
    optimizer step (SGD, lr=0.01) with a cross-entropy loss on random labels.

    Parameters:
    - model: module called as `model(input_tensor)`; its output is used as
      class scores, so it is expected to be (batch, num_features).
    - input_tensor: input batch passed to the model.

    Returns:
    - (train_time, peak_memory_mb): elapsed seconds for the step and the peak
      CUDA memory allocated during it on top of what was allocated before it,
      in MB (0.0 without CUDA).
    """
    use_cuda = torch.cuda.is_available()

    model.train()
    criterion = torch.nn.CrossEntropyLoss()  # Dummy loss function for example
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    if use_cuda:
        torch.cuda.synchronize()
        # Reset the peak counter BEFORE the step, so the reported peak belongs to
        # this training step and not to whatever ran earlier in the session.
        torch.cuda.reset_peak_memory_stats()
    start_mem = torch.cuda.memory_allocated() if use_cuda else 0

    start_time = time.perf_counter()
    optimizer.zero_grad()
    output = model(input_tensor)
    # Dummy labels must be valid class indices for this model's output width.
    # A fixed upper bound of 1000 is out of range for narrower outputs and makes
    # the loss fail with a target-out-of-bounds error.
    targets = torch.randint(0, output.size(-1), (input_tensor.size(0),)).to(input_tensor.device)
    loss = criterion(output, targets)
    loss.backward()
    optimizer.step()
    if use_cuda:
        # CUDA kernels run asynchronously; wait for them before stopping the clock
        torch.cuda.synchronize()
    train_time = time.perf_counter() - start_time

    peak_mem = torch.cuda.max_memory_allocated() if use_cuda else 0
    if use_cuda:
        torch.cuda.empty_cache()
    return train_time, (peak_mem - start_mem) / (1024 ** 2)  # MB

# Main function to compare models
def compare_models(prefix):
    """Benchmark every locally available model whose checkpoint name starts with `prefix`.

    Checkpoint names returned by get_available_models are ordered by model
    size (see `model_order`), mapped back to hub entry points through
    `model_name_to_file`, loaded, and measured for one inference pass and one
    training step on a random 1x3x224x224 input. Files without an entry in
    `model_name_to_file` are skipped.

    Parameters:
    - prefix: checkpoint file-name prefix, e.g. "dinov2_".

    Returns:
    - dict mapping model name to {'Inference': {...}, 'Training': {...}} metrics.
    """
    def size_rank(file_stem):
        # 'dinov2_vitl14_reg4_pretrain' -> 'vitl14' -> 'l'. The size letter is
        # the character right after 'vit'; the last character of that part is a
        # digit of the patch size and never matches model_order.
        parts = file_stem.split('_')
        arch = parts[1] if len(parts) > 1 else ''
        size = arch[3:4] if arch.startswith('vit') else ''
        # Unknown sizes rank first, as in the original fallback value
        return model_order.index(size) if size in model_order else -1

    available_model_files = get_available_models(prefix)

    # Sort available models based on predefined order
    sorted_models = sorted(available_model_files, key=size_rank)
    print(available_model_files)
    print(sorted_models)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_tensor = torch.randn(1, 3, 224, 224).to(device)  # Example input tensor

    results = {}
    for model_file in sorted_models:
        # Find corresponding model name using the dictionary
        model_name = next((key for key, value in model_name_to_file.items() if value == model_file), None)
        if not model_name:
            continue

        print(f"Loading {model_name}...")
        model = load_model(model_name)

        print(f"Measuring inference metrics for {model_name}...")
        inference_speed, output_shape, inference_peak_memory = measure_inference_metrics(model, input_tensor)

        print(f"Measuring training metrics for {model_name}...")
        training_speed, training_peak_memory = measure_training_metrics(model, input_tensor)

        results[model_name] = {
            'Inference': {
                'Speed (s)': inference_speed,
                'Output Shape': output_shape,
                'Peak Memory Usage (MB)': inference_peak_memory
            },
            'Training': {
                'Speed (s)': training_speed,
                'Peak Memory Usage (MB)': training_peak_memory
            }
        }

        print(f"Results for {model_name}:")
        print(f"Inference - Speed: {inference_speed:.6f} seconds, Peak Memory: {inference_peak_memory:.2f} MB")
        print(f"Training - Speed: {training_speed:.6f} seconds, Peak Memory: {training_peak_memory:.2f} MB\n")

        # Release this model before the next one is loaded, so two models are
        # never resident on the device at the same time.
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    print("All measurements completed.")
    return results

# Call the main function to compare models
# Every checkpoint whose file name starts with this prefix is considered.
model_prefix = "dinov2_"
results = compare_models(model_prefix)
# Dump the nested {model name: {'Inference': ..., 'Training': ...}} dictionary.
print(results)

## Training tests

In [2]:
%load_ext autoreload
%autoreload 2

In [4]:
import argparse

# Arguments handed to train_dinov2.main as a ready-made namespace, in place of
# command-line parsing.
# NOTE(review): all paths are cluster-specific absolute paths.
# NOTE(review): model_type is "megadescriptor" and the embedder weights path points
# at a resnet50 folder while the entry point is train_dinov2 — confirm this
# combination is intended.
args = argparse.Namespace(
    train_csv='/lustre/fsn1/projects/rech/ads/commun/datasets/balanced_noswiss_csv/lynx_id_balanced_train.csv',
    val_csv='/lustre/fsn1/projects/rech/ads/commun/datasets/balanced_noswiss_csv/lynx_id_balanced_val.csv',
    test_csv='/lustre/fsn1/projects/rech/ads/commun/datasets/balanced_noswiss_csv/lynx_id_balanced_test.csv',
    model_embedder_weights='/lustre/fswork/projects/rech/ads/commun/models/resnet50/pretrained_weights.pt',
    # The triplet pre-computation is saved to and loaded from the same file.
    triplet_precompute_save_path='/lustre/fswork/projects/rech/ads/commun/kg_tests/experiments/megad_noswiss_full/triplet_precompute.npz',
    triplet_precompute_load_path='/lustre/fswork/projects/rech/ads/commun/kg_tests/experiments/megad_noswiss_full/triplet_precompute.npz',
    experiment_path='/lustre/fswork/projects/rech/ads/commun/kg_tests/experiments/megad_noswiss_full_3layers',
    device='cuda',  # or 'cpu', 'auto'
    verbose=True,
    epochs=10,
    debug=False,
    model_type="megadescriptor",
    eval_before_training=False,
    add_dense_layers=True,
    num_dense_layers=3
)

In [5]:
from lynx_id.scripts.train import train_dinov2
# Run the project's training entry point with the namespace built in the previous cell.
# NOTE(review): the recorded output shows all 10 epochs completing and both
# checkpoints being saved, after which main() fails at train_dinov2.py line 294 with
# "NameError: name 'val_eval_metrics' is not defined". The post-training evaluation
# path has to be fixed in that module.
train_dinov2.main(args)

Running train_triplets with arguments: Namespace(train_csv='/lustre/fsn1/projects/rech/ads/commun/datasets/balanced_noswiss_csv/lynx_id_balanced_train.csv', val_csv='/lustre/fsn1/projects/rech/ads/commun/datasets/balanced_noswiss_csv/lynx_id_balanced_val.csv', test_csv='/lustre/fsn1/projects/rech/ads/commun/datasets/balanced_noswiss_csv/lynx_id_balanced_test.csv', model_embedder_weights='/lustre/fswork/projects/rech/ads/commun/models/resnet50/pretrained_weights.pt', triplet_precompute_save_path='/lustre/fswork/projects/rech/ads/commun/kg_tests/experiments/megad_noswiss_full/triplet_precompute.npz', triplet_precompute_load_path='/lustre/fswork/projects/rech/ads/commun/kg_tests/experiments/megad_noswiss_full/triplet_precompute.npz', experiment_path='/lustre/fswork/projects/rech/ads/commun/kg_tests/experiments/megad_noswiss_full_3layers', device='cuda', verbose=True, epochs=10, debug=False, model_type='megadescriptor', eval_before_training=False, add_dense_layers=True, num_dense_layers=3)

100%|##########| 2079/2079 [00:00<00:00, 5081560.62it/s]
100%|##########| 2547/2547 [00:00<00:00, 5409059.39it/s]
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Starting training...


Epoch 1/10: 100%|##########| 1090/1090 [15:39<00:00,  1.16it/s, loss=0.257]  


Epoch [1/10], Loss: 0.05554155677283576


Compute embeddings: 100%|##########| 1090/1090 [02:27<00:00,  7.37it/s]
Compute embeddings: 100%|##########| 260/260 [00:37<00:00,  6.95it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.30303030303030304


Epoch 2/10: 100%|##########| 1090/1090 [15:37<00:00,  1.16it/s, loss=0.09]   


Epoch [2/10], Loss: 0.03862021868928857


Compute embeddings: 100%|##########| 1090/1090 [02:28<00:00,  7.36it/s]
Compute embeddings: 100%|##########| 260/260 [00:36<00:00,  7.04it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.3241943241943242


Epoch 3/10: 100%|##########| 1090/1090 [15:34<00:00,  1.17it/s, loss=0.103]  


Epoch [3/10], Loss: 0.03366019608914305


Compute embeddings: 100%|##########| 1090/1090 [02:29<00:00,  7.29it/s]
Compute embeddings: 100%|##########| 260/260 [00:38<00:00,  6.78it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.33910533910533913


Epoch 4/10: 100%|##########| 1090/1090 [15:39<00:00,  1.16it/s, loss=0]       


Epoch [4/10], Loss: 0.02832290539774326


Compute embeddings: 100%|##########| 1090/1090 [02:27<00:00,  7.40it/s]
Compute embeddings: 100%|##########| 260/260 [00:37<00:00,  6.95it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.32756132756132755


Epoch 5/10: 100%|##########| 1090/1090 [15:28<00:00,  1.17it/s, loss=0.0573] 


Epoch [5/10], Loss: 0.026569484563869074


Compute embeddings: 100%|##########| 1090/1090 [02:27<00:00,  7.39it/s]
Compute embeddings: 100%|##########| 260/260 [00:37<00:00,  6.89it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.3588263588263588


Epoch 6/10: 100%|##########| 1090/1090 [15:33<00:00,  1.17it/s, loss=0]      


Epoch [6/10], Loss: 0.02482793127451468


Compute embeddings: 100%|##########| 1090/1090 [02:28<00:00,  7.35it/s]
Compute embeddings: 100%|##########| 260/260 [00:37<00:00,  6.87it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.354978354978355


Epoch 7/10: 100%|##########| 1090/1090 [15:24<00:00,  1.18it/s, loss=0]       


Epoch [7/10], Loss: 0.021873615581303015


Compute embeddings: 100%|##########| 1090/1090 [02:29<00:00,  7.29it/s]
Compute embeddings: 100%|##########| 260/260 [00:37<00:00,  6.86it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.3641173641173641


Epoch 8/10: 100%|##########| 1090/1090 [15:21<00:00,  1.18it/s, loss=0]       


Epoch [8/10], Loss: 0.02136812958558765


Compute embeddings: 100%|##########| 1090/1090 [02:31<00:00,  7.19it/s]
Compute embeddings: 100%|##########| 260/260 [00:37<00:00,  6.89it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.3645983645983646


Epoch 9/10: 100%|##########| 1090/1090 [15:25<00:00,  1.18it/s, loss=0]       


Epoch [9/10], Loss: 0.020225019518012574


Compute embeddings: 100%|##########| 1090/1090 [02:29<00:00,  7.27it/s]
Compute embeddings: 100%|##########| 260/260 [00:38<00:00,  6.80it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.3463203463203463


Epoch 10/10: 100%|##########| 1090/1090 [15:27<00:00,  1.17it/s, loss=0]       


Epoch [10/10], Loss: 0.018262770249072566


Compute embeddings: 100%|##########| 1090/1090 [02:30<00:00,  7.27it/s]
Compute embeddings: 100%|##########| 260/260 [00:37<00:00,  6.86it/s]


TRAIN | Number of images: 8714 | Embedding shape: 1536
VAL   | Number of images: 2079   | Embedding shape: 1536
VAL | Accuracy 1-KNN: 0.3641173641173641
Best model saved at: /lustre/fswork/projects/rech/ads/commun/kg_tests/experiments/megad_noswiss_full_3layers/2024-12-18_10-44-45/model_best_0.018.pth
Last model saved at: /lustre/fswork/projects/rech/ads/commun/kg_tests/experiments/megad_noswiss_full_3layers/2024-12-18_10-44-45/model_last_0.018.pth
Training completed. Now, start of evaluation on the model of the last epoch.
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.1.1+py3.11.5/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/xpython_1750797/17239407.py", line 2, in <module>
    train_dinov2.main(args)
  File "/lustre/fswork/projects/rech/ads/ssos023/DP-SCR_Identify-and-estimate-density-lynx-population/lynx_id/scripts/train/train_dinov2.py", line 294, in main
NameError: name 'val_eval_metrics' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.1.1+py3.11.5/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 2144, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gpfslocalsup/pub/anaconda-py3/2023.09/envs/pytorch-gpu-2.