In [1]:
import os
import numpy as np
import pandas as pd
import torchvision.transforms as T

from wildlife_datasets import datasets
from analysis import *
from utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Number of ranked predictions requested from the prediction helpers.
k = 5
# Parent folder; each dataset lives in a sub-folder named after its class.
root_datasets = '../wildlife-datasets/data'
# Model identifier; its last dash-separated token is the square input size.
model_name = 'MegaDescriptor-L-384'
# Input resolution parsed from the model name (text after the final '-').
img_size = int(model_name.rpartition('-')[2])

In [3]:
# Experiment grid: (dataset class, split parameters) pairs consumed by the
# evaluation loop. SarahZelvy has a single configuration; the two turtle
# datasets are evaluated under every combination of the two boolean options.
data = [(datasets.SarahZelvy, {'daytime': 'all'})]
data += [
    (turtle_dataset, {'ignore_no_side': ignore_no_side, 'empty_database': empty_database})
    for turtle_dataset in (datasets.SeaTurtleIDHeads, datasets.ZindiTurtleRecall)
    for empty_database in (False, True)   # varies slowest within a dataset
    for ignore_no_side in (False, True)   # varies fastest within a dataset
]

# Evaluate every (dataset, split parameters) pair from `data`, once on the
# original images and once on grayscale-converted images, and display the
# match table produced by the dataset-specific analysis helper.

# The extractor depends only on the model name, never on the dataset or the
# transform, so it is built once here instead of once per loop iteration.
extractor = get_extractor(model_name='hf-hub:BVRA/'+model_name, batch_size=32, device='cpu')

# Analysis helper class for every dataset this cell knows how to evaluate.
analysis_classes = {
    'SarahZelvy': Analysis_SarahZelvy,
    'SeaTurtleIDHeads': Analysis_SeaTurtleIDHeads,
    'ZindiTurtleRecall': Analysis_ZindiTurtleRecall,
}

for convert_grayscale in [False, True]:
    # Both variants share resize -> tensor -> normalize; the grayscale variant
    # first applies T.Grayscale(3) (grayscale with 3 output channels).
    transform_steps = [T.Grayscale(3)] if convert_grayscale else []
    transform_steps += [
        T.Resize([img_size, img_size]),
        T.ToTensor(),
        T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
    transform = T.Compose(transform_steps)

    for dataset_class, pars in data:
        dataset_name = dataset_class.__name__
        # Reject unsupported datasets before any loading or feature work.
        if dataset_name not in analysis_classes:
            raise Exception('Dataset not known')

        root = os.path.join(root_datasets, dataset_name)
        # The file name depends on the dataset, the grayscale flag and the
        # model only, so all split configurations of a dataset share it.
        file_name = os.path.join('features', f'features_{dataset_name}_{convert_grayscale}_{model_name}.npy')

        d = dataset_class(root)
        dataset = WD(d.df, d.root, transform=transform)
        # NOTE(review): presumably loads features from `file_name` when it
        # exists and computes/saves them otherwise - confirm in utils.
        features = get_normalized_features(file_name, dataset, extractor)

        analysis = analysis_classes[dataset_name]()
        if dataset_name == 'SarahZelvy':
            idx_database, idx_query = analysis.get_split(d.df, daytime=pars['daytime'])
        else:
            # Optionally drop images whose orientation is not one of the
            # sides known to the analysis helper.
            if pars['ignore_no_side']:
                idx_unknown_side = d.df['orientation'].apply(lambda x: x not in analysis.sides.keys())
            else:
                idx_unknown_side = np.zeros(len(d.df), dtype=bool)
            # Images labelled with the 'unknown' identity are always ignored.
            idx_unknown_identity = d.df['identity'] == 'unknown'
            # Logical OR of the two boolean masks (written as `|`, not `+`).
            idx_ignore = idx_unknown_side | idx_unknown_identity
            idx_database, idx_query = analysis.get_split(d.df, idx_ignore=idx_ignore, empty_database=pars['empty_database'])

        # The helpers return indices relative to the feature subsets they were
        # given; map them back to positions in the full dataset.
        if len(idx_database) == 0:
            # No database: only the query features are passed, so predictions
            # index into the query set.
            idx_true, idx_pred = compute_predictions_disjoint(features[idx_query], k=k)
            idx_pred = idx_query[idx_pred]
        else:
            idx_true, idx_pred = compute_predictions_closed(features[idx_query], features[idx_database], k=k)
            idx_pred = idx_database[idx_pred]
        idx_true = idx_query[idx_true]

        y_true = dataset.labels_string[idx_true]
        y_pred = dataset.labels_string[idx_pred]
        # Materialise the orientation column once and index it twice.
        orientation = dataset.metadata['orientation'].to_numpy()
        orientation_true = orientation[idx_true]
        orientation_pred = orientation[idx_pred]

        matches = analysis.get_matches(y_true, y_pred, orientation_true, orientation_pred)
        print(dataset_name, convert_grayscale, pars)
        print(f"Database size = {len(idx_database)}. Query size = {len(idx_query)}.")
        print(f"Database individuals = {d.df.iloc[idx_database]['identity'].nunique()}. Query individuals = {d.df.iloc[idx_query]['identity'].nunique()}.")
        display(matches)

SarahZelvy False {'daytime': 'all'}
Database size = 0. Query size = 489.
Database individuals = 0. Query individuals = 60.


Unnamed: 0,match same side,match diff side,other,wrong match
match k = 1,63.19%,10.43%,0.0%,26.38%
match k = 2,37.63%,20.86%,0.0%,41.51%
match k = 3,28.63%,19.22%,0.0%,52.15%
match k = 4,20.86%,15.34%,0.0%,63.8%
match k = 5,14.93%,13.5%,0.0%,71.57%


SeaTurtleIDHeads False {'ignore_no_side': False, 'empty_database': False}
Database size = 6063. Query size = 1519.
Database individuals = 400. Query individuals = 398.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,match diff = 3,match diff = 4,other,wrong match
match k = 1,61.42%,17.84%,3.36%,2.24%,4.41%,1.65%,9.08%
match k = 2,49.31%,20.08%,5.53%,5.46%,8.03%,1.58%,10.01%
match k = 3,38.05%,21.86%,6.91%,9.15%,11.85%,1.51%,10.66%
match k = 4,34.56%,20.93%,9.08%,9.94%,12.38%,1.65%,11.45%
match k = 5,30.68%,19.75%,8.49%,10.66%,15.87%,1.71%,12.84%


SeaTurtleIDHeads False {'ignore_no_side': True, 'empty_database': False}
Database size = 5967. Query size = 1494.
Database individuals = 400. Query individuals = 397.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,match diff = 3,match diff = 4,other,wrong match
match k = 1,62.52%,18.21%,3.55%,2.28%,4.69%,0.0%,8.77%
match k = 2,50.47%,20.55%,5.62%,5.56%,8.17%,0.0%,9.64%
match k = 3,38.69%,22.29%,7.3%,9.37%,11.98%,0.0%,10.37%
match k = 4,35.34%,21.15%,9.17%,10.24%,12.92%,0.0%,11.18%
match k = 5,31.26%,20.15%,8.7%,10.84%,16.4%,0.0%,12.65%


SeaTurtleIDHeads False {'ignore_no_side': False, 'empty_database': True}
Database size = 0. Query size = 1519.
Database individuals = 0. Query individuals = 398.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,match diff = 3,match diff = 4,other,wrong match
match k = 1,35.22%,18.1%,6.12%,10.2%,12.64%,1.51%,16.19%
match k = 2,23.17%,16.33%,7.11%,11.13%,14.75%,1.51%,26.0%
match k = 3,18.37%,12.84%,7.7%,11.13%,13.89%,1.32%,34.76%
match k = 4,14.15%,11.19%,5.99%,8.82%,13.03%,1.38%,45.42%
match k = 5,10.73%,8.62%,7.11%,10.14%,9.22%,2.3%,51.88%


SeaTurtleIDHeads False {'ignore_no_side': True, 'empty_database': True}
Database size = 0. Query size = 1494.
Database individuals = 0. Query individuals = 397.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,match diff = 3,match diff = 4,other,wrong match
match k = 1,35.88%,18.47%,6.29%,10.37%,13.12%,0.0%,15.86%
match k = 2,23.83%,16.53%,7.23%,11.31%,14.99%,0.0%,26.1%
match k = 3,18.67%,13.25%,7.76%,11.51%,14.19%,0.0%,34.61%
match k = 4,14.32%,11.38%,6.09%,9.04%,13.45%,0.0%,45.72%
match k = 5,10.84%,8.7%,7.63%,10.44%,9.17%,0.0%,53.21%


ZindiTurtleRecall False {'ignore_no_side': False, 'empty_database': False}
Database size = 9721. Query size = 3082.
Database individuals = 2265. Query individuals = 2251.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,other,wrong match
match k = 1,5.45%,2.43%,2.43%,65.12%,24.56%
match k = 2,4.96%,2.69%,2.47%,51.95%,37.93%
match k = 3,4.38%,3.18%,2.53%,32.77%,57.14%
match k = 4,4.15%,2.92%,2.95%,26.7%,63.27%
match k = 5,3.83%,3.54%,2.3%,23.62%,66.71%


ZindiTurtleRecall False {'ignore_no_side': True, 'empty_database': False}
Database size = 1710. Query size = 435.
Database individuals = 100. Query individuals = 100.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,other,wrong match
match k = 1,45.52%,22.53%,21.84%,0.0%,10.11%
match k = 2,41.15%,24.37%,21.38%,0.0%,13.1%
match k = 3,38.16%,27.59%,22.53%,0.0%,11.72%
match k = 4,35.4%,30.57%,22.76%,0.0%,11.26%
match k = 5,33.33%,31.95%,20.0%,0.0%,14.71%


ZindiTurtleRecall False {'ignore_no_side': False, 'empty_database': True}
Database size = 0. Query size = 3082.
Database individuals = 0. Query individuals = 2251.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,other,wrong match
match k = 1,3.24%,3.34%,2.14%,15.96%,75.31%
match k = 2,2.24%,2.89%,1.65%,10.77%,82.45%
match k = 3,1.98%,3.05%,1.65%,8.96%,84.36%
match k = 4,2.01%,1.91%,1.62%,7.33%,87.12%
match k = 5,1.78%,2.24%,0.91%,6.1%,88.97%


ZindiTurtleRecall False {'ignore_no_side': True, 'empty_database': True}
Database size = 0. Query size = 435.
Database individuals = 0. Query individuals = 100.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,other,wrong match
match k = 1,28.51%,31.49%,20.69%,0.0%,19.31%
match k = 2,20.69%,28.74%,17.93%,0.0%,32.64%
match k = 3,21.61%,25.06%,12.18%,0.0%,41.15%
match k = 4,18.16%,20.69%,14.48%,0.0%,46.67%
match k = 5,14.25%,20.23%,11.72%,0.0%,53.79%


SarahZelvy True {'daytime': 'all'}
Database size = 0. Query size = 489.
Database individuals = 0. Query individuals = 60.


Unnamed: 0,match same side,match diff side,other,wrong match
match k = 1,62.37%,11.04%,0.0%,26.58%
match k = 2,33.54%,17.18%,0.0%,49.28%
match k = 3,28.02%,18.81%,0.0%,53.17%
match k = 4,21.88%,17.18%,0.0%,60.94%
match k = 5,21.27%,13.91%,0.0%,64.83%


SeaTurtleIDHeads True {'ignore_no_side': False, 'empty_database': False}
Database size = 6063. Query size = 1519.
Database individuals = 400. Query individuals = 398.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,match diff = 3,match diff = 4,other,wrong match
match k = 1,57.74%,13.36%,1.97%,1.65%,4.48%,1.05%,19.75%
match k = 2,39.7%,14.94%,3.29%,4.15%,8.36%,0.86%,28.7%
match k = 3,32.06%,14.22%,4.34%,5.0%,10.2%,0.92%,33.25%
match k = 4,26.27%,12.18%,4.48%,5.92%,11.39%,0.66%,39.1%
match k = 5,22.19%,12.9%,4.02%,6.58%,12.11%,0.59%,41.61%


SeaTurtleIDHeads True {'ignore_no_side': True, 'empty_database': False}
Database size = 5967. Query size = 1494.
Database individuals = 400. Query individuals = 397.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,match diff = 3,match diff = 4,other,wrong match
match k = 1,58.84%,13.59%,2.01%,1.67%,4.62%,0.0%,19.28%
match k = 2,40.29%,15.19%,3.48%,4.28%,8.5%,0.0%,28.25%
match k = 3,32.66%,14.46%,4.48%,5.02%,10.37%,0.0%,33.0%
match k = 4,26.71%,12.45%,4.55%,6.02%,11.71%,0.0%,38.55%
match k = 5,22.82%,13.19%,4.08%,6.76%,12.32%,0.0%,40.83%


SeaTurtleIDHeads True {'ignore_no_side': False, 'empty_database': True}
Database size = 0. Query size = 1519.
Database individuals = 0. Query individuals = 398.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,match diff = 3,match diff = 4,other,wrong match
match k = 1,31.53%,10.99%,2.37%,4.94%,9.35%,0.92%,39.89%
match k = 2,18.83%,8.43%,2.76%,6.12%,9.55%,0.46%,53.85%
match k = 3,12.57%,7.31%,2.7%,6.32%,9.68%,0.07%,61.36%
match k = 4,10.66%,7.31%,2.44%,5.33%,7.97%,0.26%,66.03%
match k = 5,8.1%,6.45%,1.84%,4.81%,7.11%,0.26%,71.43%


SeaTurtleIDHeads True {'ignore_no_side': True, 'empty_database': True}
Database size = 0. Query size = 1494.
Database individuals = 0. Query individuals = 397.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,match diff = 3,match diff = 4,other,wrong match
match k = 1,32.13%,11.18%,2.41%,5.02%,9.57%,0.0%,39.69%
match k = 2,19.14%,8.57%,2.81%,6.22%,9.77%,0.0%,53.48%
match k = 3,12.85%,7.43%,2.74%,6.49%,9.77%,0.0%,60.71%
match k = 4,10.84%,7.43%,2.48%,5.42%,8.23%,0.0%,65.6%
match k = 5,8.17%,6.69%,1.87%,4.82%,7.03%,0.0%,71.42%


ZindiTurtleRecall True {'ignore_no_side': False, 'empty_database': False}
Database size = 9721. Query size = 3082.
Database individuals = 2265. Query individuals = 2251.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,other,wrong match
match k = 1,3.96%,1.14%,1.65%,28.65%,64.6%
match k = 2,2.53%,0.71%,1.46%,15.25%,80.05%
match k = 3,2.43%,0.71%,1.2%,11.45%,84.2%
match k = 4,1.78%,0.71%,1.33%,8.99%,87.18%
match k = 5,1.49%,0.68%,1.07%,8.08%,88.68%


ZindiTurtleRecall True {'ignore_no_side': True, 'empty_database': False}
Database size = 1710. Query size = 435.
Database individuals = 100. Query individuals = 100.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,other,wrong match
match k = 1,37.24%,11.72%,15.4%,0.0%,35.63%
match k = 2,26.44%,8.97%,15.86%,0.0%,48.74%
match k = 3,22.53%,10.11%,12.41%,0.0%,54.94%
match k = 4,19.31%,8.05%,13.56%,0.0%,59.08%
match k = 5,14.71%,9.89%,11.72%,0.0%,63.68%


ZindiTurtleRecall True {'ignore_no_side': False, 'empty_database': True}
Database size = 0. Query size = 3082.
Database individuals = 0. Query individuals = 2251.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,other,wrong match
match k = 1,1.98%,0.68%,1.23%,5.58%,90.53%
match k = 2,1.04%,0.39%,0.75%,3.73%,94.09%
match k = 3,1.04%,0.26%,0.68%,2.56%,95.46%
match k = 4,0.55%,0.62%,0.49%,2.73%,95.62%
match k = 5,0.65%,0.52%,0.52%,1.75%,96.56%


ZindiTurtleRecall True {'ignore_no_side': True, 'empty_database': True}
Database size = 0. Query size = 435.
Database individuals = 0. Query individuals = 100.


Unnamed: 0,match diff = 0,match diff = 1,match diff = 2,other,wrong match
match k = 1,18.16%,7.36%,12.41%,0.0%,62.07%
match k = 2,13.79%,7.59%,8.51%,0.0%,70.11%
match k = 3,8.97%,6.67%,5.52%,0.0%,78.85%
match k = 4,6.67%,5.98%,5.98%,0.0%,81.38%
match k = 5,3.91%,5.52%,5.75%,0.0%,84.83%
