In [81]:
import os
import numpy as np
import pandas as pd
from importlib import reload
import animaloc_improved.tools.infer_metrics as im

# When the working-directory path ends with 'notebooks', move one level up.
# The rest of the notebook reads files through paths relative to the current
# directory (e.g. "data/...").
if os.getcwd().endswith('notebooks'):
    os.chdir('..')


In [75]:
# Load the per-image ground-truth statistics table; the path is relative to
# the current working directory.
df = pd.read_csv("data/gt-preprocessed/stats-by-image/big_size_A_B_E_K_WH_WB-fixed-header.csv")

In [76]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,split,subdataset,images,rel_path,num_bboxes,median_l_bbox,dominant_species,counts_by_label,has_mixed_species,median_animal_sep,normalized_animal_sep
0,train,AED,11d4e563d06dfd47459ac2a088bcc179e23c34a4.JPG,train/11d4e563d06dfd47459ac2a088bcc179e23c34a4...,3,56.444663,6,"{""6"": 3}",False,28.780516,0.509889
1,train,Virunga,L_10_05_16_DSC01530.JPG,train/L_10_05_16_DSC01530.JPG,2,72.671531,2,"{""2"": 2}",False,47.628773,0.655398
2,train,Virunga,L_10_05_16_DSC01529.JPG,train/L_10_05_16_DSC01529.JPG,2,74.434293,2,"{""2"": 2}",False,49.307707,0.662433
3,train,Virunga,S_09_05_16_DSC00800.JPG,train/S_09_05_16_DSC00800.JPG,3,33.196385,4,"{""4"": 3}",False,23.517594,0.708438
4,train,Virunga,L_11_05_16_DSC01393.JPG,train/L_11_05_16_DSC01393.JPG,2,46.159143,3,"{""3"": 2}",False,33.24154,0.720151


In [77]:
# Histogram of the normalized animal separation, restricted to values below
# 20. The bare last expression is what the cell displays.
animal_sep = df["normalized_animal_sep"]
a_sep_lt20 = animal_sep.loc[animal_sep.lt(20)]
a_sep_lt20.hist(bins=50)

<Axes: >

In [78]:
def calc_proximity_class(
    row: pd.Series,
    high_max: float = 3.0,
    medium_max: float = 10,
) -> str:
    """Map a row's normalized animal separation to a proximity class label.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) that provides the key
            "normalized_animal_sep".
        high_max: Exclusive upper bound of the separation for "1-alta".
        medium_max: Exclusive upper bound of the separation for "2-media".

    Returns:
        "1-alta" when the separation is below ``high_max``, "2-media" when it
        is below ``medium_max``, otherwise "3-baja".

    Note:
        A NaN separation fails both ``<`` comparisons and is therefore
        labelled "3-baja".
    """
    animal_sep = row["normalized_animal_sep"]

    if animal_sep < high_max:
        return "1-alta"
    if animal_sep < medium_max:
        return "2-media"
    return "3-baja"

# Label every image row with its proximity class (stored as a new column).
df['proximity_class'] = df.apply(calc_proximity_class, axis="columns")

# Number of images per class, ordered by class label.
df['proximity_class'].value_counts().to_frame().sort_values('proximity_class')

Unnamed: 0_level_0,count
proximity_class,Unnamed: 1_level_1
1-alta,199
2-media,413
3-baja,685


In [79]:
import json
from pathlib import Path

# Read the inference results JSON (opened in binary mode) into memory.
inference_path = Path("data/test_results_v2/herdnet_v2_hn2/inference.json")
with inference_path.open("rb") as f_in:
    inference_results = json.load(f_in)

# Report how many entries were loaded.
print(len(inference_results))

258


In [89]:
# Pick up any edits made to the metrics module since it was imported.
reload(im)

# Split the inference results by the proximity class of their image: a result
# belongs to a class when its 'images' value is among that class's images.
inf_results_by_proximity = {}
for prox_class in sorted(df['proximity_class'].unique()):
    images_class = set(df.loc[df['proximity_class'] == prox_class, 'images'])
    subset = [
        result for result in inference_results
        if result['images'] in images_class
    ]
    inf_results_by_proximity[prox_class] = subset
    print(prox_class, len(subset))

# Matching configuration handed to the evaluator.
# NOTE(review): the unit of `match_tolerance` is not visible here
# (presumably pixels) — confirm in infer_metrics.
match_strategy = "point2point"
match_tolerance = 10

# Single evaluator instance built from the settings above and the module's
# species map.
evaluator = im.PrecisionRecallEvaluator(
    match_strategy=match_strategy,
    match_tolerance=match_tolerance,
    species_map=im.SPECIES_MAP,
)

# Precision/recall per proximity class: evaluate each subset separately and
# keep only the row whose species is 'binary', tagged with its class.
pieces = []
for prox_class, inf_results in inf_results_by_proximity.items():
    by_image_results = im.compute_by_image_results(evaluator, inf_results)
    precis_rec_df = im.calc_precis_recall(by_image_results)
    piece = (
        precis_rec_df
        .loc[precis_rec_df.species == 'binary']
        .assign(Proximidad=prox_class)
    )
    pieces.append(piece)

result_df = pd.concat(pieces)

# Express the ratio metrics as percentages rounded to one decimal.
for metric in ("precision", "recall", "f1_score"):
    result_df[metric] = result_df[metric].mul(100).round(1)

display(result_df)

from animaloc_improved.tools import latex_utils as lu

# Keep only the report columns and give them their printed headers. In the
# printed table each "\n" shows up as a line break inside a \makecell header.
latex_df = result_df.loc[
    :, ['Proximidad', 'num_gt_annots', 'precision', 'recall', 'f1_score']
].rename(
    columns={
        "num_gt_annots": "Anotaciones\n\\#",
        "precision": "Precisión\n\\%",
        "recall": "Recall\n\\%",
        "f1_score": "Puntaje F1\n\\%",
    }
)

# Emit the LaTeX source of the table so it can be pasted into the document.
print(
    lu.df_to_latex(
        latex_df,
        caption="Métricas de precisión, recall y f1 por clasificación de proximidad.",
        label="pr-metrics-by-proximity",
    )
)

1-alta 36
2-media 87
3-baja 135


Unnamed: 0,label,species,TP,FP,FN,num_gt_annots,num_preds,precision,recall,f1_score,Proximidad
0,0,binary,306,214,254,560,520,58.8,54.6,56.7,1-alta
0,0,binary,860,355,385,1245,1215,70.8,69.1,69.9,2-media
0,0,binary,411,211,83,494,622,66.1,83.2,73.7,3-baja


\begin{table}[h!]
\renewcommand{\arraystretch}{1.3}
\centering
\begin{tabular}{c c c c c}
\hline
\textbf{Proximidad} & \makecell{\textbf{Anotaciones}\\\textbf{\#}} & \makecell{\textbf{Precisión}\\\textbf{\%}} & \makecell{\textbf{Recall}\\\textbf{\%}} & \makecell{\textbf{Puntaje F1}\\\textbf{\%}}\\
\hline
1-alta & 560 & 58.8 & 54.6 & 56.7\\
2-media & 1245 & 70.8 & 69.1 & 69.9\\
3-baja & 494 & 66.1 & 83.2 & 73.7\\
\hline
\end{tabular}
\caption{Métricas de precisión, recall y f1 por clasificación de proximidad.}
\label{pr-metrics-by-proximity}
\end{table}


In [84]:
# Evaluate the full set of inference results (no proximity split) with the
# same evaluator.
by_image_results = im.compute_by_image_results(evaluator, inference_results)
precis_rec_df = im.calc_precis_recall(by_image_results)
# Preview the first rows of the resulting metrics table.
precis_rec_df.head()

Unnamed: 0,label,species,TP,FP,FN,num_gt_annots,num_preds,precision,recall,f1_score
0,0,binary,1577,780,722,2299,2357,0.669071,0.68595,0.677405
1,1,Alcelaphinae,492,250,183,675,742,0.663073,0.728889,0.694425
2,2,Buffalo,145,71,204,349,216,0.671296,0.415473,0.513274
3,3,Kob,355,110,122,477,465,0.763441,0.744235,0.753715
4,4,Warthog,20,52,54,74,72,0.277778,0.27027,0.273973


In [111]:
# Drop the scratch variable left over from interactive debugging. Its only
# visible assignment is commented out, so a plain `del inf_result` raises
# NameError on a fresh kernel; popping it from the namespace is a no-op when
# the name is absent.
globals().pop("inf_result", None)

# NOTE(review): `pred_cnts_df` is not defined in any visible cell, so this
# line depends on hidden kernel state — restore the cell that builds it.
display(pred_cnts_df)


Unnamed: 0,images,pred_count,labels
0,S_07_05_16_DSC00094.JPG,0,1
1,S_07_05_16_DSC00346.JPG,2,1
2,S_07_05_16_DSC00347.JPG,1,1
3,S_07_05_16_DSC00348.JPG,1,1
4,S_07_05_16_DSC00365.JPG,0,1
...,...,...,...
130,3ad215bcd345fc3e7cf20950b08d127835417d5e.JPG,5,1
131,3e3bfda0742d41b377d0c72012142c5873d3c972.JPG,0,1
132,3f8c96422a94b06065b41e43e787d856ecc3dcaa.JPG,6,1
133,4140a7c83fb08a7c92ac55f119bccdb29937367c.JPG,0,1


In [107]:
from animaloc_improved.tools import count_metrics_v2 as cm2

In [119]:
# Pick up any edits made to the counting-metrics module since it was imported.
reload(cm2)
pieces = []

# Counting-error metrics per proximity class, one summary piece per class.
for prox_class, inf_results in inf_results_by_proximity.items():
    merged = cm2.count_errors_by_image(inf_results)
    # NOTE(review): `_calc_means_by_label` is a private helper of cm2 (leading
    # underscore) — confirm it is meant to be called from outside the module.
    piece = cm2._calc_means_by_label(merged)
    # Tag the summary with its class so the pieces stay distinguishable once
    # concatenated.
    piece['Proximidad'] = prox_class

    # Sanity check: shape of the per-image table for this class.
    print(prox_class, merged.shape)
    # display(piece)
    pieces.append(piece)


# Stack the per-class summaries; the bare last expression displays the table.
cnt_metrics_by_prox = pd.concat(pieces)
cnt_metrics_by_prox


1-alta (36, 9)
2-media (87, 9)
3-baja (135, 9)


Unnamed: 0,labels,M(C),Σ(C),M(Ĉ),Σ(Ĉ),ME,MAE,MSE,MPE,MAPE,RMSE,Proximidad
0,1,15.6,560,14.4,520,-1.1,5.6,77.2,31.6,67.2,8.8,1-alta
0,1,14.3,1245,14.0,1215,-0.3,2.9,15.3,-0.4,29.6,3.9,2-media
0,1,3.7,494,4.6,622,0.9,1.6,7.8,68.0,98.7,2.8,3-baja


In [122]:
from animaloc_improved.tools import latex_utils as lu

# Select the report columns and swap in their LaTeX headers in one chain.
latex_df = cnt_metrics_by_prox[
    "Proximidad,M(Ĉ),Σ(Ĉ),ME,MAE,RMSE,MPE,MAPE".split(",")
].rename(
    columns={
        "M(Ĉ)": r"$\mbox{M}(\hat{C})$",
        "Σ(Ĉ)": r"$\mathbf{\Sigma}(\hat{C})$",
        "MPE": "MPE\n[\\%]",
        "MAPE": "MAPE\n[\\%]",
    }
)

# Emit the LaTeX source of the counting-metrics table.
print(
    lu.df_to_latex(
        latex_df,
        caption=(
            "Métricas de Conteo por clasificación , caso binario, sobre el conjunto de test, "
            "usando el modelo HN-2."
        ),
    )
)

\begin{table}[h!]
\renewcommand{\arraystretch}{1.3}
\centering
\begin{tabular}{c c c c c c c c}
\hline
\textbf{Proximidad} & \textbf{$\mbox{M}(\hat{C})$} & \textbf{$\mathbf{\Sigma}(\hat{C})$} & \textbf{ME} & \textbf{MAE} & \textbf{RMSE} & \makecell{\textbf{MPE}\\\textbf{[\%]}} & \makecell{\textbf{MAPE}\\\textbf{[\%]}}\\
\hline
1-alta & 14.4 & 520 & -1.1 & 5.6 & 8.8 & 31.6 & 67.2\\
2-media & 14.0 & 1215 & -0.3 & 2.9 & 3.9 & -0.4 & 29.6\\
3-baja & 4.6 & 622 & 0.9 & 1.6 & 2.8 & 68.0 & 98.7\\
\hline
\end{tabular}
\caption{Métricas de Conteo por clasificación , caso binario, sobre el conjunto de test, usando el modelo HN-2.}
\end{table}
