In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
from opensoundscape.ml.cnn import load_model
import sklearn
from glob import glob
from sklearn.metrics import average_precision_score, roc_auc_score

#set up plotting
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize']=[15,5] #for large visuals
%config InlineBackend.figure_format = 'retina'

# opensoundscape transfer learning tools
from opensoundscape.ml.shallow_classifier import MLPClassifier, quick_fit, fit_classifier_on_embeddings

In [3]:
MODEL_PATH = "data/birdset_effnetB1_finetuned_19cls.model"   # your saved model
TRAIN_LABELS_CSV = "data/train_labels_5s_mac_frommeta.csv"
TEST_LABELS_CSV = "data/test_labels_5s_mac_frommeta.csv"
VAL_LABELS_CSV = "data/val_labels_5s_mac.csv"
OUT_PRED_CSV = "data/preds_val_birdset_finetuned.csv"       # optional
OUT_AP_AUROC_CSV = "/Volumes/Expansion/Evaluation/AP_AUROC_birdset_finetuned.csv"
filename = 'birdset_effnetB1_finetuned_19cls'


In [13]:
train_labels = pd.read_csv(TRAIN_LABELS_CSV, index_col = [0,1,2])
val_labels = pd.read_csv(VAL_LABELS_CSV, index_col = [0,1,2])
test_labels = pd.read_csv(TEST_LABELS_CSV, index_col = [0,1,2])
val_labels.head()

# pick classes for predictions
class_list = val_labels.columns.tolist()

print(val_labels.shape, len(class_list))

(1476, 19) 19


In [7]:
#load model from save path
from opensoundscape.ml.cnn import load_model

model = load_model(MODEL_PATH)



In [14]:
preds = model.predict(val_labels)

# Ensure it's a DataFrame with correct index/columns
if not isinstance(preds, pd.DataFrame):
    preds = pd.DataFrame(preds, index=val_labels.index, columns=class_list)
else:
    preds = preds[class_list].loc[val_labels.index]

print(preds.shape)
preds.to_csv(OUT_PRED_CSV)


  0%|          | 0/1476 [00:00<?, ?it/s]

  super().__init__(loader)


(1476, 19)


In [15]:
avprecscore_auroc_by_sp = pd.DataFrame(columns=['species', 'avg_precision_score','auroc_score'])
for species in class_list:
    avgscore = sklearn.metrics.average_precision_score(val_labels [species],preds [species])
    auroc = sklearn.metrics.roc_auc_score(val_labels[species],preds [species])
    new_row = pd.DataFrame({'species': [species], 'avg_precision_score': [avgscore], 'auroc_score': [auroc]})
    avprecscore_auroc_by_sp = pd.concat([avprecscore_auroc_by_sp, new_row], ignore_index=True)

print(avprecscore_auroc_by_sp)
avprecscore_auroc_by_sp.to_csv(OUT_AP_AUROC_CSV)

                        species  avg_precision_score  auroc_score
0   Hypsipetes_madagascariensis             0.810638     0.880609
1      Copsychus_albospecularis             0.499097     0.912956
2              Coracopsis_nigra             0.277712     0.906065
3           Dicrurus_forficatus             0.385287     0.879804
4                 Coua_caerulea             0.016213     0.401273
5      Zosterops_maderaspatanus             0.762630     0.982265
6          Eurystomus_glaucurus             0.058483     0.879305
7               Agapornis_canus             0.005778     0.397408
8            Saxicola_torquatus             0.008757     0.395976
9    Cyanolanius_madagascarinus             0.005869     0.724015
10          Leptopterus_chabert             0.004794     0.214272
11          Nesoenas_picturatus             0.000000          NaN
12               Coua_reynaudii             0.000000          NaN
13          Ceblepyris_cinereus             0.001952     0.474559
14        

  avprecscore_auroc_by_sp = pd.concat([avprecscore_auroc_by_sp, new_row], ignore_index=True)


In [None]:
#set up plotting
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize']=[15,5] #for large visuals
%config InlineBackend.figure_format = 'retina'
#write a loop to cycle through the class list and save histograms for each species for the model read in at the top of this notebook
path = '/Volumes/Expansion/Evaluation/histograms'
for species in class_list:
    speciespred = species + 'pred'
    scores_valid_df = val_labels.join(preds, rsuffix="pred")
    # Filter
    df_Pos = scores_valid_df[scores_valid_df[species] == True] #or whatever your class of interest is called
    df_NOT = scores_valid_df[scores_valid_df[species] == False]
    # Plot histograms
    plt.hist(df_NOT[speciespred],bins=20,alpha=0.5,label='negatives')
    plt.hist(df_Pos[speciespred],bins=20,alpha=0.5,label='positives')
    # Add a legend and labels
    plt.legend()
    plt.xlabel('Score')
    plt.ylabel('Frequency')
    # Show the plot
    plt.savefig(f'/Volumes/Expansion/Evaluation/histograms/{filename}_{species}.png')
    plt.clf()

<Figure size 1500x500 with 0 Axes>

In [None]:
#set up plotting on log scale on y axis - easier to see small sample sizes
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize']=[15,5] #for large visuals
%config InlineBackend.figure_format = 'retina'
#write a loop to cycle through the class list and save histograms for each species for the model read in at the top of this notebook
path = '/Volumes/Expansion/Evaluation/histograms'
for species in class_list:
    speciespred = species + 'pred'
    scores_valid_df = val_labels.join(preds, rsuffix="pred")
    # Filter
    df_Pos = scores_valid_df[scores_valid_df[species] == True] #or whatever your class of interest is called
    df_NOT = scores_valid_df[scores_valid_df[species] == False]
    # Plot histograms
    plt.hist(df_NOT[speciespred],bins=20,alpha=0.5,label='negatives')
    plt.hist(df_Pos[speciespred],bins=20,alpha=0.5,label='positives')
    # Add a legend and labels
    plt.legend()
    plt.xlabel('Score')
    plt.ylabel('Frequency')
    plt.semilogy()
    # Save the plot
    plt.savefig(f'/Volumes/Expansion/Evaluation/histograms/semilog/{filename}_{species}.png')
    plt.clf()