In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
from opensoundscape.ml.cnn import load_model
import sklearn
from glob import glob
from sklearn.metrics import average_precision_score, roc_auc_score

#set up plotting
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize']=[15,5] #for large visuals
%config InlineBackend.figure_format = 'retina'

# opensoundscape transfer learning tools
from opensoundscape.ml.shallow_classifier import MLPClassifier, quick_fit, fit_classifier_on_embeddings

In [None]:
# --- Run configuration: input/output locations used by the cells below ---
MODEL_PATH = "data/birdset_effnetB1_finetuned_19cls.model"   # your saved model
TRAIN_LABELS_CSV = "data/train_labels_5s_mac_frommeta.csv"
TEST_LABELS_CSV = "data/test_labels_5s_mac_frommeta.csv"
VAL_LABELS_CSV = "data/val_labels_5s_mac.csv"
OUT_PRED_CSV = "data/preds_val_birdset_finetuned.csv"       # optional
# NOTE(review): absolute path on an external volume; writing fails if it is not mounted.
OUT_AP_AUROC_CSV = "/Volumes/Expansion/Evaluation/AP_AUROC_birdset_finetuned.csv"
# Prefix for the saved histogram files. Derived from MODEL_PATH (file name without
# the ".model" suffix) so it always matches the model being evaluated. The previous
# bare identifier here raised NameError because it was not a quoted string.
filename = Path(MODEL_PATH).stem


In [None]:
# Read the train / validation / test label tables. The first three CSV columns
# become the row MultiIndex; every remaining column is treated as one class.
LABEL_INDEX_COLS = [0, 1, 2]
train_labels = pd.read_csv(TRAIN_LABELS_CSV, index_col=LABEL_INDEX_COLS)
val_labels = pd.read_csv(VAL_LABELS_CSV, index_col=LABEL_INDEX_COLS)
test_labels = pd.read_csv(TEST_LABELS_CSV, index_col=LABEL_INDEX_COLS)

# pick classes for predictions
class_list = train_labels.columns.tolist()

# Later cells index val_labels by every name in class_list, so fail here with a
# clear message rather than with a KeyError in the middle of the evaluation.
missing_in_val = [c for c in class_list if c not in val_labels.columns]
if missing_in_val:
    raise ValueError(
        f"Validation labels are missing class columns present in training labels: {missing_in_val}"
    )

print(train_labels.shape, len(class_list))

# Only the last expression of a cell is displayed, so the preview goes last.
val_labels.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Hypsipetes_madagascariensis,Copsychus_albospecularis,Coracopsis_nigra,Dicrurus_forficatus,Coua_caerulea,Zosterops_maderaspatanus,Eurystomus_glaucurus,Agapornis_canus,Saxicola_torquatus,Cyanolanius_madagascarinus,Leptopterus_chabert,Nesoenas_picturatus,Coua_reynaudii,Ceblepyris_cinereus,Neodrepanis_coruscans,Philepitta_castanea,Eulemur_sp,Coua_cristata,Treron_australis
file,start_time,end_time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,0.0,2.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,2.0,4.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,4.0,6.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,6.0,8.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,8.0,10.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [None]:
# Load the saved model found at MODEL_PATH and bind it to `model`.
# NOTE(review): `load_model` is already imported from `opensoundscape.ml.cnn` in the
# first cell; this import rebinds the same name. Confirm which module path is
# correct for the installed opensoundscape version and keep only one import.
from opensoundscape.ml import load_model

# NOTE(review): `pickle=True` presumably makes the loader un-pickle the saved file
# (confirm against the installed version's signature). Un-pickling can execute
# arbitrary code, so only load model files from a trusted source.
model = load_model(MODEL_PATH, pickle=True)


  loaded_content = torch.load(path, map_location=device)


In [None]:
# Score every validation clip with the loaded model.
# The model object is bound to `model` (not `m`) by the loading cell.
preds = model.predict(val_labels)

# Ensure `preds` is a DataFrame whose columns are exactly `class_list` and whose
# rows line up with `val_labels`, because the metric cells compare the two
# column-by-column. (The previous version referenced p_test / y_test / classes,
# none of which exist in this notebook, so it never touched `preds`.)
if not isinstance(preds, pd.DataFrame):
    # assumes a 2-D array-like with one row per validation clip and one column
    # per class, in class_list order — TODO confirm
    preds = pd.DataFrame(preds, index=val_labels.index, columns=class_list)
else:
    preds = preds[class_list]
    # Re-order rows only when needed; avoids a costly MultiIndex lookup when the
    # prediction index already matches the labels.
    if not preds.index.equals(val_labels.index):
        preds = preds.loc[val_labels.index]

print(preds.shape)
preds.to_csv(OUT_PRED_CSV)


In [None]:
# Per-species average precision and AUROC on the validation set.
# Rows are collected in a list and turned into a DataFrame once, instead of
# growing a DataFrame with pd.concat inside the loop.
metric_rows = []
for species in class_list:
    y_true = val_labels[species]
    y_score = preds[species]

    # Works for both boolean and 0/1 label columns.
    has_positive = bool((y_true == 1).any())
    has_negative = bool((y_true == 0).any())

    # Average precision is undefined without positive clips, and roc_auc_score
    # raises ValueError when only one class is present. Record NaN for such
    # species so one rare class does not abort the whole evaluation.
    avgscore = average_precision_score(y_true, y_score) if has_positive else np.nan
    auroc = roc_auc_score(y_true, y_score) if (has_positive and has_negative) else np.nan

    metric_rows.append(
        {'species': species, 'avg_precision_score': avgscore, 'auroc_score': auroc}
    )

avprecscore_auroc_by_sp = pd.DataFrame(
    metric_rows, columns=['species', 'avg_precision_score', 'auroc_score']
)

print(avprecscore_auroc_by_sp)
avprecscore_auroc_by_sp.to_csv(OUT_AP_AUROC_CSV)

In [None]:
# Save one score histogram per species (linear y axis) for the model loaded at
# the top of this notebook. Positives and negatives are overlaid so the
# separation between the two score distributions is visible.
# pyplot is already imported in the first cell; the figure size is passed
# explicitly below so this cell does not depend on global rcParams.
path = '/Volumes/Expansion/Evaluation/histograms'
hist_dir = Path(path)
# savefig does not create missing folders, so make sure the target exists.
hist_dir.mkdir(parents=True, exist_ok=True)

# Labels and scores side by side: label columns keep the species name, score
# columns get the "pred" suffix. This does not depend on the species, so it is
# built once rather than on every loop iteration.
scores_valid_df = val_labels.join(preds, rsuffix="pred")

for species in class_list:
    speciespred = species + 'pred'
    # Filter
    df_Pos = scores_valid_df[scores_valid_df[species] == True]
    df_NOT = scores_valid_df[scores_valid_df[species] == False]

    # Plot histograms on an explicit figure so it can be closed afterwards.
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.hist(df_NOT[speciespred], bins=20, alpha=0.5, label='negatives')
    ax.hist(df_Pos[speciespred], bins=20, alpha=0.5, label='positives')

    # Add a title, legend and labels
    ax.set_title(species)
    ax.set_xlabel('Score')
    ax.set_ylabel('Frequency')
    ax.legend()

    # Save the plot, then close the figure so no empty figure is left behind
    # and memory does not grow with the number of species.
    fig.savefig(hist_dir / f'{filename}_{species}.png')
    plt.close(fig)

<Figure size 1500x500 with 0 Axes>

In [None]:
# Save one score histogram per species with a log-scaled y axis, which makes
# classes with few positive clips easier to see.
# pyplot is already imported in the first cell; the figure size is passed
# explicitly below so this cell does not depend on global rcParams.
path = '/Volumes/Expansion/Evaluation/histograms'
semilog_dir = Path(path) / 'semilog'
# savefig does not create missing folders, so make sure the target exists.
semilog_dir.mkdir(parents=True, exist_ok=True)

# Labels and scores side by side: label columns keep the species name, score
# columns get the "pred" suffix. This does not depend on the species, so it is
# built once rather than on every loop iteration.
scores_valid_df = val_labels.join(preds, rsuffix="pred")

for species in class_list:
    speciespred = species + 'pred'
    # Filter
    df_Pos = scores_valid_df[scores_valid_df[species] == True]
    df_NOT = scores_valid_df[scores_valid_df[species] == False]

    # Plot histograms on an explicit figure so it can be closed afterwards.
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.hist(df_NOT[speciespred], bins=20, alpha=0.5, label='negatives')
    ax.hist(df_Pos[speciespred], bins=20, alpha=0.5, label='positives')

    # Add a title, legend and labels
    ax.set_title(species)
    ax.set_xlabel('Score')
    ax.set_ylabel('Frequency')
    ax.legend()
    # Log-scale the y axis.
    ax.set_yscale('log')

    # Save the plot, then close the figure so no empty figure is left behind
    # and memory does not grow with the number of species.
    fig.savefig(semilog_dir / f'{filename}_{species}.png')
    plt.close(fig)