In [7]:
import torch
import pandas as pd
from pathlib import Path
import numpy as np
import pandas as pd
import random
from glob import glob
import sklearn

from tqdm.autonotebook import tqdm
from sklearn.metrics import average_precision_score, roc_auc_score
from pathlib import Path

#set up plotting
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize']=[15,5] #for large visuals
%config InlineBackend.figure_format = 'retina'

# opensoundscape transfer learning tools
from opensoundscape.ml.shallow_classifier import MLPClassifier, quick_fit, fit_classifier_on_embeddings


In [8]:
from sklearn.model_selection import train_test_split
from opensoundscape import BoxedAnnotations, CNN
import opensoundscape

In [9]:
# Run configuration.
# Per-class clip count requested from the resampling step.
n_samples_per_class = 1_000
# Stem of the saved model file; the save step appends ".model".
filename = "Shallow_classifier_perch_resample_formac"

In [10]:
# Annotation metadata table. Later cells read its `Split`, `Raven_path`
# and `SoundFile_path` columns.
metadata_csv = Path('./data') / 'metadata.csv'
metadata = pd.read_csv(metadata_csv)
metadata.head()

Unnamed: 0.1,Unnamed: 0,Begin Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Species,Conf,Call_type,Filename,SiteHab,RecordingSched,RecorderName,DP,SoundFile,SoundFile_path,Split,Raven_path
0,0,0.763354,3.078034,2854.369,5965.41,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...
1,1,6.588949,9.475884,2571.297,5451.15,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...
2,2,10.43478,12.523959,3085.557,5348.299,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...
3,3,14.854197,15.436757,3085.557,5862.558,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...
4,4,55.57803,59.479764,2621.359,5475.728,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...


In [11]:
# Partition the metadata rows by the value of their `Split` column.
split_column = metadata["Split"]
val_metadata = metadata.loc[split_column.eq("validation")]
train_metadata = metadata.loc[split_column.eq("train")]
test_metadata = metadata.loc[split_column.eq("test")]

In [12]:
# Renumber every split from 0 so label-based and positional access agree.
# `drop=True` discards the old row labels instead of inserting them as an
# extra `index` column; this also makes the cell safe to re-run (without it,
# each re-run adds another column and eventually raises).
val_metadata = val_metadata.reset_index(drop=True)
train_metadata = train_metadata.reset_index(drop=True)
test_metadata = test_metadata.reset_index(drop=True)

In [13]:
def load_split_annotations(split_metadata):
    """Load the Raven annotations referenced by one metadata split.

    The metadata table holds one row per annotation, so the same Raven file
    (and its audio file) appears on many rows. Each distinct
    (Raven file, audio file) pair is passed to the loader exactly once, as
    two plain lists that stay aligned element by element.

    Parameters
    ----------
    split_metadata : pandas.DataFrame
        Rows of the metadata table for one split; must contain the
        `Raven_path` and `SoundFile_path` columns.

    Returns
    -------
    The object returned by `BoxedAnnotations.from_raven_files`, using the
    Raven column named 'species' as the annotation label.
    """
    file_pairs = split_metadata[['Raven_path', 'SoundFile_path']].drop_duplicates()
    return BoxedAnnotations.from_raven_files(
        file_pairs['Raven_path'].tolist(),
        'species',
        file_pairs['SoundFile_path'].tolist(),
    )


# load the annotations of each split into OpenSoundscape
val_annotations = load_split_annotations(val_metadata)
train_annotations = load_split_annotations(train_metadata)
test_annotations = load_split_annotations(test_metadata)

In [14]:
# Label merges applied to the annotations: original label -> replacement label.
label_merges = {
    'Eulemur_albifrons': 'Eulemur_sp',
    'Eulemur_fulvus': 'Eulemur_sp',
}

# Two-column table (`original`, `new`) built from the mapping above.
# Alternative: keep the table in its own spreadsheet and load it with
# pd.read_csv('my_conversion_filename_here.csv').
conversion_table = pd.DataFrame(
    {
        'original': list(label_merges.keys()),
        'new': list(label_merges.values()),
    }
)

conversion_table

Unnamed: 0,original,new
0,Eulemur_albifrons,Eulemur_sp
1,Eulemur_fulvus,Eulemur_sp


In [15]:
def convert_split_labels(annotations):
    """Apply `conversion_table` to the labels of one split's annotations.

    Parameters
    ----------
    annotations
        Annotation object for one split (as produced by the loading cell).

    Returns
    -------
    The relabelled annotation object returned by `convert_labels`, with its
    `audio_files` attribute re-populated from the `audio_file` column of its
    dataframe.
    """
    corrected = annotations.convert_labels(conversion_table)
    # workaround for issue #872
    corrected.audio_files = corrected.df['audio_file'].values
    return corrected


# correct annotations in each of the splits
val_annotations_corrected = convert_split_labels(val_annotations)
train_annotations_corrected = convert_split_labels(train_annotations)
test_annotations_corrected = convert_split_labels(test_annotations)

# Only the last expression of a cell is displayed, so preview a single split.
test_annotations_corrected.df.head()


Unnamed: 0,audio_file,annotation_file,annotation,start_time,end_time,low_f,high_f,Conf,Selection,Call_type,Delta Time (s),Delta Freq (Hz),View,Channel,Avg Power Density (dB FS/Hz)
0,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Hypsipetes_madagascariensis,54.905264,56.190913,1968.454,5223.975,0.9,1,call,,,Spectrogram 1,1,
1,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Dicrurus_forficatus,4.943181,11.329778,659.794,5690.722,0.9,1,call,,,Spectrogram 1,1,
2,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Dicrurus_forficatus,13.011363,15.447871,551.7,5773.196,0.9,2,call,,,Spectrogram 1,1,
3,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Dicrurus_forficatus,17.215908,19.923388,494.845,5773.196,0.9,3,call,,,Spectrogram 1,1,
4,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Dicrurus_forficatus,24.28254,29.043458,742.268,6103.093,0.9,4,call,,,Spectrogram 1,1,


In [16]:
# Classes the model is trained on; each should occur in the annotated data.
# One name per line so additions and removals show up cleanly in diffs.
class_list = [
    'Hypsipetes_madagascariensis',
    'Copsychus_albospecularis',
    'Coracopsis_nigra',
    'Dicrurus_forficatus',
    'Coua_caerulea',
    'Zosterops_maderaspatanus',
    'Eurystomus_glaucurus',
    'Agapornis_canus',
    'Saxicola_torquatus',
    'Cyanolanius_madagascarinus',
    'Leptopterus_chabert',
    'Nesoenas_picturatus',
    'Coua_reynaudii',
    'Ceblepyris_cinereus',
    'Neodrepanis_coruscans',
    'Philepitta_castanea',
    'Eulemur_sp',
    'Coua_cristata',
    'Treron_australis',
]

In [17]:
# Label settings shared by all three splits: fixed-duration (5 second) clips
# with no overlap between clips, restricted to the classes in `class_list`.
clip_label_options = dict(
    clip_duration=5,
    clip_overlap=0,
    min_label_overlap=0.25,
    class_subset=class_list,
)

# Build the clip-level label table for each split with identical settings.
val_labels = val_annotations_corrected.clip_labels(**clip_label_options)
train_labels = train_annotations_corrected.clip_labels(**clip_label_options)
test_labels = test_annotations_corrected.clip_labels(**clip_label_options)

In [18]:
# Rebalance the training clips: classes are up-sampled towards
# `n_samples_per_class`, never down-sampled.
# Resampling draws clips at random, so pin the seed; otherwise every
# Restart & Run All trains on a different set of clips.
RESAMPLE_SEED = 0
train_labels_resampled = opensoundscape.data_selection.resample(
    train_labels,
    n_samples_per_class=n_samples_per_class,
    downsample=False,
    upsample=True,
    random_state=RESAMPLE_SEED,
)

In [19]:
from bioacoustics_model_zoo import Perch

In [20]:
# Instantiate the Perch model from bioacoustics_model_zoo with default arguments.
# NOTE(review): in the recorded run this cell prints a warning that the
# architecture is not listed in ARCH_DICT and will not be loadable after
# `.save()` unless `pickle=True` is used; keep that in mind for the final
# save step.
perch_model = Perch()

                    This architecture is not listed in opensoundscape.ml.cnn_architectures.ARCH_DICT.
                    It will not be available for loading after saving the model with .save() (unless using pickle=True). 
                    To make it re-loadable, define a function that generates the architecture from arguments: (n_classes, n_channels) 
                    then use opensoundscape.ml.cnn_architectures.register_architecture() to register the generating function.

                    The function can also set the returned object's .constructor_name to the registered string key in ARCH_DICT

                    See opensoundscape.ml.cnn_architectures module for examples of constructor functions
                    


In [21]:
# Embed the resampled training clips and the validation clips with Perch.
# Both calls share the same options so the two outputs are produced the same way.
embed_options = dict(return_dfs=False, batch_size=128, num_workers=0)
emb_train = perch_model.embed(train_labels_resampled, **embed_options)
emb_val = perch_model.embed(val_labels, **embed_options)

  0%|          | 0/161 [00:00<?, ?it/s]

I0000 00:00:1751653977.876015   22151 service.cc:145] XLA service 0x30650e90 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1751653977.876078   22151 service.cc:153]   StreamExecutor device (0): Host, Default Version
2025-07-04 18:32:58.092574: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
W0000 00:00:1751653978.101078   22151 assert_op.cc:38] Ignoring Assert operator jax2tf_infer_fn_/assert_equal_1/Assert/AssertGuard/Assert
I0000 00:00:1751653980.418501   22151 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1751659093.901533   22151 assert_op.cc:38] Ignoring Assert operator jax2tf_infer_fn_/assert_equal_1/Assert/AssertGuard/Assert


  0%|          | 0/12 [00:00<?, ?it/s]

W0000 00:00:1751659465.692324   22151 assert_op.cc:38] Ignoring Assert operator jax2tf_infer_fn_/assert_equal_1/Assert/AssertGuard/Assert


In [22]:
# Classes the classifier head is trained on.
# The label tables were built from the `class_list` defined in the earlier
# cell; re-typing the 19 names here would let the two copies drift apart and
# silently misalign model outputs with label columns. Derive the list from
# the label columns instead, and check that the validation labels agree.
class_list = list(train_labels_resampled.columns)
assert list(val_labels.columns) == class_list, (
    "validation label columns do not match the training label columns"
)



In [23]:
# Seed torch before the classifier head is re-created so that repeated runs
# start from the same state (assumes the head is initialised from torch's
# global RNG — TODO confirm).
torch.manual_seed(0)

# Replace the classifier head so it has one output per entry in `class_list`.
perch_model.change_classes(class_list)

# Select label columns explicitly in `class_list` order so target column i
# always corresponds to class i of the model.
train_targets = train_labels_resampled[class_list].values
val_targets = val_labels[class_list].values

# Embeddings and labels must line up row for row.
assert len(emb_train) == len(train_targets), "train embeddings/labels length mismatch"
assert len(emb_val) == len(val_targets), "validation embeddings/labels length mismatch"

# NOTE(review): the recorded run logs identical "val AU ROC" and "val MAP"
# values at every checkpoint, ending near 0.50. Verify the validation metrics
# independently before relying on this model.
quick_fit(perch_model.network, emb_train, train_targets, emb_val, val_targets, steps=1000)

Epoch 100/1000, Loss: 0.18318606913089752, Val Loss: 0.1508381962776184
val AU ROC: 0.281
val MAP: 0.281




Epoch 200/1000, Loss: 0.1181299164891243, Val Loss: 0.10216950625181198
val AU ROC: 0.444
val MAP: 0.444




Epoch 300/1000, Loss: 0.08670832961797714, Val Loss: 0.08282061666250229
val AU ROC: 0.493
val MAP: 0.493




Epoch 400/1000, Loss: 0.06842762231826782, Val Loss: 0.07174187153577805
val AU ROC: 0.496
val MAP: 0.496




Epoch 500/1000, Loss: 0.05657148361206055, Val Loss: 0.06467117369174957
val AU ROC: 0.500
val MAP: 0.500




Epoch 600/1000, Loss: 0.04825594276189804, Val Loss: 0.05987464264035225
val AU ROC: 0.499
val MAP: 0.499




Epoch 700/1000, Loss: 0.042080268263816833, Val Loss: 0.05647758021950722
val AU ROC: 0.498
val MAP: 0.498




Epoch 800/1000, Loss: 0.03729400783777237, Val Loss: 0.05399391055107117
val AU ROC: 0.500
val MAP: 0.500




Epoch 900/1000, Loss: 0.0334637314081192, Val Loss: 0.05213296785950661
val AU ROC: 0.501
val MAP: 0.501




Epoch 1000/1000, Loss: 0.03032275289297104, Val Loss: 0.050711847841739655
val AU ROC: 0.501
val MAP: 0.501
Training complete




In [24]:
# Destination directory for the trained model; create it if it is missing so
# the save does not fail after the long embedding/training run.
model_dir = Path('/mnt/class_data/group1_bioacoustics/sheila/Perch')
model_dir.mkdir(parents=True, exist_ok=True)

# Same output file as before: <model_dir>/<filename>.model
perch_model.save(str(model_dir / f'{filename}.model'))

# The warning printed when the model was created says a model saved with
# `.save()` may not be re-loadable. Also store the trained classifier head's
# weights and its class list, so the head can be restored onto a fresh model.
# (assumes `perch_model.network` is a torch module, as implied by training it
# with `quick_fit` — TODO confirm)
torch.save(
    {'classes': list(class_list), 'state_dict': perch_model.network.state_dict()},
    str(model_dir / f'{filename}_classifier_head.pt'),
)