In [19]:
import torch
import pandas as pd
from pathlib import Path
import numpy as np
import pandas as pd
import random
from glob import glob
import sklearn

from tqdm.autonotebook import tqdm
from sklearn.metrics import average_precision_score, roc_auc_score
from pathlib import Path

#set up plotting
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize']=[15,5] #for large visuals
%config InlineBackend.figure_format = 'retina'

# opensoundscape transfer learning tools
from opensoundscape.ml.shallow_classifier import MLPClassifier, quick_fit, fit_classifier_on_embeddings


In [20]:
from sklearn.model_selection import train_test_split
from opensoundscape import BoxedAnnotations, CNN

In [21]:
filename = 'Not_so_shallow_classifier_perch'

In [22]:
metadata = pd.read_csv('./data/metadata.csv')
metadata.head()

Unnamed: 0.1,Unnamed: 0,Begin Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Species,Conf,Call_type,Filename,SiteHab,RecordingSched,RecorderName,DP,SoundFile,SoundFile_path,Split,Raven_path
0,0,0.763354,3.078034,2854.369,5965.41,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...
1,1,6.588949,9.475884,2571.297,5451.15,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...
2,2,10.43478,12.523959,3085.557,5348.299,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...
3,3,14.854197,15.436757,3085.557,5862.558,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...
4,4,55.57803,59.479764,2621.359,5475.728,Philepitta_castanea,,,AGN_A-AGN04_20230429_054000.txt,AGN,A,AGN_A-AGN04,AGN04,AGN_A-AGN04_20230429_054000.WAV,/mnt/class_data/group1_bioacoustics/sheila/cv4...,validation,/mnt/class_data/group1_bioacoustics/sheila/Pro...


In [23]:
val_metadata=metadata[metadata["Split"]=="validation"]
train_metadata=metadata[metadata["Split"]=="train"]
test_metadata=metadata[metadata["Split"]=="test"]

In [24]:
val_metadata=val_metadata.reset_index()
train_metadata=train_metadata.reset_index()
test_metadata=test_metadata.reset_index()

In [25]:
# assume we have a list of raven annotation files and corresponding audio files
# load the annotations into OpenSoundscape
raven_file_paths = val_metadata['Raven_path']
audio_file_paths = val_metadata['SoundFile_path']
val_annotations = BoxedAnnotations.from_raven_files(raven_file_paths,'species',audio_file_paths)

raven_file_paths = train_metadata['Raven_path']
audio_file_paths = train_metadata['SoundFile_path']
train_annotations = BoxedAnnotations.from_raven_files(raven_file_paths,'species',audio_file_paths)

raven_file_paths = test_metadata['Raven_path']
audio_file_paths = test_metadata['SoundFile_path']
test_annotations = BoxedAnnotations.from_raven_files(raven_file_paths,'species',audio_file_paths)

In [26]:
# Create the table with a dataframe
conversion_table = pd.DataFrame(
    {'original':['Eulemur_albifrons', 'Eulemur_fulvus'],
     'new':['Eulemur_sp', 'Eulemur_sp']}
)

# Or create the table in its own spreadsheet
#conversion_table = pd.read_csv('my_conversion_filename_here.csv')

conversion_table

Unnamed: 0,original,new
0,Eulemur_albifrons,Eulemur_sp
1,Eulemur_fulvus,Eulemur_sp


In [27]:
#correct annotations in each of the splits
val_annotations_corrected = val_annotations.convert_labels(conversion_table)
val_annotations_corrected.audio_files = val_annotations_corrected.df['audio_file'].values #workaround for issue #872
val_annotations_corrected.df.head()

train_annotations_corrected = train_annotations.convert_labels(conversion_table)
train_annotations_corrected.audio_files = train_annotations_corrected.df['audio_file'].values #workaround for issue #872
train_annotations_corrected.df.head()

test_annotations_corrected = test_annotations.convert_labels(conversion_table)
test_annotations_corrected.audio_files = test_annotations_corrected.df['audio_file'].values #workaround for issue #872
test_annotations_corrected.df.head()


Unnamed: 0,audio_file,annotation_file,annotation,start_time,end_time,low_f,high_f,Selection,View,Delta Freq (Hz),Call_type,Delta Time (s),Channel,Avg Power Density (dB FS/Hz),Conf
0,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Hypsipetes_madagascariensis,54.905264,56.190913,1968.454,5223.975,1,Spectrogram 1,,call,,1,,0.9
1,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Dicrurus_forficatus,4.943181,11.329778,659.794,5690.722,1,Spectrogram 1,,call,,1,,0.9
2,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Dicrurus_forficatus,13.011363,15.447871,551.7,5773.196,2,Spectrogram 1,,call,,1,,0.9
3,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Dicrurus_forficatus,17.215908,19.923388,494.845,5773.196,3,Spectrogram 1,,call,,1,,0.9
4,/mnt/class_data/group1_bioacoustics/sheila/cv4...,/mnt/class_data/group1_bioacoustics/sheila/Pro...,Dicrurus_forficatus,24.28254,29.043458,742.268,6103.093,4,Spectrogram 1,,call,,1,,0.9


In [28]:
# pick classes to train the model on. These should occur in the annotated data
class_list = ['Hypsipetes_madagascariensis','Copsychus_albospecularis','Coracopsis_nigra','Dicrurus_forficatus','Coua_caerulea','Zosterops_maderaspatanus','Eurystomus_glaucurus','Agapornis_canus','Saxicola_torquatus','Cyanolanius_madagascarinus','Leptopterus_chabert','Nesoenas_picturatus','Coua_reynaudii','Ceblepyris_cinereus','Neodrepanis_coruscans','Philepitta_castanea','Eulemur_sp','Coua_cristata','Treron_australis']


In [29]:
# create labels for fixed-duration (5 second) clips
val_labels = val_annotations_corrected.clip_labels(
  clip_duration=5,
  clip_overlap=0,
  min_label_overlap=0.25,
  class_subset=class_list
)

train_labels = train_annotations_corrected.clip_labels(
  clip_duration=5,
  clip_overlap=0,
  min_label_overlap=0.25,
  class_subset=class_list
)

test_labels = test_annotations_corrected.clip_labels(
  clip_duration=5,
  clip_overlap=0,
  min_label_overlap=0.25,
  class_subset=class_list
)

In [30]:
from bioacoustics_model_zoo import Perch

In [31]:
perch_model = Perch()

                    This architecture is not listed in opensoundscape.ml.cnn_architectures.ARCH_DICT.
                    It will not be available for loading after saving the model with .save() (unless using pickle=True). 
                    To make it re-loadable, define a function that generates the architecture from arguments: (n_classes, n_channels) 
                    then use opensoundscape.ml.cnn_architectures.register_architecture() to register the generating function.

                    The function can also set the returned object's .constructor_name to the registered string key in ARCH_DICT

                    See opensoundscape.ml.cnn_architectures module for examples of constructor functions
                    


In [32]:
# Decide number of hidden layers. 100 is a good starting point because the embedding size of perch are 1280 - for each sample it makes 1280 numbers. Trying to collapse info from 1280 parameters into 100 and then 100 into 19
#Can always reduce hidden_layer_sizes if overfitting. 
perch_model.initialize_custom_classifier(hidden_layer_sizes=[100],classes=class_list)

In [33]:
#created embeddings
emb_train = perch_model.embed(train_labels, return_dfs=False, batch_size=128, num_workers=0)
emb_val = perch_model.embed(val_labels, return_dfs=False, batch_size=128, num_workers=0)

  0%|          | 0/80 [00:00<?, ?it/s]

2025-01-24 00:04:31.984634: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator jax2tf_infer_fn_/assert_equal_1/Assert/AssertGuard/Assert
2025-01-24 00:06:11.273601: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator jax2tf_infer_fn_/assert_equal_1/Assert/AssertGuard/Assert


  0%|          | 0/12 [00:00<?, ?it/s]

2025-01-24 00:06:28.599582: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator jax2tf_infer_fn_/assert_equal_1/Assert/AssertGuard/Assert


In [34]:
# pick classes to train the model on. These should occur in the annotated data
class_list = ['Hypsipetes_madagascariensis','Copsychus_albospecularis','Coracopsis_nigra','Dicrurus_forficatus','Coua_caerulea','Zosterops_maderaspatanus','Eurystomus_glaucurus','Agapornis_canus','Saxicola_torquatus','Cyanolanius_madagascarinus','Leptopterus_chabert','Nesoenas_picturatus','Coua_reynaudii','Ceblepyris_cinereus','Neodrepanis_coruscans','Philepitta_castanea','Eulemur_sp','Coua_cristata','Treron_australis']



In [35]:
#used embeddings with quick fit to fit shallow classifier (perch_model.network)
perch_model.change_classes(class_list) # replace fc layer with 1-output layer
quick_fit(perch_model.network, emb_train, train_labels.values, emb_val, val_labels.values,steps=1000)

Epoch 100/1000, Loss: 0.0647353008389473, Val Loss: 0.06366882473230362
val AU ROC: 0.171
val MAP: 0.171




Epoch 200/1000, Loss: 0.04323918744921684, Val Loss: 0.04578624665737152
val AU ROC: 0.299
val MAP: 0.299




Epoch 300/1000, Loss: 0.03311040252447128, Val Loss: 0.03910275548696518
val AU ROC: 0.345
val MAP: 0.345




Epoch 400/1000, Loss: 0.027132272720336914, Val Loss: 0.03599374741315842
val AU ROC: 0.443
val MAP: 0.443




Epoch 500/1000, Loss: 0.023140212520956993, Val Loss: 0.03470354154706001
val AU ROC: 0.478
val MAP: 0.478




Epoch 600/1000, Loss: 0.020124131813645363, Val Loss: 0.034549813717603683
val AU ROC: 0.481
val MAP: 0.481




Epoch 700/1000, Loss: 0.01767042838037014, Val Loss: 0.035070452839136124
val AU ROC: 0.481
val MAP: 0.481




Epoch 800/1000, Loss: 0.0155946658924222, Val Loss: 0.03593556955456734
val AU ROC: 0.482
val MAP: 0.482




Epoch 900/1000, Loss: 0.013799488544464111, Val Loss: 0.03706939518451691
val AU ROC: 0.478
val MAP: 0.478




Epoch 1000/1000, Loss: 0.012236585840582848, Val Loss: 0.03847609460353851
val AU ROC: 0.476
val MAP: 0.476
Training complete




In [36]:
perch_model.save(f'/mnt/class_data/group1_bioacoustics/sheila/Perch/{filename}.model')