In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from opensoundscape import BoxedAnnotations, CNN
from opensoundscape import SpectrogramPreprocessor
import opensoundscape

In [8]:
# --- Run bookkeeping ---------------------------------------------------
# Name of this notebook; not referenced by the other cells visible here.
notebook_name = 'CNN_model_overlay.ipynb'
# Passed to model.train(save_path=...) in the training cell.
save_path = '/mnt/class_data/group1_bioacoustics/sheila/model_checkpoints/model_7_overlay'

# --- Training loop (all three are forwarded to model.train) -------------
epochs = 20
batch_size = 256
log_interval = 20

# --- Data ---------------------------------------------------------------
# Per-class target handed to opensoundscape.data_selection.resample.
n_samples_per_class = 5221
# Given to both CNN and SpectrogramPreprocessor; presumably seconds, since
# the label rows span 2.0-long start/end windows -- TODO confirm.
sample_duration = 2

# --- Spectrogram --------------------------------------------------------
# Assigned to the preprocessor's to_spec.params.window_samples.
window_samples = 2048
# Bandpass limits handed to pipeline.bandpass.set (presumably Hz -- confirm).
min_f = 0
max_f = 9000

# --- Learning-rate scheduler (written into model.lr_scheduler_params) ---
step_size = 10
gamma = 0.5






In [9]:
def _load_labels(csv_path):
    """Read one label table, indexed by (file, start_time, end_time)."""
    return pd.read_csv(csv_path, index_col=['file', 'start_time', 'end_time'])


# One table per split; each CSV is read with the same three-level row index.
train_labels = _load_labels('data/train_labels.csv')
val_labels = _load_labels('data/val_labels.csv')
test_labels = _load_labels('data/test_labels.csv')

# Preview the first rows of the training table as the cell output.
train_labels.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Hypsipetes_madagascariensis,Copsychus_albospecularis,Coracopsis_nigra,Dicrurus_forficatus,Coua_caerulea,Zosterops_maderaspatanus,Eurystomus_glaucurus,Agapornis_canus,Saxicola_torquatus,Cyanolanius_madagascarinus,Leptopterus_chabert,Nesoenas_picturatus,Coua_reynaudii,Ceblepyris_cinereus,Neodrepanis_coruscans,Philepitta_castanea,Eulemur_sp,Coua_cristata,Treron_australis
file,start_time,end_time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,0.0,2.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,2.0,4.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,4.0,6.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,6.0,8.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
/mnt/class_data/group1_bioacoustics/sheila/cv4e_dataset/train/Marojejy/AGN_A/AGN_A-AGN08/AGN_A-AGN08_20230421_084000.WAV,8.0,10.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [10]:
# Classes to train the model on. These should occur in the annotated data,
# i.e. match the columns of the label tables.
class_list = [
    'Hypsipetes_madagascariensis',
    'Copsychus_albospecularis',
    'Coracopsis_nigra',
    'Dicrurus_forficatus',
    'Coua_caerulea',
    'Zosterops_maderaspatanus',
    'Eurystomus_glaucurus',
    'Agapornis_canus',
    'Saxicola_torquatus',
    'Cyanolanius_madagascarinus',
    'Leptopterus_chabert',
    'Nesoenas_picturatus',
    'Coua_reynaudii',
    'Ceblepyris_cinereus',
    'Neodrepanis_coruscans',
    'Philepitta_castanea',
    'Eulemur_sp',
    'Coua_cristata',
    'Treron_australis',
]

# Enforce the match instead of only displaying it: without this, a renamed,
# missing or reordered column would go unnoticed on "Run All" and the later
# cells would still start training.
assert class_list == list(train_labels.columns), (
    "class_list does not match the columns of train_labels "
    f"(expected {list(train_labels.columns)})"
)

# Element-wise view of the same comparison, shown as the cell output.
class_list==train_labels.columns.values


array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [15]:
# Rebalance the training labels with upsampling on and downsampling off.
# Presumably this repeats clips of classes that have fewer than
# n_samples_per_class examples and leaves larger classes untouched --
# confirm against the opensoundscape documentation.
train_labels_resampled = opensoundscape.data_selection.resample(
    train_labels,
    n_samples_per_class=n_samples_per_class,
    upsample=True,
    downsample=False,
)

In [23]:
import wandb

# When logging in on a new machine, run wandb.login() once before this cell.
# (Kept as a real comment rather than a bare string literal, which is an
# executed no-op statement and not a comment.)

# Open the Weights & Biases run; the handle is passed to model.train and
# closed in the training cell.
wandb_session = wandb.init(
    project='Rewilding_Madagascar',
)

In [39]:
# Build a new CNN for the labeled data. Its preprocessor and learning-rate
# schedule are adjusted in the following cells before training starts.
model = CNN(
    architecture='resnet18',
    classes=class_list,
    sample_duration=sample_duration,
)


In [41]:
# Add an overlay step to preprocessing. Per the original author's note, it
# picks a sample from overlay_df and combines it with the sample currently
# being preprocessed.
# NOTE(review): overlay_df is the un-resampled train_labels table, not
# train_labels_resampled -- confirm this is intended.
my_preprocessor = SpectrogramPreprocessor(
    sample_duration=sample_duration,
    overlay_df=train_labels,
)

# Replace the preprocessor the CNN was constructed with.
model.preprocessor = my_preprocessor

In [42]:

# Learning-rate schedule. Per the original note, the default scheduler waits
# a certain number of epochs and then multiplies the learning rate by a
# factor (step learning). Both settings are overridden from the config cell;
# presumably step_size is the wait in epochs and gamma the multiplier --
# confirm against the scheduler's documentation.
model.lr_scheduler_params['kwargs'].update(step_size=step_size, gamma=gamma)

In [43]:
# Apply the spectrogram settings from the config cell to the preprocessor
# currently attached to the model.
model.preprocessor.pipeline.bandpass.set(min_f=min_f, max_f=max_f)
model.preprocessor.pipeline.to_spec.params.window_samples = window_samples

# Train on the resampled training labels and validate on val_labels.
# The wandb run is closed on every path: previously finish() was skipped
# whenever train() raised or the cell was interrupted, leaving the run open.
try:
    model.train(
        train_labels_resampled,
        val_labels,
        epochs=epochs,
        log_interval=log_interval,
        num_workers=8,
        batch_size=batch_size,
        wandb_session=wandb_session,
        save_path=save_path,
    )
except BaseException:
    # BaseException also covers KeyboardInterrupt (kernel interrupt).
    # A non-zero exit code marks the run as failed rather than finished.
    wandb_session.finish(exit_code=1)
    raise
else:
    wandb_session.finish()

In [6]:
import shutil
import os

# Hard-coded location of this notebook on disk (it is not detected
# automatically).
current_script_path = '/home/Sheila/rewilding_madagascar/CNN_model_overlay.ipynb'

# Destination for the archived copy, next to the model checkpoints.
target_path = '/mnt/class_data/group1_bioacoustics/sheila/model_checkpoints/CNN_model_overlay.ipynb'

# Best-effort copy: a failure is reported but does not stop the notebook.
try:
    shutil.copyfile(current_script_path, target_path)
except Exception as e:
    print(f"An error occurred while copying the file: {e}")
else:
    print(f"Notebook copied successfully to {target_path}")



Notebook copied successfully to /mnt/class_data/group1_bioacoustics/sheila/model_checkpoints/CNN_model_overlay.ipynb
