In [None]:
"""
Created on Mon Apr 29 16:15:46 2024

@author: Michaela Alksne

Detailed workflow for training and testing WhaleSongNet:

"""

In [None]:
"""
Created on Mon Apr 29 16:15:46 2024

@author: Michaela Alksne

First module: trains a ResNet-18 CNN to classify A and B calls in 30-second spectrograms.
It sets the model and spectrogram parameters and connects to wandb so the user can monitor training progress.

Model parameters: 
    - multi-target model: 2 labels per sample (A NE Pacific, B NE Pacific)
    - classification with ResampleLoss function
    - weights pretrained on ImageNet
    - learning rate = 0.001
    - cooling factor = 0.3 (decreases the learning rate by multiplying it by 0.3, e.g. 0.001*0.3, every ten epochs)
    - epochs = 12 
    - batch_size = 128

Spectrogram parameters:
    - 30 second windows
    - 3200 Hz(samples/second) sampling rate 
    - 3200 point-FFT which results in 1 Hz bins
    - 1400 of 1600 window samples overlap (87.5 %), giving a 200-sample hop, i.e. 62.5 ms time bins
    - 1600 Hamming window samples. A Hamming window is used to smooth the signal and reduce spectral leakage/artifacts for the FFT. 
    - minimum frequency: 10 Hz
    - maximum frequency: 150 Hz
    
Spectrogram augmentations: 
    - frequency_mask: adds random horizontal bars over image (bypassed, i.e. not applied, in the training code below)
    - time_mask: adds random vertical bars over the image
    - add_noise: adds random Gaussian noise to image 
    
Notes for user:
batch_size – number of training files to load/process before re-calculating the loss function and backpropagation
num_workers – parallelization (ie, cores or cpus)
log_interval – interval in batches at which training progress (loss/metrics) is printed to the log

"""

In [None]:
import opensoundscape
import glob
import os
import pandas as pd
import numpy as np
import sklearn
import librosa
import torch
import wandb
import random
import matplotlib.pyplot as plt
from scipy.special import expit
from sklearn.metrics import precision_recall_curve

In [None]:
 # read in train and validation dataframes
train_clips = pd.read_csv('../../data/processed/train.csv') # point to csv files
val_clips = pd.read_csv('../../data/processed/validation.csv') # point to csv files

# Print per-column totals as a quick look at how many positive labels each table holds.
# numeric_only=True skips the string 'file' column, which .sum() would otherwise
# concatenate into one enormous string of every file path.
print(train_clips.sum(numeric_only=True))
print(val_clips.sum(numeric_only=True))

In [None]:
# Rewrite the relative wav paths stored in the CSVs so they point at the local copy of the data.
# A raw string keeps every backslash literal, so a pasted Windows path cannot be mangled by
# escape sequences such as "\n" or "\t" (and "\D"-style invalid escapes no longer warn).
data_path = r"YOUR\DATA\PATH\HERE" # copy and paste the path to your wav files
data_prefix = os.path.join(data_path, "") # guarantees exactly one trailing path separator
relative_prefix = "..\\..\\data\\raw\\" # prefix the CSVs use for the wav files

# regex=False: the prefix is literal text. Interpreted as a regular expression it is invalid
# (it ends in a lone backslash), and the default for `regex` differs between pandas versions.
train_clips['file'] = train_clips['file'].str.replace(relative_prefix, data_prefix, regex=False)
val_clips['file'] = val_clips['file'].str.replace(relative_prefix, data_prefix, regex=False)

# Index each table by (file, start_time, end_time); the label columns are all that remain
# as regular columns when the tables are handed to model.train().
train_clips = train_clips.set_index(['file', 'start_time', 'end_time'])
val_clips = val_clips.set_index(['file', 'start_time', 'end_time'])

In [None]:
# TRAIN MODEL
calls_of_interest = ["A NE Pacific", "B NE Pacific"]  # call types the CNN will learn to recognize

# ResNet-18 CNN object for 30-second samples; single_target=False makes it multi-target.
model = opensoundscape.CNN(
    'resnet18',
    classes=calls_of_interest,
    sample_duration=30.0,
    single_target=False,
)
opensoundscape.ml.cnn.use_resample_loss(model)  # loss function for multi-target classification

# Short handles onto the preprocessing pipeline; these are the same objects the model
# owns, so assigning through them configures the model itself.
pipeline = model.preprocessor.pipeline
spec_params = pipeline.to_spec.params
bandpass_params = pipeline.bandpass.params

# --- spectrogram creation ---
spec_params.window_type = 'hamming'
spec_params.window_samples = 1600
spec_params.overlap_samples = 1400  # 200-sample hop between successive windows
spec_params.fft_size = 3200
spec_params.decibel_limits = (-120,150)
spec_params.scaling = 'density'

# --- frequency band kept in the spectrogram (Hz) ---
bandpass_params.min_f = 10
bandpass_params.max_f = 150

# --- augmentations ---
pipeline.frequency_mask.bypass = True  # frequency masking is switched off
pipeline.time_mask.set(max_width = 0.1, max_masks=5)  # adds vertical lines as data augmentation
pipeline.add_noise.set(std=0.1)  # adds Gaussian-distributed white noise
pipeline.random_affine.bypass = True  # random affine transform is switched off

# --- optimizer and logging ---
model.optimizer_params['lr'] = 0.001
model.lr_cooling_factor = 0.3
model.wandb_logging['n_preview_samples'] = 100  # number of samples to look at in wandb

training_options = dict(
    epochs=12,
    batch_size=128,
    log_interval=1,            # log progress every 1 batches
    num_workers=12,
    save_interval=1,           # save checkpoint every 1 epoch
    save_path='../../models',  # location to save checkpoints (epochs)
)
model.train(train_clips, val_clips, **training_options)

In [None]:
"""
Created on Mon Jan 22 11:35:46 2024

@author: Michaela Alksne

Module to use the model for running inference, or predicting on new data.
Our model has a "predict" function which we can call to predict on a new dataset. 
In this case, we are predicting on our test data. Therefore we are able to generate performance metrics as described below. 
However, if we were predicting on unlabeled data, we would just use the model's predict function and would not be able to plot the performance metrics.

Here we load in our trained model and modify the spectrogram parameters because our test data has a different sampling rate than our training data. This will not affect the model; it only adjusts the spectrogram settings so that the images match those used in training. 
Our model has a "predict" function which we can call to predict on a new dataset. 
    - We load in our model and our test data and generate predictions. 
    - Then we join the predictions with the true labels and evaluate model performance by plotting our precision-recall curve using scikit-learn
    - We also plot the distribution of our scores for true and false detections. 

"""

In [None]:
# EVALUATE MODEL
# read in test dataframes
test_clips = pd.read_csv('../../data/processed/test.csv') # point to csv files

# Print per-column totals as a quick look at how many positive labels the test set holds.
# numeric_only=True skips the string 'file' column, which .sum() would otherwise
# concatenate into one enormous string of every file path.
print(test_clips.sum(numeric_only=True))

In [None]:
# Rewrite the relative wav paths stored in the CSV so they point at the local copy of the data.
# A raw string keeps every backslash literal, so a pasted Windows path cannot be mangled by
# escape sequences such as "\n" or "\t" (and "\D"-style invalid escapes no longer warn).
data_path = r"YOUR\DATA\PATH\HERE" # copy and paste the path to your wav files
data_prefix = os.path.join(data_path, "") # guarantees exactly one trailing path separator
relative_prefix = "..\\..\\data\\raw\\" # prefix the CSV uses for the wav files

# regex=False: the prefix is literal text. Interpreted as a regular expression it is invalid
# (it ends in a lone backslash), and the default for `regex` differs between pandas versions.
test_clips['file'] = test_clips['file'].str.replace(relative_prefix, data_prefix, regex=False)

# Index by (file, start_time, end_time); the label columns are all that remain as regular
# columns when the table is handed to model.predict().
test_clips = test_clips.set_index(['file', 'start_time', 'end_time'])

In [None]:
model = opensoundscape.ml.cnn.load_model('../../models/best.model')  # read in best model

# Modify the model's preprocessing so the test spectrograms are made with the proper resolution.
spec_params = model.preprocessor.pipeline.to_spec.params  # same object the model owns
spec_params.window_type = 'hamming'        # using hamming window
spec_params.window_samples = 1000          # window samples
spec_params.overlap_samples = 900          # 90% overlap -> 100-sample hop
spec_params.fft_size = 2000                # NOTE(review): 1 Hz bins / 0.05 s hop only if test audio is 2000 Hz — confirm
spec_params.decibel_limits = (-120,150)    # oss preprocessing sets dB limits

# Score every test clip, then place the scores next to the true labels.
test_scores = model.predict(test_clips, num_workers=12, batch_size=128)
test_scores.columns = ['pred_A','pred_B']
test_all = test_clips.join(test_scores)
# TODO: save output (test_all is not written to disk here)

## B CALLS ###

# Precision-recall curve for B calls: true labels against predicted scores.
b_true_labels = test_all['B NE Pacific']
b_pred_scores = test_all['pred_B']
precision, recall, thresholds = precision_recall_curve(b_true_labels, b_pred_scores)

fig, ax = plt.subplots()
ax.plot(recall, precision, color='purple')
# title and axis labels
ax.set_title('Precision-Recall Curve B calls test data')
ax.set_ylabel('Precision')
ax.set_xlabel('Recall')
plt.show()

# Score distribution for B calls: clips labelled as containing a B call vs. all other clips.
is_b_call = test_all['B NE Pacific'] == 1
is_b_noise = test_all['B NE Pacific'] == 0
B_eval_index = test_all.index[is_b_call]
B_noise_index = test_all.index[is_b_noise]
B_eval = test_all.loc[B_eval_index]
B_noise = test_all.loc[B_noise_index]

# Overlay both histograms on one set of axes, drawn in the same order as before.
hist_fig, hist_ax = plt.subplots()
hist_style = dict(bins=40, alpha=0.5, edgecolor='black')
hist_ax.hist(B_noise['pred_B'], color='blue', label='Noise prediction score', **hist_style)
hist_ax.hist(B_eval['pred_B'], color='orange', label='B call prediction score', **hist_style)
hist_ax.set_xlabel('Value')
hist_ax.set_ylabel('Frequency')
hist_ax.semilogy()  # log-scale the counts so the smaller class stays visible
hist_ax.legend(loc='upper right')
hist_ax.set_title('B call prediction scores test data')
plt.show()