In [None]:
# Jupyter notebook detailing how to evaluate opensoundscape blue whale model predictions on a novel dataset

In [1]:
# read in packages
from datetime import datetime
import matplotlib.pyplot as plt
import os
import glob
import opensoundscape
import sys
sys.path.append(r"C:\Users\DAM1\CV4E")
from AudioStreamDescriptor import XWAVhdr
from opensoundscape import Audio, Spectrogram
from  apply_transfer_function import TransferFunction
from convert_audio_to_bits import convert_audio_to_bits
import fsspec
import random
import pandas as pd
import numpy as np
from scipy.special import expit
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

  # NOTE(review): stray fragment commented out -- `"class": algorithms.Blowfish,` is not a valid Python statement here and `algorithms` is never imported; it looks like paste residue.


In [2]:
# load in predictions
# The path is a raw string so the Windows backslashes are taken literally; the
# plain literal relied on invalid escape sequences (\C, \B, \c, \p, \S), which
# newer Python versions warn about. The resulting path is unchanged.
# The first three CSV columns become a MultiIndex (later cells read 'file' and
# 'start_time' back out of it after reset_index()).
predictions_SOCAL44N = pd.read_csv(
    r'L:\CV4E\BigBlueWave\CV4E\code\BigBlueWhale-oss\predictions\SOCAL44N_predictions.csv',
    index_col=[0, 1, 2],
)

In [None]:
# investigate score distribution
# Split the predictions by the ground-truth D-call label (column 'D').
# A boolean mask selects every matching row exactly once; looking the rows up
# again through `.loc[index_labels]` would repeat rows if the index ever held
# duplicate labels, and costs an extra label lookup for no benefit.
D_eval = predictions_SOCAL44N[predictions_SOCAL44N['D'] == 1]
D_noise = predictions_SOCAL44N[predictions_SOCAL44N['D'] == 0]
# Index objects are still exposed in case other cells refer to them.
D_eval_index = D_eval.index
D_noise_index = D_noise.index

# Overlaid histograms of the D-call score for annotated calls vs. everything else.
fig, ax = plt.subplots()
ax.hist(D_noise['pred_D'], bins=40, alpha=0.5, edgecolor='black', color='blue', label='Noise prediction score')
ax.hist(D_eval['pred_D'], bins=40, alpha=0.5, edgecolor='black', color='orange', label='D call prediction score')
ax.set_xlabel('Value')
ax.set_ylabel('Frequency')
ax.set_yscale('log')  # log-scaled counts, same effect as the bare plt.semilogy() call
ax.set_title('D call prediction scores test SOCAL44N')
ax.legend(loc='upper right')
plt.show()

In [None]:
# plot precision recall curve
# Ground truth comes from column 'D', model scores from column 'pred_D'.
# `precision`, `recall` and `thresholds` are reused by the metrics cell below.
precision, recall, thresholds = precision_recall_curve(
    predictions_SOCAL44N['D'],
    predictions_SOCAL44N['pred_D'],
)

fig, ax = plt.subplots()
ax.plot(recall, precision, color='purple')
# title and axis labels in a single call
ax.set(
    title='Precision-Recall Curve D calls SOCAL44N',
    xlabel='Recall',
    ylabel='Precision',
)
plt.show()

In [None]:
# generate confusion matrix and calculate other metrics
MIN_PRECISION = 0.3  # target precision, chosen by eye from the PR curve

# precision_recall_curve returns one more precision value than thresholds (the
# final precision is always 1 and has no threshold attached). Only search the
# entries that have a matching threshold; searching the full array can return
# an index one past the end of `thresholds` and raise IndexError.
meets_target = precision[:-1] >= MIN_PRECISION
if not meets_target.any():
    raise ValueError(f"No score threshold reaches a precision of {MIN_PRECISION}")
# thresholds are increasing, so the first hit is the lowest qualifying threshold
threshold_index = np.argmax(meets_target)
chosen_threshold = thresholds[threshold_index]

# 1 where the D score is at or above the chosen threshold, else 0
binary_D_predictions = (predictions_SOCAL44N['pred_D'] >= chosen_threshold).astype(int)

accuracy_value = accuracy_score(predictions_SOCAL44N['D'], binary_D_predictions)
precision_value = precision_score(predictions_SOCAL44N['D'], binary_D_predictions)
recall_value = recall_score(predictions_SOCAL44N['D'], binary_D_predictions)
f1 = f1_score(predictions_SOCAL44N['D'], binary_D_predictions)

# labels=[0, 1] pins the matrix to 2x2 in (TN, FP / FN, TP) order so it always
# matches the legend printed below, even if one class is absent.
conf_matrix = confusion_matrix(predictions_SOCAL44N['D'], binary_D_predictions, labels=[0, 1])
print("    TN", "  FP")
print("    FN","  TP")
print(conf_matrix)
# the metrics were previously computed but never shown
print(
    f"threshold={chosen_threshold:.3f}  accuracy={accuracy_value:.3f}  "
    f"precision={precision_value:.3f}  recall={recall_value:.3f}  f1={f1:.3f}"
)

In [None]:
# read in transfer function
tf_path = 'L:\\CV4E\\BigBlueWave\\CV4E\\transfer_functions\\618_101105_B_HARP_CINMS18B.tf'
# Whitespace-delimited table with no header row. sep=r'\s+' is the documented
# equivalent of delim_whitespace=True, which is deprecated in recent pandas.
TF = pd.read_csv(tf_path, sep=r'\s+', header=None)
# Column names expected by apply_transfer_function; assigning them this way
# raises if the file does not have exactly two columns.
TF.columns = ['frequency', 'calibration']

In [None]:
def apply_transfer_function(spec,tf_dataframe,decibel_limits=None):
    """
    Apply a transfer function to an opensoundscape.Spectrogram object.

    The transfer function is a table of | frequency | dB offset |. The offsets
    are linearly interpolated onto the frequencies of the spectrogram and added
    to the corresponding spectrogram rows. The input spectrogram is not
    modified; a new Spectrogram is returned.

    Args:
        spec: a Spectrogram object; its `.spectrogram`, `.frequencies`,
            `.times` and `.decibel_limits` attributes are read
        tf_dataframe: dataframe with columns 'frequency' (frequencies of the
            transfer function) and 'calibration' (offset added at that
            frequency). Rows may be in any order.
        decibel_limits: default None will use the original spectrogram's
            `.decibel_limits` attribute; optionally specify a new
            decibel_limits range for the returned Spectrogram

    Returns:
        a new opensoundscape.Spectrogram with the offsets applied and the same
        times and frequencies as `spec`
    """
    if decibel_limits is None:
        decibel_limits = spec.decibel_limits

    # extract frequency column and offset column from the transfer function dataframe
    transfer_function_freqs = np.asarray(tf_dataframe['frequency'], dtype=float)
    transfer_function_offsets = np.asarray(tf_dataframe['calibration'], dtype=float)

    # np.interp silently returns meaningless values unless its x-coordinates are
    # increasing, so put the table in frequency order first
    order = np.argsort(transfer_function_freqs, kind='stable')

    # linearly interpolate the transfer function onto the spectrogram frequencies
    # to get one offset per spectrogram row; frequencies outside the table's
    # range take the offset of the nearest table end (np.interp default)
    spec_offsets = np.interp(
        spec.frequencies,
        transfer_function_freqs[order],
        transfer_function_offsets[order],
    )

    # add each row's offset to every time step of that row (broadcast over columns)
    new_spec_values = spec.spectrogram + spec_offsets[:, np.newaxis]

    # create a new spectrogram object with the new values
    return opensoundscape.Spectrogram(new_spec_values,times=spec.times,frequencies=spec.frequencies,decibel_limits=decibel_limits)

In [None]:
# investigate high scoring D calls for SOCAL44N
# Plot a spectrogram for every annotated D call (true positive) whose D
# prediction score is above 10, and print its annotations and scores.

D_eval_tp = D_eval.loc[D_eval['pred_D'] > 10].reset_index()

# Scale factor used to stretch the loaded samples to a 16-bit integer range
bits = 16
abs_max = 2 ** (bits - 1)

annotation_columns = ('D', 'A NE Pacific', 'B NE Pacific')
prediction_columns = ('pred_D', 'pred_A', 'pred_B')

# One iteration per high-scoring true positive
for row_number, row in D_eval_tp.iterrows():
    annotations = [row[column] for column in annotation_columns]
    rounded_predictions = [round(row[column], 2) for column in prediction_columns]

    # 15 s of audio starting at the clip's start time, loaded with sample_rate=2000
    clip_audio = opensoundscape.Audio.from_file(
        row['file'],
        sample_rate=2000,
        offset=row['start_time'],
        duration=15,
    )

    # Rescale the samples by abs_max before computing the spectrogram
    clip_audio.samples = np.float64(clip_audio.samples) * abs_max

    # Wide decibel limits here; the displayed range is set when the transfer
    # function is applied
    clip_spec = opensoundscape.Spectrogram.from_audio(
        clip_audio,
        window_type='hamming',
        window_samples=1000,
        overlap_samples=900,
        fft_size=2000,
        decibel_limits=(-200,200),
        scaling='density',
    )

    # Add the calibration offsets from TF and limit the range to 40-140
    calibrated_spec = apply_transfer_function(clip_spec, TF, decibel_limits=(40, 140))

    # Keep the band passed to bandpass(10, 150) and render it as an image
    plt.imshow(calibrated_spec.bandpass(10, 150).to_image())
    plt.axis('off')  # no axis labels or ticks
    plt.show()

    print(f'Annotations: D={annotations[0]},A NE Pacific={annotations[1]}, B NE Pacific={annotations[2]}')
    print(f'Predictions: D={rounded_predictions[0]},A NE Pacific={rounded_predictions[1]}, B NE Pacific={rounded_predictions[2]}')
