# Deploy Normalized Detector

In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
import shutil
import os 
import glob
import csv
import json
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix as confusion_matrix_sklearn

from ketos.data_handling import selection_table as sl
import ketos.data_handling.database_interface as dbi
from ketos.data_handling.parsing import load_audio_representation
from ketos.data_handling.data_feeding import BatchGenerator
from ketos.neural_networks.resnet import ResNetInterface
from ketos.audio.audio_loader import AudioFrameLoader, AudioLoader, SelectionTableIterator
from ketos.audio.spectrogram import MagSpectrogram
from ketos.neural_networks.dev_utils.detection import batch_load_audio_file_data, filter_by_threshold, filter_by_label, merge_overlapping_detections
from ketos.data_handling.data_feeding import JointBatchGen

import warnings
# Ignore every FutureWarning raised from this point on, so deprecation notices
# do not flood the cell outputs below.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Visible confirmation that the whole import cell ran to completion.
print('done importing packages')

done importing packages


In [5]:
# Folder that holds the trained models and the spectrogram configuration.
main_folder = r'E:\baseline-with-normalization-reduce-tonal'

# Model(s) to deploy. Only model 3 is used here.
# To deploy all ten models (rs-model-0.kt ... rs-model-9.kt) use instead:
#   model_names = [f"{main_folder}\\rs-model-{i}.kt" for i in range(10)]
# NOTE: the cells below write to fixed "model-3" output file names, so they
# would need per-model names before running more than one model.
model_names = [f"{main_folder}\\rs-model-3.kt"]

# Spectrogram (audio representation) settings; derived from main_folder so the
# two paths cannot drift apart.
spectro_file = f"{main_folder}\\spec_config_100-1200Hz-0.032-hamm-normalized-reduce-tonal.json"

In [6]:
output_dir = r'E:\baseline-with-normalization-reduce-tonal\deploy\ulu2023\deploy-on-audio'
audio_folder = r'D:\ringed-seal-data\Ulu_2023_St5_Site65\test-subset'

# Create the output folder up front so the run cannot fail at the final
# to_csv() after all of the audio has already been processed.
os.makedirs(output_dir, exist_ok=True)

# --- Settings that do not depend on the model (hoisted out of the loop) ---

# NOTE(review): the file name is fixed, so running several models overwrites
# the same CSV; only the last model's detections survive.
detections_csv = output_dir + '\\' + 'detections-model-3-thresh09.csv'
temp_folder = output_dir + '\\' + 'ringedS_tmp_folder'

# Look at detections above this threshold
threshold = 0.9

# Step 0.5s each time (overlap of 50% for 1 sec duration)
step_size = 0.5

# Number of samples in batch
batch_size = 16

# Spectrogram parameters used to compute the model input
audio_repr = load_audio_representation(path=spectro_file)
spec_config = audio_repr['spectrogram']

for model_path in model_names:

    # Load the trained network; its files are unpacked into temp_folder
    model = ResNetInterface.load(model_file=model_path, new_model_folder=temp_folder)

    # Slide a window of spec_config['duration'] over every file in audio_folder
    audio_loader = AudioFrameLoader(path=audio_folder, duration=spec_config['duration'],
                                        step=step_size, stop=False, representation=spec_config['type'],
                                        representation_params=spec_config, pad=False)

    batch_generator = batch_load_audio_file_data(loader=audio_loader, batch_size=batch_size)

    # Collect the per-batch results and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop copies all earlier rows each time.
    batch_results = []

    for batch_data in batch_generator:
        # Run the model on the spectrogram data from the current batch
        batch_predictions = model.run_on_batch(batch_data['data'], return_raw_output=True)

        # Lets store our data in a dictionary
        raw_output = {'filename': batch_data['filename'], 'start': batch_data['start'], 'end': batch_data['end'],
                      'score': batch_predictions}

        batch_detections = filter_by_threshold(raw_output, threshold=threshold)
        batch_results.append(batch_detections)

    # pd.concat raises on an empty list, so fall back to an empty table
    # when the loader produced no batches at all.
    if batch_results:
        detections = pd.concat(batch_results, ignore_index=True)
    else:
        detections = pd.DataFrame()

    detections.to_csv(detections_csv, index=False)

100%|██████████████████████████████████████████████████████████████████████████████| 1498/1498 [05:30<00:00,  4.54it/s]


In [7]:
# Where the merged positive detections (CSV) and the Raven table (TXT) are written
pos_detection_csv = f"{output_dir}\\pos_detections_all_audio-model-3-09thresh.csv"
raven_txt = f"{output_dir}\\raven-formatted-detections-model-3-09thresh.txt"

# Keep only the rows labelled 1 (the positive results) and renumber them from 0
positive_rows = filter_by_label(detections, labels=1)
detections_filtered = positive_rows.reset_index(drop=True)
print(detections_filtered)

# Collapse overlapping detections into single, longer detections.
# The score of a merged detection is the average of the individual scores.
detections_grp = merge_overlapping_detections(detections_filtered)
print(detections_grp)

# Persist the merged detections for the following cells
detections_grp.to_csv(pos_detection_csv, index=False)

                  filename  start    end  label     score
0    7266.230520000010.wav  147.0  148.0      1  0.998066
1    7266.230520000010.wav  147.5  148.5      1  0.964786
2    7266.230520000010.wav  186.0  187.0      1  0.968911
3    7266.230520000010.wav  254.5  255.5      1  0.999989
4    7266.230520000010.wav  255.0  256.0      1  0.999957
..                     ...    ...    ...    ...       ...
897  7266.230520031510.wav  140.5  141.5      1  0.927529
898  7266.230520031510.wav  141.0  142.0      1  0.999995
899  7266.230520031510.wav  142.5  143.5      1  0.999846
900  7266.230520031510.wav  288.0  289.0      1  1.000000
901  7266.230520031510.wav  288.5  289.5      1  1.000000

[902 rows x 5 columns]
                  filename  start    end  label     score
0    7266.230520000010.wav  147.0  148.5      1  0.981426
1    7266.230520000010.wav  186.0  187.0      1  0.968911
2    7266.230520000010.wav  254.5  256.0      1  0.999973
3    7266.230520000010.wav  263.5  265.0      1 

In [8]:
# Build a tab-separated selection table from the merged positive detections.
merged_detections = pd.read_csv(pos_detection_csv)

# Forward-fill any missing file names from the row above
merged_detections['filename'] = merged_detections['filename'].ffill()

results_table = (
    merged_detections
    # Selection-table bookkeeping columns, appended after the detection columns
    .assign(**{
        'Selection': merged_detections.index + 1,   # 1-based row number
        'View': 'Spectrogram 1',
        'Channel': 1,
        'Begin Path': audio_folder + '\\' + merged_detections['filename'],
        'File Offset (s)': merged_detections['start'],
    })
    # Rename the detection columns to the selection-table headers
    .rename(columns={
        'start': 'Begin Time (s)',
        'end': 'End Time (s)',
        'filename': 'Begin File',
    })
    # Frequency limits of each selection box (fixed values)
    .assign(**{
        'Low Freq (Hz)': 100,
        'High Freq (Hz)': 1200,
    })
)

results_table.to_csv(raven_txt, index=False, sep='\t')

In [9]:
def count_detections_per_bin(dets, filenames, bin_labels, bin_width=60.0):
    """Count detections per file and per time bin.

    Every detection is assigned to exactly one bin, chosen from its start
    time, so a detection that straddles a bin boundary (e.g. 59.5-60.5 s) is
    counted in the bin where it begins instead of being dropped. Start times
    past the last bin edge go into the last bin.

    Parameters
    ----------
    dets : pandas.DataFrame
        Detections with at least the columns 'filename' and 'start' (seconds).
    filenames : sequence of str
        Files to report; files without any detection get a row of zeros.
    bin_labels : sequence of str
        Column names of the bins, in chronological order.
    bin_width : float, default 60.0
        Width of each bin in seconds.

    Returns
    -------
    pandas.DataFrame
        Integer counts indexed by 'filename', one column per bin plus 'total'.
    """
    labels = list(bin_labels)
    counts = pd.DataFrame(0, index=pd.Index(filenames, name='filename'), columns=labels)

    if not dets.empty:
        # Bin number of each detection, clamped to the available bins
        bin_idx = (dets['start'] // bin_width).clip(lower=0, upper=len(labels) - 1).astype(int)
        per_cell = dets.groupby([dets['filename'], bin_idx]).size()
        for (fname, bin_no), n_dets in per_cell.items():
            counts.at[fname, labels[bin_no]] = n_dets

    counts['total'] = counts.sum(axis=1)
    return counts


# Folder with the outputs of the deployment run (same folder the detections were saved to)
deploy_dir = r'E:\baseline-with-normalization-reduce-tonal\deploy\ulu2023\deploy-on-audio'

# Reload the thresholded detections from disk
detections_file = pd.read_csv(deploy_dir + '\\' + 'detections-model-3-thresh09.csv')

# Filter the detections for only the positive results
detections_filtered = filter_by_label(detections_file, labels=1).reset_index(drop=True)

# Merge overlapping detections so each one is counted once
detections_grp = merge_overlapping_detections(detections_filtered)

# Every file that appears in the detections file gets a row, including files
# whose detections were all filtered out above.
all_files = np.unique(detections_file['filename'])

one_min_det = count_detections_per_bin(
    detections_grp,
    all_files,
    bin_labels=['0-60s', '60-120s', '120-180s', '180-240s', '240+s'],
    bin_width=60.0,
)

one_min_det.to_excel(deploy_dir + '\\' + 'one-min-dets-thresh09.xlsx')