## Fine Tuning Pearce Point Ensemble

After deploying the "final" version of the detector on PP, it was found that there were a lot of false positives caused by bowheads and ice. Here I will create a new database containing negative examples which include ice and bowheads, and fine-tune the existing detector to see if it improves results for pearce point.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
import shutil
import os
import glob
import csv
import time
import json
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix as confusion_matrix_sklearn
import scipy

from ketos.data_handling import selection_table as sl
import ketos.data_handling.database_interface as dbi
from ketos.data_handling.parsing import load_audio_representation
from ketos.data_handling.data_feeding import BatchGenerator
from ketos.neural_networks.resnet import ResNetInterface
from ketos.audio.audio_loader import AudioFrameLoader, AudioLoader, SelectionTableIterator
from ketos.audio.spectrogram import MagSpectrogram
from ketos.neural_networks.dev_utils.detection import batch_load_audio_file_data, filter_by_threshold, filter_by_label, merge_overlapping_detections
from ketos.data_handling.data_feeding import JointBatchGen

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

print('done importing packages')

done importing packages


  super(Adam, self).__init__(name, **kwargs)


In [2]:
def drop_rows(file_durations, table):

    drop = []

    print('cleaning training table of original length ' + str(len(table)))

    for idex, row in table.iterrows():
    
        # filename is row[0], end time is idex.end
        print(row.name[0])
        index = file_durations.loc[file_durations['filename'] == row.name[0]].index
        duration = file_durations['duration'][index].values[0]
    
        if duration < row.end:
            # drop the row corresponding to that sel_id and filename from the dataframe
            drop.append(idex)
    
        if row.start < 0:
            drop.append(idex)
    
    print('Number of rows to drop (note, one entry may be in list twice): ' + str(len(drop)))

    return drop

In [3]:
# Create one second segments from new false annotations 
annot_neg_pp = pd.read_excel(r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\pearce-point-false-annots.xlsx')
std_annot_neg_pp = sl.standardize(table=annot_neg_pp, trim_table=True)
pp_neg_add = sl.select(annotations=std_annot_neg_pp, length=1.0, step=1, min_overlap=1, center=False)
pp_neg_add['Class'] = 'BO/I'
print(pp_neg_add.head(3))
pp_neg_add.to_excel(r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\new_negatives_pp.xlsx')

                                                           label       start  \
filename                                           sel_id                      
E:\baseline-with-normalization-reduce-tonal\pea... 0           0  299.878735   
                                                   1           0  300.878735   
                                                   2           0  301.878735   

                                                                  end Class  
filename                                           sel_id                    
E:\baseline-with-normalization-reduce-tonal\pea... 0       300.878735  BO/I  
                                                   1       301.878735  BO/I  
                                                   2       302.878735  BO/I  


In [4]:
# because of the weird ketos merging, read in the excel sheet instead of using the above
annot_neg_pp = r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\new_negatives_pp.xlsx'

neg_folder = r'E:\baseline-with-normalization-reduce-tonal\annots\neg'
pos_folder = r'E:\baseline-with-normalization-reduce-tonal\annots\pos'
db_name = r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\pp-w-false-additions.h5'
data_folder = r'D:\ringed-seal-data'
file_durations = pd.read_excel(r'E:\baseline-with-normalization-reduce-tonal\all_file_durations_complete.xlsx')

# Old negative file
pp_old_neg = pd.read_excel(neg_folder + '\\' + 'PP-negs-joined.xlsx')
pp_old_neg = pp_old_neg.ffill()

pp_new_neg = pd.read_excel(annot_neg_pp)
pp_new_neg = pp_new_neg.ffill()

pp_neg = pd.concat([pp_old_neg, pp_new_neg])

print(pp_neg.head(3))
pp_neg.to_excel(r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\concat_negs.xlsx')

                                            filename  sel_id       start  \
0  D:\ringed-seal-data\Pearce_Point_2018_2019\120...       0   26.070746   
1  D:\ringed-seal-data\Pearce_Point_2018_2019\120...       1   71.371480   
2  D:\ringed-seal-data\Pearce_Point_2018_2019\120...       2  114.012267   

          end  label classification Class    dup  
0   27.070746      0            NaN    OK  False  
1   72.371480      0            NaN     O  False  
2  115.012267      0            NaN    OU  False  


In [5]:
pp_tr_neg = pp_neg.head(int(len(pp_neg)*(85/100)))
pp_te_neg = pp_neg[~pp_neg.index.isin(pp_tr_neg.index)]

pp_tr_neg = sl.standardize(table=pp_tr_neg)
pp_te_neg = sl.standardize(table=pp_te_neg)

print('Negatives standardized? ' + str(sl.is_standardized(pp_tr_neg)) + str(sl.is_standardized(pp_te_neg)))

pp_pos = pd.read_csv(pos_folder + '\\' + 'PP_all_formatted_1sec.csv')
pp_pos = pp_pos.ffill()
pp_tr_pos = pp_pos.head(int(len(pp_pos)*(85/100)))
pp_te_pos = pp_pos[~pp_pos.index.isin(pp_tr_pos.index)]

pp_tr_pos = sl.standardize(table=pp_tr_pos, start_labels_at_1=True)
pp_te_pos = sl.standardize(table=pp_te_pos, start_labels_at_1=True)

pp_tr = pd.concat([pp_tr_pos, pp_tr_neg])
pp_te = pd.concat([pp_te_pos, pp_te_neg])

pp_tr.to_excel(r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\train.xlsx')
pp_te.to_excel(r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\test.xlsx')

Negatives standardized? TrueTrue


In [6]:
spectro_file = r'E:\baseline-with-normalization-reduce-tonal\spec_config_100-1200Hz-0.032-hamm-normalized-reduce-tonal.json'
spec_cfg = load_audio_representation(spectro_file, name="spectrogram")

dbi.create_database(output_file=db_name,  # empty brackets
                    dataset_name=r'train', selections=pp_tr, data_dir=data_folder,
                    audio_repres=spec_cfg)

dbi.create_database(output_file=db_name,  # empty brackets
                    dataset_name=r'test', selections=pp_te, data_dir=data_folder,
                    audio_repres=spec_cfg)

 71%|████████████████████████████████████████████████████████▌                       | 164/232 [00:15<00:01, 47.85it/s]

'tuple' object has no attribute 'stft_args'
'tuple' object has no attribute 'stft_args'
'tuple' object has no attribute 'stft_args'
'tuple' object has no attribute 'stft_args'


100%|████████████████████████████████████████████████████████████████████████████████| 232/232 [00:15<00:00, 14.59it/s]


228 items saved to E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\pp-w-false-additions.h5


100%|██████████████████████████████████████████████████████████████████████████████████| 35/35 [00:00<00:00, 61.28it/s]

35 items saved to E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\pp-w-false-additions.h5





In [7]:
main_folder = r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune'

model_folder = r'E:\baseline-with-normalization-reduce-tonal\models'

pretrained_models = [model_folder + '\\' + 'rs-model-0.kt', model_folder + '\\' + 'rs-model-1.kt', model_folder + '\\' + 'rs-model-2.kt', 
                     model_folder + '\\' + 'rs-model-3.kt', model_folder + '\\' + 'rs-model-4.kt', model_folder + '\\' + 'rs-model-5.kt',
                    model_folder + '\\' + 'rs-model-6.kt', model_folder + '\\' + 'rs-model-7.kt', model_folder + '\\' + 'rs-model-8.kt',
                    model_folder + '\\' + 'rs-model-9.kt']

new_models = [main_folder + '\\' + 'rs-model-0-ft.kt', main_folder + '\\' + 'rs-model-1-ft.kt', main_folder + '\\' + 'rs-model-2-ft.kt', 
              main_folder + '\\' + 'rs-model-3-ft.kt', main_folder + '\\' + 'rs-model-4-ft.kt', main_folder + '\\' + 'rs-model-5-ft.kt',
             main_folder + '\\' + 'rs-model-6-ft.kt', main_folder + '\\' + 'rs-model-7-ft.kt', main_folder + '\\' + 'rs-model-8-ft.kt',
              main_folder + '\\' + 'rs-model-9-ft.kt']

np_seeds = [1736, 680, 1996, 1522, 867, 543, 249, 707, 584, 1236, 161]
tf_seeds = [1660, 977, 1396, 1456, 1539, 673, 1743, 1492, 1776, 1273, 394]

In [8]:
# Set the batch size and number of epochs for training
batch_size = 16
n_epochs = 80

for idx, model in enumerate(pretrained_models):
    
    # Set the random seed for numpy and tensorflow
    np.random.seed(np_seeds[idx])
    tf.random.set_seed(tf_seeds[idx])

    # Set the log folder and checkpoint folder
    log_folder = main_folder + '\\' + 'logs' + str(idx)
    checkpoint_folder = main_folder + '\\' + 'checkpoints' +str(idx)
    
    # Open the database file in read mode
    db = dbi.open_file(db_name, 'r')
    
    # Open the training and validation tables respectively
    train_data = dbi.open_table(db, "/train/data")
    val_data = dbi.open_table(db, "/test/data")
    
    # Create batches of training data of size batch size, using the specified data table
    # This returns indices of the data in each batch along with their labels
    train_generator = BatchGenerator(batch_size=batch_size, data_table=train_data,
                                        output_transform_func=ResNetInterface.transform_batch,
                                        shuffle=True, refresh_on_epoch_end=True)
    
    # Create batches of validation data of size batch size, using the specified data table 
    # This returns indices of the data in each batch along with their labels 
    val_generator = BatchGenerator(batch_size=batch_size, data_table=val_data,
                                       output_transform_func=ResNetInterface.transform_batch,
                                       shuffle=False, refresh_on_epoch_end=False)
    
    # Load the pretrained model, replacing the top (aka. classification layers). This method inherently freezes the base.
    resnet = ResNetInterface.load(model, replace_top=True)
    
    # Set the training and validation generators to the batch generators created above
    resnet.train_generator = train_generator
    resnet.val_generator = val_generator
    
    # Set the model log and checkpoint directory
    resnet.log_dir = log_folder
    resnet.checkpoint_dir = checkpoint_folder
    
    # digging into their scripts for fine tuning
    # resnet.py, "clone with new top"
    
    # Train the model, looping through all of the training and validation data
    # See code map for more information
    resnet.train_loop(n_epochs=n_epochs, verbose=False, log_csv=True, csv_name='log-' + str(idx) +'.csv', validate=True)
    #resnet.train_loop(n_epochs=n_epochs, verbose=False, log_csv=True, csv_name='log.csv', validate=False)
    
    # Close the database
    db.close()
    
    # Save the model file, and keep track of the spectrogram parameters used to generate that model
    resnet.save(new_models[idx], audio_repr_file=spectro_file)
    
    print('Done')

Done
Done
Done
Done
Done
Done
Done
Done
Done
Done


## Deploy on Audio

In [10]:
def get_batch_generator(spectro_file, audio_folder, step_size, batch_size): 
    
    audio_repr = load_audio_representation(path=spectro_file)

    spec_config = audio_repr['spectrogram']

    audio_loader = AudioFrameLoader(path=audio_folder, duration=spec_config['duration'],
                                step=step_size, stop=False, representation=spec_config['type'],
                                representation_params=spec_config, pad=False)

    batch_generator = batch_load_audio_file_data(loader=audio_loader, batch_size=batch_size)

    return batch_generator

In [11]:
def load_models(model_names, temp_folders):
    
    models = []
    for idx, model_name in enumerate(model_names):
        models.append(ResNetInterface.load(model_file=model_name, new_model_folder=temp_folders[idx]))

    return models

In [12]:
def get_detections(batch_generator, models, output_dir, threshold, raven_txt, audio_folder):
    
    detections_pos = pd.DataFrame()
    detections_neg = pd.DataFrame()

    for ibx, batch_data in enumerate(batch_generator):

        for idx, model in enumerate(models):

            # Run the model on the spectrogram data from the current batch
            batch_predictions = model.run_on_batch(batch_data['data'], return_raw_output=True)

            if idx == 0:
                # Lets store our data in a dictionary
    
                raw_output_neg = {'filename': batch_data['filename'], 'start': batch_data['start'],
                                  'end': batch_data['end'], '0-0': batch_predictions[:, 0]}
                
                raw_output_pos = {'filename': batch_data['filename'], 'start': batch_data['start'],
                                  'end': batch_data['end'], '1-0': batch_predictions[:, 1]}
    
            else:
                raw_output_neg |= {'0-' + str(idx): batch_predictions[:, 0]}
                
                raw_output_pos |= {'1-' + str(idx): batch_predictions[:, 1]}
            
        detections_pos = pd.concat([detections_pos, pd.DataFrame.from_dict(raw_output_pos)])
        detections_neg = pd.concat([detections_neg, pd.DataFrame.from_dict(raw_output_neg)])

    detections_pos.to_excel(output_dir + '\\' + 'detections-pos.xlsx', index=False)
    detections_neg.to_excel(output_dir + '\\' + 'detections-neg.xlsx', index=False)

    mean_cols_pos = detections_pos.columns[3:]
    mean_cols_neg = detections_neg.columns[3:]
    
    #detections_pos['mean-pos'] = detections_pos[mean_cols_pos].mean(axis=1)
    detections_pos['med-pos'] = detections_pos[mean_cols_pos].quantile(0.5, axis=1)
    #detections_neg['mean-neg'] = detections_neg[mean_cols_neg].mean(axis=1)
    detections_neg['med-neg'] = detections_neg[mean_cols_neg].quantile(0.5, axis=1)
    
    #merge_df = detections_pos[['filename', 'start', 'end', 'mean-pos']].copy()
    merge_df = detections_pos[['filename', 'start', 'end', 'med-pos']].copy()
    #merge_df['mean-neg'] = detections_neg['mean-neg']
    merge_df['med-neg'] = detections_neg['med-neg']
    
    scores = []
    for row in merge_df.iterrows():
        score = [row[1]['med-neg'], row[1]['med-pos']]
        #score = [row[1]['mean-neg'], row[1]['mean-pos']]
        scores.extend([score])
    
    dict = {'filename': merge_df['filename'], 'start': merge_df['start'], 'end': merge_df['end'], 'score': scores}
    
    filter_detections = filter_by_threshold(dict, threshold=threshold)
    detections_filtered = filter_by_label(filter_detections, labels=1).reset_index(drop=True)
    print(len(detections_filtered))
    detections_grp = merge_overlapping_detections(detections_filtered)
    print(len(detections_grp))
    detections_grp.to_excel(output_dir + '\\' + 'detections-filtered-and-grouped.xlsx', index=False)
    
    results_table = detections_grp
    
    cols = ['filename']
    results_table.loc[:,cols] = results_table.loc[:,cols].ffill()
    results_table['Selection'] = results_table.index +1
    results_table['View'] = 'Spectrogram 1'
    results_table['Channel'] = 1
    results_table['Begin Path'] = audio_folder + '\\' + results_table.filename
    results_table['File Offset (s)'] = results_table.start
    results_table = results_table.rename(columns={"start": "Begin Time (s)", "end": "End Time (s)", "filename": "Begin File"})
    results_table['Begin File'] = results_table['Begin File']
    results_table['Low Freq (Hz)'] = 100
    results_table['High Freq (Hz)'] = 1200
    
    results_table.to_csv(raven_txt, index=False, sep='\t')

    return detections_grp

In [13]:
def get_one_min_dets(detections_grp, output_dir):

    one_min_det = pd.DataFrame(columns=['filename', '0-60s', '60-120s', '120-180s', '180-240s', '240+s'])
    all_files = np.unique(detections_grp['filename'])
    one_min_det['filename'] = all_files
    one_min_det.set_index('filename', inplace=True)
    one_min_det = one_min_det.fillna(0)
    
    for file in detections_grp['filename'].unique():
    
        temp = detections_grp[detections_grp['filename']==file]
        for row in temp.iterrows():
            if row[1].end < 60:
                one_min_det.at[file, '0-60s'] = one_min_det.loc[file]['0-60s'] + 1
            elif row[1].start >= 60 and row[1].end < 120:
                one_min_det.at[file, '60-120s'] = one_min_det.loc[file]['60-120s'] + 1
            elif row[1].start >= 120 and row[1].end < 180:
                one_min_det.at[file, '120-180s'] = one_min_det.loc[file]['120-180s'] + 1
            elif row[1].start >= 180 and row[1].end < 240:
                one_min_det.at[file, '180-240s'] = one_min_det.loc[file]['180-240s'] + 1
            elif row[1].start >= 240:
                one_min_det.at[file, '240+s'] = one_min_det.loc[file]['240+s'] + 1
    
    one_min_det['total'] = one_min_det.sum(axis=1)
    one_min_det.to_excel(output_dir + '\\' + 'one-min-dets.xlsx')

In [15]:
model_folder = r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune\fine-tuned-models'

model_names = [model_folder + "\\" + "rs-model-0-ft.kt", model_folder + "\\" + "rs-model-1-ft.kt", model_folder + "\\" + "rs-model-2-ft.kt",
            model_folder + "\\" + "rs-model-3-ft.kt", model_folder + "\\" + "rs-model-4-ft.kt", model_folder + "\\" + "rs-model-5-ft.kt",
            model_folder + "\\" + "rs-model-6-ft.kt", model_folder + "\\" + "rs-model-7-ft.kt", model_folder + "\\" + "rs-model-8-ft.kt",
            model_folder + "\\" + "rs-model-9-ft.kt"]

temp_folders = [model_folder + "\\" + "temp-0", model_folder + "\\" + "temp-1", model_folder + "\\" + "temp-2",
            model_folder + "\\" + "temp-3", model_folder + "\\" + "temp-4", model_folder + "\\" + "temp-5",
            model_folder + "\\" + "temp-6", model_folder + "\\" + "temp-7", model_folder + "\\" + "temp-8",
            model_folder + "\\" + "temp-9"]

spectro_file = r'E:\baseline-with-normalization-reduce-tonal\spec_config_100-1200Hz-0.032-hamm-normalized-reduce-tonal.json'

# Step 0.5s each time (overlap of 50% for 1 sec duration)
step_size = 0.5

# Number of samples in batch
batch_size = 16

In [16]:
main_folder = r'E:\baseline-with-normalization-reduce-tonal\pearce-point\fine-tune'

audio_folder = r'E:\baseline-with-normalization-reduce-tonal\pearce-point\audio'

# Threshold
threshold = 0.5

output_dir = main_folder
detections_csv = output_dir + '\\' + 'detections-avg.csv'
temp_folder = output_dir + '\\' + 'ringedS_tmp_folder'
pos_detection = output_dir + '\\' + 'grouped-filtered-dets.xlsx'
raven_txt = output_dir + '\\' + 'raven-formatted-detections.txt'

In [17]:
batch_generator = get_batch_generator(spectro_file, audio_folder, step_size, batch_size)
all_models = load_models(model_names, temp_folders)
detections_grp = get_detections(batch_generator, all_models, output_dir, threshold, raven_txt, audio_folder)
get_one_min_dets(detections_grp, output_dir)





100%|████████████████████████████████████████████████████████████████████████████████| 748/748 [02:54<00:00,  4.29it/s]


397
154
