## Ensemble Baseline

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
import shutil
import os 
import glob
import csv
import json
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix as confusion_matrix_sklearn
from random import randint

from ketos.data_handling import selection_table as sl
import ketos.data_handling.database_interface as dbi
from ketos.data_handling.parsing import load_audio_representation
from ketos.data_handling.data_feeding import BatchGenerator
from ketos.neural_networks.resnet import ResNetInterface
from ketos.audio.audio_loader import AudioFrameLoader, AudioLoader, SelectionTableIterator
from ketos.audio.spectrogram import MagSpectrogram
from ketos.neural_networks.dev_utils.detection import batch_load_audio_file_data, filter_by_threshold, filter_by_label, merge_overlapping_detections
from ketos.data_handling.data_feeding import JointBatchGen

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

print('done importing packages')

  super(Adam, self).__init__(name, **kwargs)


done importing packages


In [2]:
# Base folders: one for this ensemble's outputs, one for the existing baseline detector
main_folder = r'E:\ensemble-baseline'
baseline_folder = r'E:\final-baseline-detector'

# Spectrogram configuration file (lives in the ensemble folder); it is handed to
# resnet.save() as the audio representation file when each model is saved
spectro_file = '\\'.join([main_folder, 'spec_config_100-1200Hz-0.032-hamm-normalized0.json'])

# Network recipe and HDF5 database, both taken from the baseline folder
recipe_file = '\\'.join([baseline_folder, 'resnet_recipe-1.json'])
db_name = '\\'.join([baseline_folder, 'final-baseline-db.h5'])

# Excel sheet in the baseline folder, loaded into a DataFrame
# (presumably one row per audio file with its duration -- confirm against the sheet)
file_durations_file = '\\'.join([baseline_folder, 'all_file_durations_complete.xlsx'])
file_durations = pd.read_excel(file_durations_file)


# Number of models in the ensemble; every per-model list below must have this many entries
N_MODELS = 11

# Output file of each trained model: <main_folder>\final-baseline-model-<i>.kt
model_names = [main_folder + '\\' + 'final-baseline-model-' + str(i) + '.kt' for i in range(N_MODELS)]

# One temporary folder per model: <main_folder>\rs-temp-<i>
temp_folders = [main_folder + '\\' + 'rs-temp-' + str(i) for i in range(N_MODELS)]

# Fixed per-model random seeds for numpy and tensorflow, kept as literals so that
# every run of the notebook trains with the same seeds.
# NOTE(review): these look like they were drawn once with
# [randint(1, 2000) for i in range(11)] and then frozen -- confirm.
np_seeds = [1736, 680, 1996, 1522, 867, 543, 249, 707, 584, 1236, 161]
tf_seeds = [1660, 977, 1396, 1456, 1539, 673, 1743, 1492, 1776, 1273, 394]

# The training loop indexes the seed lists by model position, so a length
# mismatch would otherwise only surface as an IndexError part-way through training
assert len(np_seeds) == len(tf_seeds) == N_MODELS, 'each model needs exactly one numpy seed and one tensorflow seed'

# Folder holding the raw data (not referenced by the cells shown here)
data_folder = r'D:\ringed-seal-data'

In [None]:
# Train one ResNet per entry in model_names. Each ensemble member is built from the
# same recipe and the same database tables; the members differ in their numpy and
# tensorflow seeds and in their log, checkpoint and output locations.

# Batch size and number of epochs are identical for every member, so set them once
batch_size = 16
n_epochs = 40

for idx, model in enumerate(model_names):

    # Seed numpy and tensorflow with this member's own pair of seeds
    np.random.seed(np_seeds[idx])
    tf.random.set_seed(tf_seeds[idx])

    # Per-member log and checkpoint folders, so members do not overwrite each other
    log_folder = main_folder + '\\' + 'logs' + str(idx)
    checkpoint_folder = main_folder + '\\' + 'checkpoints' + str(idx)

    # Open the database file in read mode
    db = dbi.open_file(db_name, 'r')
    try:
        # Open the training and validation tables respectively
        train_data = dbi.open_table(db, "/train/data")
        val_data = dbi.open_table(db, "/val/data")

        # Batch generator over the training table: shuffled, and refreshed at the end of each epoch
        train_generator = BatchGenerator(batch_size=batch_size, data_table=train_data,
                                         output_transform_func=ResNetInterface.transform_batch,
                                         shuffle=True, refresh_on_epoch_end=True)

        # Batch generator over the validation table: fixed order, no refresh
        val_generator = BatchGenerator(batch_size=batch_size, data_table=val_data,
                                       output_transform_func=ResNetInterface.transform_batch,
                                       shuffle=False, refresh_on_epoch_end=False)

        # Build a fresh ResNetInterface from the recipe file for this member
        resnet = ResNetInterface.build(recipe_file)

        # Attach the training and validation generators created above
        resnet.train_generator = train_generator
        resnet.val_generator = val_generator

        # Set the model log and checkpoint directories
        resnet.log_dir = log_folder
        resnet.checkpoint_dir = checkpoint_folder

        # Train for n_epochs, writing the training log to a per-member CSV file
        resnet.train_loop(n_epochs=n_epochs, verbose=False, log_csv=True, csv_name='log-' + str(idx) + '.csv')
    finally:
        # Close the database even if building or training raised, so the file
        # handle is not left open in the kernel after a failed run
        db.close()

    # Save the model file, and keep track of the spectrogram parameters used to generate that model
    resnet.save(model, audio_repr_file=spectro_file)

    print('Done training model # ' + str(idx))