# Tests - 20240116

In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
import shutil
import os 
import glob
import csv
import json
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix as confusion_matrix_sklearn

from ketos.data_handling import selection_table as sl
import ketos.data_handling.database_interface as dbi
from ketos.data_handling.parsing import load_audio_representation
from ketos.data_handling.data_feeding import BatchGenerator
from ketos.neural_networks.resnet import ResNetInterface
from ketos.audio.audio_loader import AudioFrameLoader, AudioLoader, SelectionTableIterator
from ketos.audio.spectrogram import MagSpectrogram
from ketos.neural_networks.dev_utils.detection import batch_load_audio_file_data, filter_by_threshold
from ketos.data_handling.data_feeding import JointBatchGen

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

print('done importing packages')

done importing packages


## Manual Dataset Tests

### Get Dataset Split w 68/32 Split for All/Ulu2022

In [9]:
pos_folder = r'E:\tests\1sec-manual\inputs\annots\pos'

# Get list of all csv files in that folder
files_pos = glob.glob(pos_folder + "/*.csv")

site_names = []
num_annots = []

# For each csv file
for file in files_pos:

    annots = pd.read_csv(file)

    site_name = file.split('\\')[-1].split('.')[0].split('_')[0]

    site_names.append(site_name)
    num_annots.append(len(annots))

ULU22_val = num_annots[site_names.index('ULU2022')]
other_val = sum(num_annots) - ULU22_val
all_annots = sum(num_annots)

ulu_2022_split = 0.32
all_else_split = 1 - ulu_2022_split

dataset_split = [0.7, 0.2, 0.1]

train_annots = round(all_annots*dataset_split[0])
val_annots = round(all_annots*dataset_split[1])
test_annots = round(all_annots*dataset_split[2])

# ulu22 vals
ulu22_tr = round(train_annots*ulu_2022_split)
ulu22_va = round(val_annots*ulu_2022_split)
ulu22_te = round(test_annots*ulu_2022_split)

ulu22_leftovers = ULU22_val - ulu22_tr - ulu22_va - ulu22_te

if ulu22_leftovers < 0:
    ulu22_te = ulu22_te + ulu22_leftovers

if ulu22_leftovers > 0 :
    ulu22_tr = ulu22_tr + ulu22_leftovers

all_ulu = ulu22_tr + ulu22_te + ulu22_va

if all_ulu != ULU22_val:
    print('Something went wrong with Ulu')
    exit()

ulu2022_vals = [ulu22_tr, ulu22_va, ulu22_te]

# rest vals
rest_tr = round(train_annots*all_else_split)
rest_va = round(val_annots*all_else_split)
rest_te = round(test_annots*all_else_split)

# totals
all_added = rest_tr + rest_va + rest_te

if all_added < other_val:
    leftover = other_val - all_added
    rest_tr = rest_tr + leftover

if all_added > other_val:
    leftover = all_added - other_val
    rest_va = rest_va - leftover

all_added2 = rest_tr + rest_va + rest_te

cb_perc = num_annots[0]/all_added2
kk_perc = num_annots[1]/all_added2
ulu_perc = num_annots[4]/all_added2

# split into other site vals
cb_tr = round(cb_perc*rest_tr)
cb_va = round(cb_perc*rest_va)
cb_te = round(cb_perc*rest_te)
total_cb = cb_tr + cb_va + cb_te
if total_cb < num_annots[0]:
    leftover_cb = num_annots[0] - total_cb
    cb_tr = cb_tr + leftover_cb
if total_cb > num_annots[0]:
    leftover_cb = num_annots[0] - total_cb
    cb_va = cb_va + leftover_cb
cb_vals = [cb_tr, cb_va, cb_te]

kk_tr = round(kk_perc*rest_tr)
kk_va = round(kk_perc*rest_va)
kk_te = round(kk_perc*rest_te)
total_kk = kk_tr + kk_va + kk_te
if total_kk < num_annots[1]:
    leftover_kk = num_annots[1] - total_kk
    kk_tr = kk_tr + leftover_kk
if total_kk > num_annots[1]:
    leftover_kk = num_annots[1] - total_kk
    kk_va = kk_va + leftover_kk
kk_vals = [kk_tr, kk_va, kk_te]
    
ulu_tr = round(ulu_perc*rest_tr)
ulu_va = round(ulu_perc*rest_va)
ulu_te = round(ulu_perc*rest_te)
total_ulu = ulu_tr + ulu_va + ulu_te
if total_ulu < num_annots[4]:
    leftover_ulu = num_annots[4] - total_ulu
    ulu_tr = ulu_tr + leftover_ulu
if total_ulu > num_annots[4]:
    leftover_ulu = num_annots[4] - total_ulu
    ulu_va = ulu_va + leftover_ulu
ulu_vals = [ulu_tr, ulu_va, ulu_te]

print('ulu22 vals (tr, va, te): ' + str(ulu2022_vals) + ', total: ' + str(sum(ulu2022_vals)))
print('ulu vals (tr, va, te): ' + str(ulu_vals) + ', total: ' + str(sum(ulu_vals)))
print('kk vals (tr, va, te): ' + str(kk_vals) + ', total: ' + str(sum(kk_vals)))
print('cb vals (tr, va, te): ' + str(cb_vals) + ', total: ' + str(sum(cb_vals)))

ulu22 vals (tr, va, te): [1037, 296, 143], total: 1476
ulu vals (tr, va, te): [669, 191, 95], total: 955
kk vals (tr, va, te): [1348, 384, 192], total: 1924
cb vals (tr, va, te): [133, 38, 19], total: 190


In [10]:
# Load in user inputs 
main_folder = r'E:\tests\1sec-manual'

# These are copied from the 2sec edited folder 
neg_folder = r'E:\tests\1sec-manual\inputs\annots\neg'
pos_folder = r'E:\tests\1sec-manual\inputs\annots\pos'

file_durations_file = r'E:\tests\all_file_durations_complete.xlsx'
file_durations = pd.read_excel(file_durations_file)

data_folder = r'D:\ringed-seal-data'

In [11]:
## Create Database ##

# negatives tables and standarize for ketos
ulu_neg = pd.read_excel(neg_folder + '\\' + 'ULU-negs-joined.xlsx')
ulu_neg = ulu_neg.ffill()
ulu_neg = sl.standardize(table=ulu_neg)
print('Negatives standardized? ' + str(sl.is_standardized(ulu_neg)))

ulu2022_neg = pd.read_excel(neg_folder + '\\' + 'ULU2022-negs-joined.xlsx')
ulu2022_neg = ulu2022_neg.ffill()
ulu2022_neg = sl.standardize(table=ulu2022_neg)
print('Negatives standardized? ' + str(sl.is_standardized(ulu2022_neg)))

kk_neg = pd.read_excel(neg_folder + '\\' + 'KK-negs-joined.xlsx')
kk_neg = kk_neg.ffill()
kk_neg = sl.standardize(table=kk_neg)
print('Negatives standardized? ' + str(sl.is_standardized(kk_neg)))

cb_neg = pd.read_excel(neg_folder + '\\' + 'CB-negs-joined.xlsx')
cb_neg = cb_neg.ffill()
cb_neg = sl.standardize(table=cb_neg)
print('Negatives standardized? ' + str(sl.is_standardized(cb_neg)))

# positives tables
ulu_pos = pd.read_csv(pos_folder + '\\' + 'ULU_all_formatted_1sec.csv')
ulu_pos = ulu_pos.ffill()
ulu_pos = sl.standardize(table=ulu_pos, start_labels_at_1=True)
print('Positives standardized? ' + str(sl.is_standardized(ulu_pos)))

ulu2022_pos = pd.read_csv(pos_folder + '\\' + 'ULU2022_all_formatted_1sec.csv')
ulu2022_pos = ulu2022_pos.ffill()
ulu2022_pos = sl.standardize(table=ulu2022_pos, start_labels_at_1=True)
print('Positives standardized? ' + str(sl.is_standardized(ulu2022_pos)))

kk_pos = pd.read_csv(pos_folder + '\\' + 'KK_all_formatted_1sec.csv')
kk_pos = kk_pos.ffill()
kk_pos = sl.standardize(table=kk_pos, start_labels_at_1=True)
print('Positives standardized? ' + str(sl.is_standardized(kk_pos)))

cb_pos = pd.read_csv(pos_folder + '\\' + 'CB_all_formatted_1sec.csv')
cb_pos = cb_pos.ffill()
cb_pos = sl.standardize(table=cb_pos, start_labels_at_1=True)
print('Positives standardized? ' + str(sl.is_standardized(cb_pos)))

# join into complete tables

ulu_pos_tr = ulu_pos.head(ulu_vals[0])
ulu_pos_va = ulu_pos[ulu_vals[0]:ulu_vals[0] + ulu_vals[1]]
ulu_pos_te = ulu_pos.tail(ulu_vals[2])

ulu_neg_tr = ulu_neg.head(ulu_vals[0])
ulu_neg_va = ulu_neg[ulu_vals[0]:ulu_vals[0] + ulu_vals[1]]
ulu_neg_te = ulu_neg.tail(ulu_vals[2])

ulu_tr = pd.concat([ulu_pos_tr, ulu_neg_tr])
ulu_va = pd.concat([ulu_pos_va, ulu_neg_va])
ulu_te = pd.concat([ulu_pos_te, ulu_neg_te])

ulu2022_pos_tr = ulu2022_pos.head(ulu2022_vals[0])
ulu2022_pos_va = ulu2022_pos[ulu2022_vals[0]:ulu2022_vals[0] + ulu2022_vals[1]]
ulu2022_pos_te = ulu2022_pos.tail(ulu2022_vals[2])

ulu2022_neg_tr = ulu2022_neg.head(ulu2022_vals[0])
ulu2022_neg_va = ulu2022_neg[ulu2022_vals[0]:ulu2022_vals[0] + ulu2022_vals[1]]
ulu2022_neg_te = ulu2022_neg.tail(ulu2022_vals[2])

ulu2022_tr = pd.concat([ulu2022_pos_tr, ulu2022_neg_tr])
ulu2022_va = pd.concat([ulu2022_pos_va, ulu2022_neg_va])
ulu2022_te = pd.concat([ulu2022_pos_te, ulu2022_neg_te])

kk_pos_tr = kk_pos.head(kk_vals[0])
kk_pos_va = kk_pos[kk_vals[0]:kk_vals[0] + kk_vals[1]]
kk_pos_te = kk_pos.tail(kk_vals[2])

kk_neg_tr = kk_neg.head(kk_vals[0])
kk_neg_va = kk_neg[kk_vals[0]:kk_vals[0] + kk_vals[1]]
kk_neg_te = kk_neg.tail(kk_vals[2])

kk_tr = pd.concat([kk_pos_tr, kk_neg_tr])
kk_va = pd.concat([kk_pos_va, kk_neg_va])
kk_te = pd.concat([kk_pos_te, kk_neg_te])

cb_pos_tr = cb_pos.head(cb_vals[0])
cb_pos_va = cb_pos[cb_vals[0]:cb_vals[0] + cb_vals[1]]
cb_pos_te = cb_pos.tail(cb_vals[2])

cb_neg_tr = cb_neg.head(cb_vals[0])
cb_neg_va = cb_neg[cb_vals[0]:cb_vals[0] + cb_vals[1]]
cb_neg_te = cb_neg.tail(cb_vals[2])

cb_tr = pd.concat([cb_pos_tr, cb_neg_tr])
cb_va = pd.concat([cb_pos_va, cb_neg_va])
cb_te = pd.concat([cb_pos_te, cb_neg_te])

# final three tables

train = pd.concat([ulu_tr, ulu2022_tr, cb_tr, kk_tr])
val = pd.concat([ulu_va, ulu2022_va, cb_va, kk_va])
test = pd.concat([ulu_te, ulu2022_te, cb_te, kk_te])

Negatives standardized? True
Negatives standardized? True
Negatives standardized? True
Negatives standardized? True
Positives standardized? True
Positives standardized? True
Positives standardized? True
Positives standardized? True


In [12]:
drop_rows_tr = []

print('cleaning training table of original length ' + str(len(train)))

for idex, row in train.iterrows():

    # filename is row[0], end time is idex.end
    index = file_durations.loc[file_durations['filename'] == row.name[0]].index
    duration = file_durations['duration'][index].values[0]

    if duration < row.end:
        # drop the row corresponding to that sel_id and filename from the dataframe
        drop_rows_tr.append(idex)

    if row.start < 0:
        drop_rows_tr.append(idex)

print('Number of dropped rows: ' + str(len(drop_rows_tr)))

train = train.drop(drop_rows_tr)
print(len(train))

cleaning training table of original length 6374
Number of dropped rows: 10
6362


In [13]:
drop_rows_va = []

print('cleaning validation table of original length ' + str(len(val)))

for idex, row in val.iterrows():

    # filename is row[0], end time is idex.end
    index = file_durations.loc[file_durations['filename'] == row.name[0]].index
    duration = file_durations['duration'][index].values[0]

    if duration < row.end:
        # drop the row corresponding to that sel_id and filename from the dataframe
        drop_rows_va.append(idex)

    if row.start < 0:
        drop_rows_va.append(idex)

print('Number of dropped rows: ' + str(len(drop_rows_va)))

val = val.drop(drop_rows_va)
print(len(val))

cleaning validation table of original length 1818
Number of dropped rows: 4
1813


In [14]:
drop_rows_te = []

print('cleaning test table of original length ' + str(len(test)))

for idex, row in test.iterrows():

    # filename is row[0], end time is idex.end
    index = file_durations.loc[file_durations['filename'] == row.name[0]].index
    duration = file_durations['duration'][index].values[0]

    if duration < row.end:
        # drop the row corresponding to that sel_id and filename from the dataframe
        drop_rows_te.append(idex)

    if row.start < 0:
        drop_rows_te.append(idex)

print('Number of dropped rows: ' + str(len(drop_rows_te)))

test = test.drop(drop_rows_te)
print(len(test))

cleaning test table of original length 898
Number of dropped rows: 2
894


### Spectro Tests

In [24]:
# Get number of spectrogram tests 
spectro_folder = r'E:\tests\spectro-files'
spectro_files = glob.glob(spectro_folder + "/*.json")

recipe = r'E:\tests\recipe-files\resnet_recipe-1.json'

spectro_test_folder = r'E:\tests\1sec-manual\spectro-tests'

# one: 100 to 1200 (usual)
# two: 0 to 1500
# three: 0 to 3000
# four: usual, rate 2000
# five: usual, rate 4000
# six: usual, window 0.032

db_names = [spectro_test_folder + '\\' + 'spectro-1-db.h5', spectro_test_folder + '\\' + 'spectro-2-db.h5', spectro_test_folder + '\\' + 'spectro-3-db.h5',
             spectro_test_folder + '\\' + 'spectro-4-db.h5', spectro_test_folder + '\\' + 'spectro-5-db.h5', spectro_test_folder + '\\' + 'spectro-6-db.h5']

model_names = [spectro_test_folder + '\\' + 'rs-spec1.kt', spectro_test_folder + '\\' + 'rs-spec2.kt', spectro_test_folder + '\\' + 'rs-spec3.kt',
               spectro_test_folder + '\\' + 'rs-spec4.kt', spectro_test_folder + '\\' + 'rs-spec5.kt', spectro_test_folder + '\\' + 'rs-spec6.kt']

temp_folders = [spectro_test_folder + '\\' + 'rs-temp-1', spectro_test_folder + '\\' + 'rs-temp-2', spectro_test_folder + '\\' + 'rs-temp-3',
               spectro_test_folder + '\\' + 'rs-temp-4', spectro_test_folder + '\\' + 'rs-temp-5', spectro_test_folder + '\\' + 'rs-temp-6']

In [21]:
#import tables
#tables.file._open_files.close_all()

In [22]:
# For each spectro file:
for idex, spec in enumerate(spectro_files):

    spec_file = spectro_folder + '\\' + 'spec_config_' + str(idex+1) + '.json'
    
    # join into a database
    # Load the spectrogram representation & parameters, this returns a dict 
    spec_cfg = load_audio_representation(spec_file, name="spectrogram")

    # Create a table called "train" in the database, defined by db_name, using the "train" selections table, the spectrogram config, and the audio data 
    # Behind the hood, this creates an AudioLoader and AudioWriter Ketos function which generates the spectrograms for each selection 
    # For the specific spectrogram in this case, the spectrograms are of size [1500,56] where 56 refers to the frequency dimension and 1500 refers to the time dimension 
    # The size of the spectrogram is 1500*56, which is 84000
    dbi.create_database(output_file=db_names[idex],  # empty brackets
                        dataset_name=r'train', selections=train, data_dir=data_folder,
                        audio_repres=spec_cfg)
    
    dbi.create_database(output_file=db_names[idex],  # empty brackets
                        dataset_name=r'val', selections=val, data_dir=data_folder,
                        audio_repres=spec_cfg)
    
    dbi.create_database(output_file=db_names[idex],  # empty brackets
                        dataset_name=r'test', selections=test, data_dir=data_folder,
                        audio_repres=spec_cfg)

100%|██████████████████████████████████████████████████████████████████████████████| 6362/6362 [02:33<00:00, 41.46it/s]


6362 items saved to E:\tests\1sec-manual\spectro-tests\spectro-1-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1813/1813 [00:42<00:00, 42.81it/s]


1813 items saved to E:\tests\1sec-manual\spectro-tests\spectro-1-db.h5


100%|████████████████████████████████████████████████████████████████████████████████| 894/894 [00:21<00:00, 41.00it/s]


894 items saved to E:\tests\1sec-manual\spectro-tests\spectro-1-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 6362/6362 [01:06<00:00, 95.17it/s]


6362 items saved to E:\tests\1sec-manual\spectro-tests\spectro-2-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1813/1813 [00:19<00:00, 94.52it/s]


1813 items saved to E:\tests\1sec-manual\spectro-tests\spectro-2-db.h5


100%|████████████████████████████████████████████████████████████████████████████████| 894/894 [00:09<00:00, 94.30it/s]


894 items saved to E:\tests\1sec-manual\spectro-tests\spectro-2-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 6362/6362 [01:07<00:00, 93.64it/s]


6362 items saved to E:\tests\1sec-manual\spectro-tests\spectro-3-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1813/1813 [00:19<00:00, 92.84it/s]


1813 items saved to E:\tests\1sec-manual\spectro-tests\spectro-3-db.h5


100%|████████████████████████████████████████████████████████████████████████████████| 894/894 [00:09<00:00, 90.44it/s]


894 items saved to E:\tests\1sec-manual\spectro-tests\spectro-3-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 6362/6362 [01:09<00:00, 92.10it/s]


6362 items saved to E:\tests\1sec-manual\spectro-tests\spectro-4-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1813/1813 [00:19<00:00, 91.42it/s]


1813 items saved to E:\tests\1sec-manual\spectro-tests\spectro-4-db.h5


100%|████████████████████████████████████████████████████████████████████████████████| 894/894 [00:09<00:00, 92.17it/s]


894 items saved to E:\tests\1sec-manual\spectro-tests\spectro-4-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 6362/6362 [01:36<00:00, 65.91it/s]


6362 items saved to E:\tests\1sec-manual\spectro-tests\spectro-5-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1813/1813 [00:43<00:00, 41.35it/s]


1813 items saved to E:\tests\1sec-manual\spectro-tests\spectro-5-db.h5


100%|████████████████████████████████████████████████████████████████████████████████| 894/894 [00:23<00:00, 37.26it/s]


894 items saved to E:\tests\1sec-manual\spectro-tests\spectro-5-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 6362/6362 [02:12<00:00, 48.10it/s]


6362 items saved to E:\tests\1sec-manual\spectro-tests\spectro-6-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1813/1813 [00:35<00:00, 51.25it/s]


1813 items saved to E:\tests\1sec-manual\spectro-tests\spectro-6-db.h5


100%|████████████████████████████████████████████████████████████████████████████████| 894/894 [00:15<00:00, 58.24it/s]


894 items saved to E:\tests\1sec-manual\spectro-tests\spectro-6-db.h5


In [26]:
for idex, spec in enumerate(spectro_files): 

    spec_file = spectro_folder + '\\' + 'spec_config_' + str(idex+1) + '.json'
    
    # join into a database
    # Load the spectrogram representation & parameters, this returns a dict 
    spec_cfg = load_audio_representation(spec_file, name="spectrogram")

    # Set the random seed for numpy and tensorflow 
    np.random.seed(1000)
    tf.random.set_seed(2000)
    
    # Set the batch size and number of epochs for training
    batch_size = 16
    n_epochs = 40
    
    # Set the log folder and checkpoint folder 
    log_folder = spectro_test_folder + '\\' + 'logs' + str(idex)
    checkpoint_folder = spectro_test_folder + '\\' + 'checkpoints' + str(idex)
    
    # Open the database file in read mode
    db = dbi.open_file(db_names[idex], 'r')
    
    # Open the training and validation tables respectively 
    train_data = dbi.open_table(db, "/train/data")
    val_data = dbi.open_table(db, "/val/data")
    
    # Create batches of training data of size batch size, using the specified data table 
    # This returns indices of the data in each batch along with their labels 
    train_generator = BatchGenerator(batch_size=batch_size, data_table=train_data,
                                        output_transform_func=ResNetInterface.transform_batch,
                                        shuffle=True, refresh_on_epoch_end=True)
    
    # Create batches of validation data of size batch size, using the specified data table 
    # This returns indices of the data in each batch along with their labels 
    val_generator = BatchGenerator(batch_size=batch_size, data_table=val_data,
                                       output_transform_func=ResNetInterface.transform_batch,
                                       shuffle=False, refresh_on_epoch_end=False)
    
    # Build the ResNet model file based off of the recipe file - this creates a "ResNetInterface" object 
    resnet = ResNetInterface.build(recipe)
    
    # Set the training and validation generators to the batch generators created above 
    resnet.train_generator = train_generator
    resnet.val_generator = val_generator
    
    # Set the model log and checkpoint directory 
    resnet.log_dir = log_folder
    resnet.checkpoint_dir = checkpoint_folder
    
    # Train the model, looping through all of the training and validation data 
    # See code map for more information
    resnet.train_loop(n_epochs=n_epochs, verbose=False, log_csv=True, csv_name='log.csv')
    
    # Close the database 
    db.close()
    
    # Save the model file, and keep track of the spectrogram parameters used to generate that model 
    resnet.save(model_names[idex], audio_repr_file=spec_file)

    print('Done spectro ' + str(idex+1))

Done spectro 1
Done spectro 2
Done spectro 3
Done spectro 4
Done spectro 5
Done spectro 6


#### Recipe 2

In [27]:
# Get number of spectrogram tests 
spectro_folder = r'E:\tests\spectro-files'
spectro_files = glob.glob(spectro_folder + "/*.json")

recipe = r'E:\tests\recipe-files\resnet_recipe-2.json'

spectro_test_folder = r'E:\tests\1sec-manual\recipe-2'

# one: 100 to 1200 (usual)
# two: 0 to 1500
# three: 0 to 3000
# four: usual, rate 2000
# five: usual, rate 4000
# six: usual, window 0.032

db_names = [r'E:\tests\1sec-manual\spectro-tests' + '\\' + 'spectro-1-db.h5', r'E:\tests\1sec-manual\spectro-tests' + '\\' + 'spectro-2-db.h5', 
            r'E:\tests\1sec-manual\spectro-tests' + '\\' + 'spectro-3-db.h5', r'E:\tests\1sec-manual\spectro-tests' + '\\' + 'spectro-4-db.h5', 
            r'E:\tests\1sec-manual\spectro-tests' + '\\' + 'spectro-5-db.h5', r'E:\tests\1sec-manual\spectro-tests' + '\\' + 'spectro-6-db.h5']

model_names = [spectro_test_folder + '\\' + 'rs-spec1-r2.kt', spectro_test_folder + '\\' + 'rs-spec2-r2.kt', spectro_test_folder + '\\' + 'rs-spec3-r2.kt',
               spectro_test_folder + '\\' + 'rs-spec4-r2.kt', spectro_test_folder + '\\' + 'rs-spec5-r2.kt', spectro_test_folder + '\\' + 'rs-spec6-r2.kt']

temp_folders = [spectro_test_folder + '\\' + 'rs-temp-1', spectro_test_folder + '\\' + 'rs-temp-2', spectro_test_folder + '\\' + 'rs-temp-3',
               spectro_test_folder + '\\' + 'rs-temp-4', spectro_test_folder + '\\' + 'rs-temp-5', spectro_test_folder + '\\' + 'rs-temp-6']

In [28]:
for idex, spec in enumerate(spectro_files): 

    spec_file = spectro_folder + '\\' + 'spec_config_' + str(idex+1) + '.json'
    
    # join into a database
    # Load the spectrogram representation & parameters, this returns a dict 
    spec_cfg = load_audio_representation(spec_file, name="spectrogram")

    # Set the random seed for numpy and tensorflow 
    np.random.seed(1000)
    tf.random.set_seed(2000)
    
    # Set the batch size and number of epochs for training
    batch_size = 16
    n_epochs = 40
    
    # Set the log folder and checkpoint folder 
    log_folder = spectro_test_folder + '\\' + 'logs' + str(idex)
    checkpoint_folder = spectro_test_folder + '\\' + 'checkpoints' + str(idex)
    
    # Open the database file in read mode
    db = dbi.open_file(db_names[idex], 'r')
    
    # Open the training and validation tables respectively 
    train_data = dbi.open_table(db, "/train/data")
    val_data = dbi.open_table(db, "/val/data")
    
    # Create batches of training data of size batch size, using the specified data table 
    # This returns indices of the data in each batch along with their labels 
    train_generator = BatchGenerator(batch_size=batch_size, data_table=train_data,
                                        output_transform_func=ResNetInterface.transform_batch,
                                        shuffle=True, refresh_on_epoch_end=True)
    
    # Create batches of validation data of size batch size, using the specified data table 
    # This returns indices of the data in each batch along with their labels 
    val_generator = BatchGenerator(batch_size=batch_size, data_table=val_data,
                                       output_transform_func=ResNetInterface.transform_batch,
                                       shuffle=False, refresh_on_epoch_end=False)
    
    # Build the ResNet model file based off of the recipe file - this creates a "ResNetInterface" object 
    resnet = ResNetInterface.build(recipe)
    
    # Set the training and validation generators to the batch generators created above 
    resnet.train_generator = train_generator
    resnet.val_generator = val_generator
    
    # Set the model log and checkpoint directory 
    resnet.log_dir = log_folder
    resnet.checkpoint_dir = checkpoint_folder
    
    # Train the model, looping through all of the training and validation data 
    # See code map for more information
    resnet.train_loop(n_epochs=n_epochs, verbose=False, log_csv=True, csv_name='log.csv')
    
    # Close the database 
    db.close()
    
    # Save the model file, and keep track of the spectrogram parameters used to generate that model 
    resnet.save(model_names[idex], audio_repr_file=spec_file)

    print('Done spectro ' + str(idex+1))

Done spectro 1
Done spectro 2
Done spectro 3
Done spectro 4
Done spectro 5
Done spectro 6


# Auto Dataset

In [29]:
pos_folder = r'E:\tests\1sec-auto\inputs\annots\pos'

# Get list of all csv files in that folder
files_pos = glob.glob(pos_folder + "/*.csv")

site_names = []
num_annots = []

# For each csv file
for file in files_pos:

    annots = pd.read_csv(file)

    site_name = file.split('\\')[-1].split('.')[0].split('_')[0]

    site_names.append(site_name)
    num_annots.append(len(annots))

ULU22_val = num_annots[site_names.index('ULU2022')]
other_val = sum(num_annots) - ULU22_val
all_annots = sum(num_annots)

ulu_2022_split = 0.32
all_else_split = 1 - ulu_2022_split

dataset_split = [0.7, 0.2, 0.1]

train_annots = round(all_annots*dataset_split[0])
val_annots = round(all_annots*dataset_split[1])
test_annots = round(all_annots*dataset_split[2])

# ulu22 vals
ulu22_tr = round(train_annots*ulu_2022_split)
ulu22_va = round(val_annots*ulu_2022_split)
ulu22_te = round(test_annots*ulu_2022_split)

ulu22_leftovers = ULU22_val - ulu22_tr - ulu22_va - ulu22_te

if ulu22_leftovers < 0:
    ulu22_te = ulu22_te + ulu22_leftovers

if ulu22_leftovers > 0 :
    ulu22_tr = ulu22_tr + ulu22_leftovers

all_ulu = ulu22_tr + ulu22_te + ulu22_va

if all_ulu != ULU22_val:
    print('Something went wrong with Ulu')
    exit()

ulu2022_vals = [ulu22_tr, ulu22_va, ulu22_te]

# rest vals
rest_tr = round(train_annots*all_else_split)
rest_va = round(val_annots*all_else_split)
rest_te = round(test_annots*all_else_split)

# totals
all_added = rest_tr + rest_va + rest_te

if all_added < other_val:
    leftover = other_val - all_added
    rest_tr = rest_tr + leftover

if all_added > other_val:
    leftover = all_added - other_val
    rest_va = rest_va - leftover

all_added2 = rest_tr + rest_va + rest_te

cb_perc = num_annots[0]/all_added2
kk_perc = num_annots[1]/all_added2
ulu_perc = num_annots[4]/all_added2

# split into other site vals
cb_tr = round(cb_perc*rest_tr)
cb_va = round(cb_perc*rest_va)
cb_te = round(cb_perc*rest_te)
total_cb = cb_tr + cb_va + cb_te
if total_cb < num_annots[0]:
    leftover_cb = num_annots[0] - total_cb
    cb_tr = cb_tr + leftover_cb
if total_cb > num_annots[0]:
    leftover_cb = num_annots[0] - total_cb
    cb_va = cb_va + leftover_cb
cb_vals = [cb_tr, cb_va, cb_te]

kk_tr = round(kk_perc*rest_tr)
kk_va = round(kk_perc*rest_va)
kk_te = round(kk_perc*rest_te)
total_kk = kk_tr + kk_va + kk_te
if total_kk < num_annots[1]:
    leftover_kk = num_annots[1] - total_kk
    kk_tr = kk_tr + leftover_kk
if total_kk > num_annots[1]:
    leftover_kk = num_annots[1] - total_kk
    kk_va = kk_va + leftover_kk
kk_vals = [kk_tr, kk_va, kk_te]
    
ulu_tr = round(ulu_perc*rest_tr)
ulu_va = round(ulu_perc*rest_va)
ulu_te = round(ulu_perc*rest_te)
total_ulu = ulu_tr + ulu_va + ulu_te
if total_ulu < num_annots[4]:
    leftover_ulu = num_annots[4] - total_ulu
    ulu_tr = ulu_tr + leftover_ulu
if total_ulu > num_annots[4]:
    leftover_ulu = num_annots[4] - total_ulu
    ulu_va = ulu_va + leftover_ulu
ulu_vals = [ulu_tr, ulu_va, ulu_te]

print('ulu22 vals (tr, va, te): ' + str(ulu2022_vals) + ', total: ' + str(sum(ulu2022_vals)))
print('ulu vals (tr, va, te): ' + str(ulu_vals) + ', total: ' + str(sum(ulu_vals)))
print('kk vals (tr, va, te): ' + str(kk_vals) + ', total: ' + str(sum(kk_vals)))
print('cb vals (tr, va, te): ' + str(cb_vals) + ', total: ' + str(sum(cb_vals)))

ulu22 vals (tr, va, te): [2266, 647, 262], total: 3175
ulu vals (tr, va, te): [1568, 442, 221], total: 2231
kk vals (tr, va, te): [2874, 811, 405], total: 4090
cb vals (tr, va, te): [294, 83, 41], total: 418


In [30]:
# Load in user inputs 
main_folder = r'E:\tests\1sec-auto'

# These are copied from the 2sec edited folder 
neg_folder = r'E:\tests\1sec-auto\inputs\annots\neg'
pos_folder = r'E:\tests\1sec-auto\inputs\annots\pos'

file_durations_file = r'E:\tests\all_file_durations_complete.xlsx'
file_durations = pd.read_excel(file_durations_file)

data_folder = r'D:\ringed-seal-data'

In [31]:
## Create Database ##

# negatives tables and standarize for ketos
ulu_neg = pd.read_csv(neg_folder + '\\' + 'ULU_all_formatted_shifted_negatives.csv')
ulu_neg = ulu_neg.ffill()
ulu_neg = sl.standardize(table=ulu_neg)
print('Negatives standardized? ' + str(sl.is_standardized(ulu_neg)))

ulu2022_neg = pd.read_csv(neg_folder + '\\' + 'ULU2022_all_formatted_shifted_negatives.csv')
ulu2022_neg = ulu2022_neg.ffill()
ulu2022_neg = sl.standardize(table=ulu2022_neg)
print('Negatives standardized? ' + str(sl.is_standardized(ulu2022_neg)))

kk_neg = pd.read_csv(neg_folder + '\\' + 'KK_all_formatted_shifted_negatives.csv')
kk_neg = kk_neg.ffill()
kk_neg = sl.standardize(table=kk_neg)
print('Negatives standardized? ' + str(sl.is_standardized(kk_neg)))

cb_neg = pd.read_csv(neg_folder + '\\' + 'CB_all_formatted_shifted_negatives.csv')
cb_neg = cb_neg.ffill()
cb_neg = sl.standardize(table=cb_neg)
print('Negatives standardized? ' + str(sl.is_standardized(cb_neg)))

# positives tables
ulu_pos = pd.read_csv(pos_folder + '\\' + 'ULU_all_formatted_shifted.csv')
ulu_pos = ulu_pos.ffill()
ulu_pos = sl.standardize(table=ulu_pos, start_labels_at_1=True)
print('Positives standardized? ' + str(sl.is_standardized(ulu_pos)))

ulu2022_pos = pd.read_csv(pos_folder + '\\' + 'ULU2022_all_formatted_shifted.csv')
ulu2022_pos = ulu2022_pos.ffill()
ulu2022_pos = sl.standardize(table=ulu2022_pos, start_labels_at_1=True)
print('Positives standardized? ' + str(sl.is_standardized(ulu2022_pos)))

kk_pos = pd.read_csv(pos_folder + '\\' + 'KK_all_formatted_shifted.csv')
kk_pos = kk_pos.ffill()
kk_pos = sl.standardize(table=kk_pos, start_labels_at_1=True)
print('Positives standardized? ' + str(sl.is_standardized(kk_pos)))

cb_pos = pd.read_csv(pos_folder + '\\' + 'CB_all_formatted_shifted.csv')
cb_pos = cb_pos.ffill()
cb_pos = sl.standardize(table=cb_pos, start_labels_at_1=True)
print('Positives standardized? ' + str(sl.is_standardized(cb_pos)))

# join into complete tables

ulu_pos_tr = ulu_pos.head(ulu_vals[0])
ulu_pos_va = ulu_pos[ulu_vals[0]:ulu_vals[0] + ulu_vals[1]]
ulu_pos_te = ulu_pos.tail(ulu_vals[2])

ulu_neg_tr = ulu_neg.head(ulu_vals[0])
ulu_neg_va = ulu_neg[ulu_vals[0]:ulu_vals[0] + ulu_vals[1]]
ulu_neg_te = ulu_neg.tail(ulu_vals[2])

ulu_tr = pd.concat([ulu_pos_tr, ulu_neg_tr])
ulu_va = pd.concat([ulu_pos_va, ulu_neg_va])
ulu_te = pd.concat([ulu_pos_te, ulu_neg_te])

ulu2022_pos_tr = ulu2022_pos.head(ulu2022_vals[0])
ulu2022_pos_va = ulu2022_pos[ulu2022_vals[0]:ulu2022_vals[0] + ulu2022_vals[1]]
ulu2022_pos_te = ulu2022_pos.tail(ulu2022_vals[2])

ulu2022_neg_tr = ulu2022_neg.head(ulu2022_vals[0])
ulu2022_neg_va = ulu2022_neg[ulu2022_vals[0]:ulu2022_vals[0] + ulu2022_vals[1]]
ulu2022_neg_te = ulu2022_neg.tail(ulu2022_vals[2])

ulu2022_tr = pd.concat([ulu2022_pos_tr, ulu2022_neg_tr])
ulu2022_va = pd.concat([ulu2022_pos_va, ulu2022_neg_va])
ulu2022_te = pd.concat([ulu2022_pos_te, ulu2022_neg_te])

kk_pos_tr = kk_pos.head(kk_vals[0])
kk_pos_va = kk_pos[kk_vals[0]:kk_vals[0] + kk_vals[1]]
kk_pos_te = kk_pos.tail(kk_vals[2])

kk_neg_tr = kk_neg.head(kk_vals[0])
kk_neg_va = kk_neg[kk_vals[0]:kk_vals[0] + kk_vals[1]]
kk_neg_te = kk_neg.tail(kk_vals[2])

kk_tr = pd.concat([kk_pos_tr, kk_neg_tr])
kk_va = pd.concat([kk_pos_va, kk_neg_va])
kk_te = pd.concat([kk_pos_te, kk_neg_te])

cb_pos_tr = cb_pos.head(cb_vals[0])
cb_pos_va = cb_pos[cb_vals[0]:cb_vals[0] + cb_vals[1]]
cb_pos_te = cb_pos.tail(cb_vals[2])

cb_neg_tr = cb_neg.head(cb_vals[0])
cb_neg_va = cb_neg[cb_vals[0]:cb_vals[0] + cb_vals[1]]
cb_neg_te = cb_neg.tail(cb_vals[2])

cb_tr = pd.concat([cb_pos_tr, cb_neg_tr])
cb_va = pd.concat([cb_pos_va, cb_neg_va])
cb_te = pd.concat([cb_pos_te, cb_neg_te])

# final three tables

train = pd.concat([ulu_tr, ulu2022_tr, cb_tr, kk_tr])
val = pd.concat([ulu_va, ulu2022_va, cb_va, kk_va])
test = pd.concat([ulu_te, ulu2022_te, cb_te, kk_te])

Negatives standardized? True
Negatives standardized? True
Negatives standardized? True
Negatives standardized? True
Positives standardized? True
Positives standardized? True
Positives standardized? True
Positives standardized? True


In [32]:
drop_rows_tr = []

print('cleaning training table of original length ' + str(len(train)))

for idex, row in train.iterrows():

    # filename is row[0], end time is idex.end
    index = file_durations.loc[file_durations['filename'] == row.name[0]].index
    duration = file_durations['duration'][index].values[0]

    if duration < row.end:
        # drop the row corresponding to that sel_id and filename from the dataframe
        drop_rows_tr.append(idex)

    if row.start < 0:
        drop_rows_tr.append(idex)

print('Number of dropped rows: ' + str(len(drop_rows_tr)))

train = train.drop(drop_rows_tr)
print(len(train))

cleaning training table of original length 14004
Number of dropped rows: 21
13969


In [33]:
drop_rows_va = []

print('cleaning validation table of original length ' + str(len(val)))

for idex, row in val.iterrows():

    # filename is row[0], end time is idex.end
    index = file_durations.loc[file_durations['filename'] == row.name[0]].index
    duration = file_durations['duration'][index].values[0]

    if duration < row.end:
        # drop the row corresponding to that sel_id and filename from the dataframe
        drop_rows_va.append(idex)

    if row.start < 0:
        drop_rows_va.append(idex)

print('Number of dropped rows: ' + str(len(drop_rows_va)))

val = val.drop(drop_rows_va)
print(len(val))

cleaning validation table of original length 3966
Number of dropped rows: 9
3956


In [34]:
drop_rows_te = []

print('cleaning test table of original length ' + str(len(test)))

for idex, row in test.iterrows():

    # filename is row[0], end time is idex.end
    index = file_durations.loc[file_durations['filename'] == row.name[0]].index
    duration = file_durations['duration'][index].values[0]

    if duration < row.end:
        # drop the row corresponding to that sel_id and filename from the dataframe
        drop_rows_te.append(idex)

    if row.start < 0:
        drop_rows_te.append(idex)

print('Number of dropped rows: ' + str(len(drop_rows_te)))

test = test.drop(drop_rows_te)
print(len(test))

cleaning test table of original length 1858
Number of dropped rows: 5
1849


In [36]:
# Get number of spectrogram tests 
spectro_folder = r'E:\tests\spectro-files'
spectro_files = glob.glob(spectro_folder + "/*.json")

recipe = r'E:\tests\recipe-files\resnet_recipe-1.json'

spectro_test_folder = r'E:\tests\1sec-auto\recipe-1'

# one: 100 to 1200 (usual)
# two: 0 to 1500
# three: 0 to 3000
# four: usual, rate 2000
# five: usual, rate 4000
# six: usual, window 0.032

db_names = [spectro_test_folder + '\\' + 'spectro-1-db.h5', spectro_test_folder + '\\' + 'spectro-2-db.h5', spectro_test_folder + '\\' + 'spectro-3-db.h5',
             spectro_test_folder + '\\' + 'spectro-4-db.h5', spectro_test_folder + '\\' + 'spectro-5-db.h5', spectro_test_folder + '\\' + 'spectro-6-db.h5']

model_names = [spectro_test_folder + '\\' + 'rs-spec1.kt', spectro_test_folder + '\\' + 'rs-spec2.kt', spectro_test_folder + '\\' + 'rs-spec3.kt',
               spectro_test_folder + '\\' + 'rs-spec4.kt', spectro_test_folder + '\\' + 'rs-spec5.kt', spectro_test_folder + '\\' + 'rs-spec6.kt']

temp_folders = [spectro_test_folder + '\\' + 'rs-temp-1', spectro_test_folder + '\\' + 'rs-temp-2', spectro_test_folder + '\\' + 'rs-temp-3',
               spectro_test_folder + '\\' + 'rs-temp-4', spectro_test_folder + '\\' + 'rs-temp-5', spectro_test_folder + '\\' + 'rs-temp-6']

In [37]:
# For each spectro file:
for idex, spec in enumerate(spectro_files):

    spec_file = spectro_folder + '\\' + 'spec_config_' + str(idex+1) + '.json'
    
    # join into a database
    # Load the spectrogram representation & parameters, this returns a dict 
    spec_cfg = load_audio_representation(spec_file, name="spectrogram")

    # Create a table called "train" in the database, defined by db_name, using the "train" selections table, the spectrogram config, and the audio data 
    # Behind the hood, this creates an AudioLoader and AudioWriter Ketos function which generates the spectrograms for each selection 
    # For the specific spectrogram in this case, the spectrograms are of size [1500,56] where 56 refers to the frequency dimension and 1500 refers to the time dimension 
    # The size of the spectrogram is 1500*56, which is 84000
    dbi.create_database(output_file=db_names[idex],  # empty brackets
                        dataset_name=r'train', selections=train, data_dir=data_folder,
                        audio_repres=spec_cfg)
    
    dbi.create_database(output_file=db_names[idex],  # empty brackets
                        dataset_name=r'val', selections=val, data_dir=data_folder,
                        audio_repres=spec_cfg)
    
    dbi.create_database(output_file=db_names[idex],  # empty brackets
                        dataset_name=r'test', selections=test, data_dir=data_folder,
                        audio_repres=spec_cfg)

100%|████████████████████████████████████████████████████████████████████████████| 13969/13969 [06:03<00:00, 38.42it/s]


13969 items saved to E:\tests\1sec-auto\recipe-1\spectro-1-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 3956/3956 [01:34<00:00, 41.94it/s]


3956 items saved to E:\tests\1sec-auto\recipe-1\spectro-1-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1849/1849 [00:45<00:00, 41.06it/s]


1849 items saved to E:\tests\1sec-auto\recipe-1\spectro-1-db.h5


100%|████████████████████████████████████████████████████████████████████████████| 13969/13969 [05:56<00:00, 39.21it/s]


13969 items saved to E:\tests\1sec-auto\recipe-1\spectro-2-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 3956/3956 [01:32<00:00, 42.72it/s]


3956 items saved to E:\tests\1sec-auto\recipe-1\spectro-2-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1849/1849 [00:45<00:00, 40.90it/s]


1849 items saved to E:\tests\1sec-auto\recipe-1\spectro-2-db.h5


100%|████████████████████████████████████████████████████████████████████████████| 13969/13969 [05:44<00:00, 40.59it/s]


13969 items saved to E:\tests\1sec-auto\recipe-1\spectro-3-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 3956/3956 [01:36<00:00, 40.96it/s]


3956 items saved to E:\tests\1sec-auto\recipe-1\spectro-3-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1849/1849 [00:46<00:00, 39.43it/s]


1849 items saved to E:\tests\1sec-auto\recipe-1\spectro-3-db.h5


100%|████████████████████████████████████████████████████████████████████████████| 13969/13969 [05:33<00:00, 41.84it/s]


13969 items saved to E:\tests\1sec-auto\recipe-1\spectro-4-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 3956/3956 [01:31<00:00, 43.28it/s]


3956 items saved to E:\tests\1sec-auto\recipe-1\spectro-4-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1849/1849 [00:43<00:00, 42.89it/s]


1849 items saved to E:\tests\1sec-auto\recipe-1\spectro-4-db.h5


100%|████████████████████████████████████████████████████████████████████████████| 13969/13969 [06:58<00:00, 33.39it/s]


13969 items saved to E:\tests\1sec-auto\recipe-1\spectro-5-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 3956/3956 [01:57<00:00, 33.64it/s]


3956 items saved to E:\tests\1sec-auto\recipe-1\spectro-5-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1849/1849 [00:55<00:00, 33.10it/s]


1849 items saved to E:\tests\1sec-auto\recipe-1\spectro-5-db.h5


100%|████████████████████████████████████████████████████████████████████████████| 13969/13969 [05:18<00:00, 43.85it/s]


13969 items saved to E:\tests\1sec-auto\recipe-1\spectro-6-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 3956/3956 [01:27<00:00, 45.22it/s]


3956 items saved to E:\tests\1sec-auto\recipe-1\spectro-6-db.h5


100%|██████████████████████████████████████████████████████████████████████████████| 1849/1849 [00:40<00:00, 45.15it/s]

1849 items saved to E:\tests\1sec-auto\recipe-1\spectro-6-db.h5





In [None]:
for idex, spec in enumerate(spectro_files): 

    spec_file = spectro_folder + '\\' + 'spec_config_' + str(idex+1) + '.json'
    
    # join into a database
    # Load the spectrogram representation & parameters, this returns a dict 
    spec_cfg = load_audio_representation(spec_file, name="spectrogram")

    # Set the random seed for numpy and tensorflow 
    np.random.seed(1000)
    tf.random.set_seed(2000)
    
    # Set the batch size and number of epochs for training
    batch_size = 16
    n_epochs = 40
    
    # Set the log folder and checkpoint folder 
    log_folder = spectro_test_folder + '\\' + 'logs' + str(idex)
    checkpoint_folder = spectro_test_folder + '\\' + 'checkpoints' + str(idex)
    
    # Open the database file in read mode
    db = dbi.open_file(db_names[idex], 'r')
    
    # Open the training and validation tables respectively 
    train_data = dbi.open_table(db, "/train/data")
    val_data = dbi.open_table(db, "/val/data")
    
    # Create batches of training data of size batch size, using the specified data table 
    # This returns indices of the data in each batch along with their labels 
    train_generator = BatchGenerator(batch_size=batch_size, data_table=train_data,
                                        output_transform_func=ResNetInterface.transform_batch,
                                        shuffle=True, refresh_on_epoch_end=True)
    
    # Create batches of validation data of size batch size, using the specified data table 
    # This returns indices of the data in each batch along with their labels 
    val_generator = BatchGenerator(batch_size=batch_size, data_table=val_data,
                                       output_transform_func=ResNetInterface.transform_batch,
                                       shuffle=False, refresh_on_epoch_end=False)
    
    # Build the ResNet model file based off of the recipe file - this creates a "ResNetInterface" object 
    resnet = ResNetInterface.build(recipe)
    
    # Set the training and validation generators to the batch generators created above 
    resnet.train_generator = train_generator
    resnet.val_generator = val_generator
    
    # Set the model log and checkpoint directory 
    resnet.log_dir = log_folder
    resnet.checkpoint_dir = checkpoint_folder
    
    # Train the model, looping through all of the training and validation data 
    # See code map for more information
    resnet.train_loop(n_epochs=n_epochs, verbose=False, log_csv=True, csv_name='log.csv')
    
    # Close the database 
    db.close()
    
    # Save the model file, and keep track of the spectrogram parameters used to generate that model 
    resnet.save(model_names[idex], audio_repr_file=spec_file)

    print('Done spectro ' + str(idex+1))