In [1]:
import numpy as np
import os
import json
import pandas as pd
from keras.callbacks import ModelCheckpoint, EarlyStopping

import tom
%reload_ext autoreload
%autoreload 2

Using TensorFlow backend.


In [2]:
# Root directory of the OpenMIC-2018 dataset.  The original cluster location
# stays the default; set the OPENMIC_DATA_ROOT environment variable to run the
# notebook on another machine without editing this cell.
DATA_ROOT = os.environ.get('OPENMIC_DATA_ROOT', '/beegfs/qx244/ds/openmic-2018/')

# Fail early with a readable message instead of a file-not-found error below.
if not os.path.exists(DATA_ROOT):
    raise ValueError('Did you forget to set `DATA_ROOT`?')

# Pull the label matrix, the label mask and the per-row sample keys out of the
# archive; the `with` block closes the .npz file once the arrays are extracted.
with np.load(os.path.join(DATA_ROOT, 'openmic-2018.npz')) as OPENMIC:
    Y_true, Y_mask, sample_key = OPENMIC['Y_true'], OPENMIC['Y_mask'], OPENMIC['sample_key']

# JSON class map shipped alongside the archive (presumably instrument name ->
# column index; confirm against the dataset documentation).
with open(os.path.join(DATA_ROOT, 'class-map.json'), 'r') as f:
    class_map = json.load(f)

In [3]:
# Load the train / test / validation partitions of the sample keys.
#
# Each CSV is read without a header row and is expected to hold a single
# column; `.squeeze('columns')` collapses that one-column DataFrame into a
# Series.  This replaces `read_csv(..., squeeze=True)`, which was deprecated in
# pandas 1.4 and removed in pandas 2.0 (where it raises a TypeError).
split_train = pd.read_csv('tom_partition/split_train.csv', header=None).squeeze('columns')
split_test = pd.read_csv('tom_partition/split_test.csv', header=None).squeeze('columns')
split_val = pd.read_csv('tom_partition/split_val.csv', header=None).squeeze('columns')

In [4]:
# Sanity check: total number of sample keys across the three partitions.
sum(len(part) for part in (split_train, split_test, split_val))

20000

In [5]:

# Set views of the three partitions, for constant-time membership tests.
train_set = set(split_train)
test_set = set(split_test)
val_set = set(split_val)

# Walk over every sample key and record its row number in the bucket of the
# partition it belongs to (train, test or validation).  The row numbers are
# used in later cells to slice the array data.
rows_train, rows_test, rows_val = [], [], []

# Partitions are probed in this order: train first, then test, then validation.
buckets = (
    (train_set, rows_train),
    (test_set, rows_test),
    (val_set, rows_val),
)

for row, key in enumerate(sample_key):
    for members, rows in buckets:
        if key in members:
            rows.append(row)
            break
    else:
        # No partition claimed this key.  Not expected to happen, but stop
        # loudly rather than silently dropping a sample.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(key))

# Convert the collected row numbers into numpy arrays.
idx_train = np.asarray(rows_train)
idx_test = np.asarray(rows_test)
idx_val = np.asarray(rows_val)

In [10]:
# The checkpoint callback below writes into `saved_models/`.  Create the
# directory up front: nothing else in this notebook creates it, and a missing
# directory would otherwise only surface as a failure when the first
# checkpoint is written.
os.makedirs('saved_models', exist_ok=True)

# Stop training once `val_acc` has not improved for 3 consecutive epochs, and
# roll the model back to the weights of its best epoch.
early_stopping_cb = EarlyStopping(monitor='val_acc',
                                  patience=3,
                                  verbose=1,
                                  mode='max',
                                  restore_best_weights=True)

# Write a checkpoint whenever `val_acc` reaches a new maximum; the epoch number
# and the score are embedded in the file name.
checkpoint_cb = ModelCheckpoint('saved_models/model_chkpt.{epoch:02d}-{val_acc:.4f}.hdf5',
                                monitor='val_acc',
                                verbose=0,
                                save_best_only=True,
                                mode='max', period=1)

# Callback list handed to `fit_generator`.
cbs = [early_stopping_cb, checkpoint_cb]

In [13]:
# Seed for the shuffles below.  Without a fixed seed, the subset of training
# rows selected by the slice further down changes on every run, which makes the
# reported scores impossible to reproduce.
SHUFFLE_SEED = 42

# Number of (shuffled) training rows actually fed to the model.
N_TRAIN_SAMPLES = 1600

# A dedicated generator keeps the shuffles reproducible without touching
# numpy's global random state.
# NOTE: the shuffles are in place, so re-running this cell without first
# re-running the cell that builds idx_* permutes already-shuffled arrays.
shuffle_rng = np.random.RandomState(SHUFFLE_SEED)
shuffle_rng.shuffle(idx_train)
shuffle_rng.shuffle(idx_test)
shuffle_rng.shuffle(idx_val)

batch_size = 16
# Only the first N_TRAIN_SAMPLES shuffled training rows are used; the
# validation and test generators receive their full index arrays.
train_gen = tom.MelGenerator(idx_train[:N_TRAIN_SAMPLES], batch_size=batch_size, DATA_ROOT=DATA_ROOT)
val_gen = tom.MelGenerator(idx_val, batch_size=batch_size, DATA_ROOT=DATA_ROOT)
test_gen = tom.MelGenerator(idx_test, batch_size=batch_size, DATA_ROOT=DATA_ROOT)

In [14]:
# Build the network defined in the project module `tom`.
model = tom.construct_crnnL3_smp_tom()

# Train with Adam on a binary cross-entropy loss, tracking accuracy.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [15]:
# Fit on the training generator and score on the validation generator.  500 is
# only an upper bound on the epoch count: the early-stopping callback in `cbs`
# can end the run sooner.
model.fit_generator(train_gen,
                    validation_data=val_gen,
                    callbacks=cbs,
                    epochs=500)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Restoring model weights from the end of the best epoch
Epoch 00009: early stopping


<keras.callbacks.History at 0x2b3a8fd7f160>

In [16]:
# Score the trained model on the test generator; the result lists the loss
# followed by the metric(s) the model was compiled with.
model.evaluate_generator(generator=test_gen, verbose=1)



[0.4774095987148341, 0.9509734444679178]

In [20]:
# Persist the model in two parts: the architecture as JSON text ...
model_json = model.to_json()
with open("model_5epoch.json", "w") as arch_file:
    arch_file.write(model_json)

# ... and the weights as an HDF5 file.
model.save_weights("model_5epoch.h5")

print("Saved model to disk")

Saved model to disk
