In [2]:
import numpy as np
import os
import json
import pandas as pd

import tom
%reload_ext autoreload
%autoreload 2

Using TensorFlow backend.


In [3]:
# Root directory of the local OpenMIC-2018 copy; edit this path for your machine.
DATA_ROOT = '/beegfs/qx244/ds/openmic-2018/'

# Fail early with a hint instead of a confusing file-not-found further down.
if not os.path.exists(DATA_ROOT):
    raise ValueError('Did you forget to set `DATA_ROOT`?')

# allow_pickle=True: since NumPy 1.16.3 `np.load` refuses object-dtype arrays
# by default. `sample_key` is presumably stored as an object array of strings
# (TODO confirm); passing the flag keeps the pre-1.16.3 behaviour and is a
# no-op for plain numeric arrays.
# NOTE(review): pickled data can execute code on load -- only use this with a
# dataset archive you trust.
with np.load(os.path.join(DATA_ROOT, 'openmic-2018.npz'), allow_pickle=True) as OPENMIC:
    # Pull the arrays out while the archive is still open.
    Y_true, Y_mask, sample_key = OPENMIC['Y_true'], OPENMIC['Y_mask'], OPENMIC['sample_key']

# JSON file shipped next to the archive; loaded as-is into `class_map`.
with open(os.path.join(DATA_ROOT, 'class-map.json'), 'r') as f:
    class_map = json.load(f)

In [4]:
# Split the data into the training and test sets using the partition files.
#
# Each CSV is read without a header row. `.squeeze('columns')` collapses the
# single-column frame into a Series; it replaces the `squeeze=True` keyword of
# `read_csv`, which was deprecated in pandas 1.4 and removed in 2.0.
split_train = pd.read_csv(os.path.join(DATA_ROOT, 'partitions/split01_train.csv'),
                          header=None).squeeze('columns')
split_test = pd.read_csv(os.path.join(DATA_ROOT, 'partitions/split01_test.csv'),
                         header=None).squeeze('columns')

# Sets give constant-time membership tests in the loop below.
train_set = set(split_train)
test_set = set(split_test)

# Walk over all sample keys and record each row number in either idx_train or
# idx_test. The row numbers are used afterwards to slice the array data.
idx_train, idx_test = [], []

for idx, n in enumerate(sample_key):
    if n in train_set:
        idx_train.append(idx)
    elif n in test_set:
        idx_test.append(idx)
    else:
        # This should never happen, but better safe than sorry.
        # `n` already *is* the offending key; indexing `sample_key` with it
        # would raise an IndexError and hide this message.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(n))

# Finally, cast the idx_* lists to numpy arrays.
idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

In [17]:
# Training configuration handed to `compile`, gathered in one place so it is
# easy to read and tweak.
compile_options = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}

# Build the network via the project-local `tom` module, then configure it.
model = tom.construct_crnnL3_smp_tom()
model.compile(**compile_options)

In [18]:
# Fixed seed so the shuffled order is the same on every run of the notebook.
SHUFFLE_SEED = 0
shuffle_rng = np.random.RandomState(SHUFFLE_SEED)

# Restore ascending order first: the shuffle is in place, so without this a
# second execution of the cell would shuffle the already-shuffled arrays and
# yield a different order than a fresh run.
idx_train.sort()
idx_test.sort()

# Shuffle in place (as before) so that `idx_train` / `idx_test` keep matching
# the order the generators iterate over.
shuffle_rng.shuffle(idx_train)
shuffle_rng.shuffle(idx_test)

batch_size = 16

# Batch generators from the project-local `tom` module, one per partition.
# NOTE(review): presumably these yield (features, labels) batches read from
# DATA_ROOT for the given row indices -- confirm against `tom.MelGenerator`.
train_gen = tom.MelGenerator(idx_train, batch_size=batch_size, DATA_ROOT=DATA_ROOT)
test_gen = tom.MelGenerator(idx_test, batch_size=batch_size, DATA_ROOT=DATA_ROOT)

In [19]:
# Arguments for the training run: batches come from `train_gen`, and
# `test_gen` is passed as the validation data.
fit_options = dict(
    generator=train_gen,
    epochs=5,
    validation_data=test_gen,
)

# Kept as the cell's last expression so the returned object is displayed.
model.fit_generator(**fit_options)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2b170045a588>

In [20]:
# Output locations: the architecture goes to a JSON file, the weights to HDF5.
architecture_path, weights_path = "model_5epoch.json", "model_5epoch.h5"

# Architecture: dump the model description as a JSON string and write it out.
model_json = model.to_json()
with open(architecture_path, "w") as json_file:
    json_file.write(model_json)

# Weights: stored separately from the architecture.
model.save_weights(weights_path)

print("Saved model to disk")

Saved model to disk
