In [1]:
import os
import math
import shutil
import numpy as np
from multiprocessing import Pool
from keras.optimizers import RMSprop, SGD
from keras.losses import categorical_crossentropy
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

Using TensorFlow backend.


In [2]:
# NOTE(review): GPU is not referenced in the visible cells — presumably the
# device id consumed by one of the %run notebooks; confirm.
GPU = '0'
# NOTE(review): RND looks like a random seed, but no visible cell seeds an RNG
# with it — confirm where (or whether) it is used.
RND = 1
# Run tag embedded in the TensorBoard and model directory names below.
RUN = 'B'
# Root directory for the data files this notebook reads and the models and
# predictions it writes.
OUT_DIR = 'out'
# '$fold$' is a placeholder that the training loop replaces with the fold number.
# NOTE(review): this is an absolute path at the filesystem root — confirm it is
# writable on the machine running the notebook.
TENSORBOARD_DIR = '/tensorboard/tf-speech-v2/%s_$fold$' % RUN
MODELS_DIR = '%s/models/run_%s/fold_$fold$' % (OUT_DIR, RUN)
INPUT_SIZE = (64, 64, 1)  # n_mels x width x 1ch
# Number of folds iterated by the training loop (one model is trained per fold).
FOLDS = 10

In [3]:
# Run the shared notebooks before anything else in this notebook.
# NOTE(review): LABELS and Model_3 are used by later cells but are not defined
# in this notebook — presumably they are provided by these two notebooks; confirm.
%run '../data-generator.ipynb'
%run '../models.ipynb'

In [4]:
def choose_batch(n, train_X, train_Y, train_files, val_files):
    """Draw a random training batch that avoids files held out for validation.

    ``n`` indexes are drawn with ``np.random.randint`` (so repeats are
    possible). Every drawn index whose file name is in ``val_files`` is
    swapped for a freshly drawn index, repeatedly, until it points at a file
    outside ``val_files``. Indexes whose file name is '(silence)' in the
    initial draw are kept without being checked against ``val_files``.

    Args:
        n: number of samples in the batch.
        train_X: array of inputs; must support indexing by an integer array.
        train_Y: array of targets aligned row-by-row with ``train_X``.
        train_files: array of file names aligned row-by-row with ``train_X``.
        val_files: ``set`` of file names that must not appear in the batch.

    Returns:
        Tuple ``(X, Y, files)`` holding the selected rows of ``train_X``,
        ``train_Y`` and ``train_files``.

    Note:
        If every candidate file is in ``val_files`` the replacement loop
        cannot terminate; callers must leave some training files outside
        the validation set.
    """
    assert isinstance(val_files, set)

    # Replacement candidates are drawn in small chunks instead of one at a
    # time. At least one index is always drawn: for n < 7, int(n * 0.15) is 0,
    # which used to yield an empty chunk and an IndexError on extra_ii[0].
    def _extra_indexes():
        return np.random.randint(
            0, len(train_X), size=max(1, int(n * 0.15)))

    ii = np.random.randint(0, len(train_X), size=n)
    extra_ii = []

    # replace indexes with files occurring in val_files
    for j in range(len(ii)):
        if '(silence)' != train_files[ii[j]]:
            while train_files[ii[j]] in val_files:
                if len(extra_ii) == 0:
                    extra_ii = _extra_indexes()
                # pop the next candidate from the front of the chunk
                ii[j], extra_ii = extra_ii[0], extra_ii[1:]

    X = train_X[ii]
    Y = train_Y[ii]
    files = train_files[ii]

    return X, Y, files

In [5]:
# Memory-map the training inputs read-only as float32 and view them as one
# INPUT_SIZE-shaped entry per sample.
train_X = np.memmap('%s/train_X.mem' % OUT_DIR, dtype=np.float32, mode='r')
train_X = train_X.reshape((-1, ) + INPUT_SIZE)

# Same for the targets: one row of len(LABELS) values per sample.
train_Y = np.memmap('%s/train_Y.mem' % OUT_DIR, dtype=np.float32, mode='r')
train_Y = train_Y.reshape((-1, len(LABELS)))

# File name of each training sample, aligned with train_X / train_Y.
train_files = np.load('%s/train_files.npy' % OUT_DIR)

# All three arrays must describe the same number of samples.
assert len(train_X) == len(train_Y)
assert len(train_X) == len(train_files)

print('len(train_X):', len(train_X))

len(train_X): 1000000


In [6]:
# training params
N_PER_BATCH = 1000
# last number splits train set into XX epochs
STEPS_PER_EPOCH = len(train_X) // N_PER_BATCH // 10
N_EPOCHS = 100

In [7]:
# Memory-map the test inputs read-only as float32 and view them as one
# INPUT_SIZE-shaped entry per sample.
test_X = np.memmap('%s/test/test_X.mem' % (OUT_DIR), dtype=np.float32, mode='r')
test_X = test_X.reshape((-1, ) + INPUT_SIZE)

In [8]:
# The holdout set is the same for every fold, so it is loaded once up front
# instead of once per fold. Loading it here also makes a missing holdout file
# fail immediately rather than after the first fold has finished training.
holdout_X = np.load('%s/holdout/holdout_X.npy' % (OUT_DIR))
holdout_Y = np.load('%s/holdout/holdout_Y.npy' % (OUT_DIR))

# Train one model per fold, then score it on the holdout set and write its
# predictions for the holdout and test sets to disk.
for fold in range(FOLDS):

    print('fold:', fold)

    # read val data
    val_X = np.load('%s/val/val_X_%d.npy' % (OUT_DIR, fold))
    val_Y = np.load('%s/val/val_Y_%d.npy' % (OUT_DIR, fold))
    val_files = np.load('%s/val/val_files_%d.npy' % (OUT_DIR, fold))
    assert len(val_X) == len(val_files)
    assert len(val_Y) == len(val_files)
    print('len(val_X):', len(val_X))
    # choose_batch requires a set for its membership tests
    val_files = set(val_files)

    # create dir to store models
    models_dir = MODELS_DIR.replace('$fold$', str(fold))
    os.makedirs(models_dir, exist_ok=True)
    print('models_dir:', models_dir)

    def train_generator(n_per_batch):
        """Yield endless random (X, Y) batches that exclude this fold's validation files."""
        while True:
            X, Y, _ = choose_batch(n_per_batch, train_X, train_Y,
                                   train_files, val_files)
            yield (X, Y)

    # rm/create tensorboard dir
    tensorboard_dir = TENSORBOARD_DIR.replace('$fold$', str(fold))
    shutil.rmtree(tensorboard_dir, ignore_errors=True)
    # rmtree above is best-effort, so tolerate a directory it could not remove
    os.makedirs(tensorboard_dir, exist_ok=True)
    print('tensorboard_dir:', tensorboard_dir)

    # create model
    model = Model_3(input_size=INPUT_SIZE, output_size=len(LABELS))
    model.build()
    optimizer = RMSprop(lr=1e-3)
    model.m.compile(
        optimizer=optimizer,
        loss=categorical_crossentropy,
        metrics=['accuracy'])

    model.m.fit_generator(
        train_generator(N_PER_BATCH),
        steps_per_epoch=STEPS_PER_EPOCH,
        epochs=N_EPOCHS,
        validation_data=(val_X, val_Y),
        callbacks=[
            TensorBoard(log_dir=tensorboard_dir),
            # keep a full-model checkpoint each time val_acc improves
            ModelCheckpoint(
                models_dir +
                '/e{epoch:03d}-l={loss:.5f}-vl={val_loss:.5f}-a={acc:.5f}-va={val_acc:.5f}.h5',
                monitor='val_acc',
                verbose=0,
                save_best_only=True,
                save_weights_only=False,
                mode='auto'),
            # cut the learning rate to a fifth after one epoch without
            # val_loss improvement
            ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.2,
                patience=1,
                min_lr=1e-9,
                verbose=1),
            # stop the fold after five epochs without val_loss improvement
            EarlyStopping(
                monitor='val_loss',
                min_delta=0.000001,
                patience=5,
                verbose=1,
                mode='auto')
        ])

    # NOTE(review): the predictions below use the weights the model holds when
    # training stops; the best checkpoint written by ModelCheckpoint is not
    # reloaded. Confirm this is intended.

    # predict on holdout
    hp = model.m.predict(holdout_X)
    np.save('%s/holdout/holdout_predictions_%d.npy' % (OUT_DIR, fold), hp)

    # eval on holdout
    print('evaluation on holdout:')
    print(model.m.metrics_names)
    print(model.m.evaluate(holdout_X, holdout_Y, verbose=0))

    # predict on test data
    test_predictions = model.m.predict(test_X, verbose=1, batch_size=1000)
    np.save('%s/test/test_predictions_%d.npy' % (OUT_DIR, fold),
            test_predictions)

    print('')

fold: 0
len(val_X): 2441
models_dir: out/models/run_B/fold_0
tensorboard_dir: /tensorboard/tf-speech-v2/B_0
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 00008: reducing learning rate to 0.00020000000949949026.
Epoch 10/100
Epoch 00009: reducing learning rate to 4.0000001899898055e-05.
Epoch 11/100
Epoch 00010: reducing learning rate to 8.000000525498762e-06.
Epoch 12/100
Epoch 00011: reducing learning rate to 1.6000001778593287e-06.
Epoch 13/100
Epoch 00012: reducing learning rate to 3.200000264769187e-07.
Epoch 00012: early stopping
evaluation on holdout:
['loss', 'acc']
[0.28956849448918365, 0.95099999999999996]

fold: 1
len(val_X): 2441
models_dir: out/models/run_B/fold_1
tensorboard_dir: /tensorboard/tf-speech-v2/B_1
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 9/100
Epoch 10/100
Epoch 00009: reducing learning rate to 4.0000001899898055e-05.
Epoch 11/100
Epoch 00010: reducing learning rate to 

Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 00008: reducing learning rate to 0.00020000000949949026.
Epoch 10/100
Epoch 00010: reducing learning rate to 8.000000525498762e-06.
Epoch 12/100
Epoch 00011: reducing learning rate to 1.6000001778593287e-06.
Epoch 13/100
Epoch 00012: reducing learning rate to 3.200000264769187e-07.
Epoch 00012: early stopping
evaluation on holdout:
['loss', 'acc']
[0.30860064729166697, 0.94925000000000004]

fold: 4
len(val_X): 2440
models_dir: out/models/run_B/fold_4
tensorboard_dir: /tensorboard/tf-speech-v2/B_4
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 00008: reducing learning rate to 0.00020000000949949026.
Epoch 10/100
Epoch 00009: reducing learning rate to 4.0000001899898055e-05.
Epoch 11/100
Epoch 00010: reducing learning rate to 8.000000525498762e-06.
Epoch 12/100
Epoch 00011: reducing learning rate to 1.6000001778593287e-06.
Epoch 13/100
Epoch 000

Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 00010: reducing learning rate to 0.00020000000949949026.
Epoch 12/100
Epoch 00011: reducing learning rate to 4.0000001899898055e-05.
Epoch 13/100
Epoch 00012: reducing learning rate to 8.000000525498762e-06.
Epoch 14/100
Epoch 00013: reducing learning rate to 1.6000001778593287e-06.
Epoch 15/100
Epoch 00014: reducing learning rate to 3.200000264769187e-07.
Epoch 00014: early stopping
evaluation on holdout:
['loss', 'acc']
[0.3269985166520637, 0.95125000000000004]

fold: 7
len(val_X): 2440
models_dir: out/models/run_B/fold_7
tensorboard_dir: /tensorboard/tf-speech-v2/B_7
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 00008: reducing learning rate to 0.00020000000949949026.
Epoch 10/100
Epoch 00009: reducing learning rate to 4.0000001899898055e-05.
Epoch 11/100
Epoch 00010: reducing learning rate to 8.000000525498762e-0

Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 00010: reducing learning rate to 0.00020000000949949026.
Epoch 12/100
Epoch 00011: reducing learning rate to 4.0000001899898055e-05.
Epoch 13/100
Epoch 00012: reducing learning rate to 8.000000525498762e-06.
Epoch 14/100
Epoch 00013: reducing learning rate to 1.6000001778593287e-06.
Epoch 15/100
Epoch 00014: reducing learning rate to 3.200000264769187e-07.
Epoch 00014: early stopping
evaluation on holdout:
['loss', 'acc']
[0.33821652156962134, 0.94650000000000001]

