In [1]:
import os
import math
import shutil
import numpy as np
from random import seed
from multiprocessing import Pool

from keras.optimizers import *
from keras.losses import *
from keras.callbacks import *
from keras.utils.training_utils import multi_gpu_model

Using TensorFlow backend.


In [2]:
GPUS = '0,1'
RND = 777
RUN = 'E5'
OUT_DIR = 'out'
TENSORBOARD_DIR = '/tensorboard/tf-speech-v2/%s_$fold$' % RUN
MODELS_DIR = '%s/models/run_%s/fold_$fold$' % (OUT_DIR, RUN)
INPUT_SIZE = (96, 96, 1)  # n_mels x width x 1ch
FOLDS = 10

In [3]:
np.random.seed(RND)
seed(RND)

In [4]:
# make predictions dir
os.makedirs('%s/predictions/run_%s'%(OUT_DIR, RUN), exist_ok=True)

In [5]:
# make only specific GPU to be utilized
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = GPUS

In [6]:
%run '../data-generator.ipynb'
%run '../models.ipynb'

In [7]:
def choose_batch(n, train_X, train_Y, train_files, val_files):
    assert isinstance(val_files, set)

    # extra random indexes to search for files not in val_files
    def _extra_indexes():
        return np.random.randint(0, len(train_X), size=int(n * 0.15))

    ii = np.random.randint(0, len(train_X), size=n)
    extra_ii = []

    replaced = 0

    # replace indexes with files occuring in val_files
    for j in range(len(ii)):
        if '(silence)' != train_files[ii[j]]:
            while train_files[ii[j]] in val_files:
                if len(extra_ii) == 0: extra_ii = _extra_indexes()
                ii[j], extra_ii = extra_ii[0], extra_ii[1:]
                replaced += 1

    X = train_X[ii]
    Y = train_Y[ii]
    files = train_files[ii]
    
    return X, Y, files

In [8]:
train_X = np.memmap('%s/train_X.mem' % OUT_DIR, np.float32,
                    'r').reshape((-1, ) + INPUT_SIZE)
train_Y = np.memmap('%s/train_Y.mem' % OUT_DIR, np.float32, 'r').reshape(
    (-1, len(LABELS)))

train_files = np.load('%s/train_files.npy' % OUT_DIR)

assert len(train_Y) == len(train_X)
assert len(train_files) == len(train_X)

print('len(train_X):', len(train_X))

len(train_X): 1000000


In [9]:
# training params
N_PER_BATCH = 500
# last number splits train set into XX epochs
STEPS_PER_EPOCH = len(train_X) // N_PER_BATCH // 10
N_EPOCHS = 200

In [None]:
test_X = np.memmap('%s/test/test_X.mem' % (OUT_DIR), np.float32,
                   'r').reshape((-1, ) + INPUT_SIZE)

In [None]:
for fold in range(FOLDS):

    print('fold:', fold)

    # read val data
    val_X = np.load('%s/val/val_X_%d.npy' % (OUT_DIR, fold))
    val_Y = np.load('%s/val/val_Y_%d.npy' % (OUT_DIR, fold))
    val_files = np.load('%s/val/val_files_%d.npy' % (OUT_DIR, fold))
    assert len(val_X) == len(val_files)
    assert len(val_Y) == len(val_files)
    print('len(val_X):', len(val_X))
    val_files = set(val_files)

    # create dir to store models
    models_dir = MODELS_DIR.replace('$fold$', str(fold))
    os.makedirs(models_dir, exist_ok=True)
    print('models_dir:', models_dir)

    def train_generator(n_per_batch):
        while True:
            X, Y, files = choose_batch(n_per_batch, train_X, train_Y,
                                       train_files, val_files)
            yield (X, Y)

    # rm/create tensorboard dir
    tensorboard_dir = TENSORBOARD_DIR.replace('$fold$', str(fold))
    shutil.rmtree(tensorboard_dir, ignore_errors=True)
    os.makedirs(tensorboard_dir)
    print('tensorboard_dir:', tensorboard_dir)

    # create model
    model = Model_6(input_size=INPUT_SIZE, output_size=len(LABELS))
    model.build()

    # use x gpus
    
    n_gpus = len(GPUS.split(','))
    
    if n_gpus > 1:
        m = multi_gpu_model(model.m, gpus=n_gpus)
    else:
        m = model.m

    # fit model
    
    optimizer = RMSprop(lr=1e-4)
    m.compile(
        optimizer=optimizer, loss=categorical_crossentropy, metrics=['accuracy']\
    )

    m.fit_generator(
        train_generator(N_PER_BATCH),
        STEPS_PER_EPOCH,
        epochs=N_EPOCHS,
        validation_data=(val_X, val_Y),
        callbacks=[
            TensorBoard(log_dir=tensorboard_dir),
            #             ModelCheckpoint(
            #                 models_dir +
            #                 '/e{epoch:03d}-l={loss:.5f}-vl={val_loss:.5f}-a={acc:.5f}-va={val_acc:.5f}.h5',
            #                 monitor='val_acc',
            #                 verbose=0,
            #                 save_best_only=True,
            #                 save_weights_only=False,
            #                 mode='auto'),
            ReduceLROnPlateau(
                monitor='val_loss', factor=0.2, patience=3, min_lr=1e-9,
                verbose=1),
            EarlyStopping(
                monitor='val_acc',
                min_delta=0.000001,
                patience=10,
                verbose=1,
                mode='auto')
        ])

    # predict on holdout
    holdout_X = np.load('%s/holdout/holdout_X.npy' % (OUT_DIR))
    holdout_Y = np.load('%s/holdout/holdout_Y.npy' % (OUT_DIR))
    hp = m.predict(holdout_X)
    np.save('%s/predictions/run_%s/holdout_predictions_%d.npy' % (OUT_DIR, RUN,
                                                                  fold), hp)

    # eval on holdout
    print('evaluation on holdout:')
    print(m.metrics_names)
    print(m.evaluate(holdout_X, holdout_Y, verbose=0))

    # predict on test data
    test_predictions = m.predict(test_X, verbose=1, batch_size=100)
    np.save('%s/predictions/run_%s/test_predictions_%d.npy' %
            (OUT_DIR, RUN, fold), test_predictions)

    print('')

fold: 0
len(val_X): 2441
models_dir: out/models/run_E5/fold_0
tensorboard_dir: /tensorboard/tf-speech-v2/E5_0
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 00011: reducing learning rate to 1.9999999494757503e-05.
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 00018: reducing learning rate to 3.999999898951501e-06.
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 00021: reducing learning rate to 7.999999979801942e-07.
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 00024: reducing learning rate to 1.600000018697756e-07.
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 00027: reducing learning rate to 3.199999980552093e-08.
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 00030: reducing learning rate to 6.399999818995639e-09.
Epoch 00030: early stopping
evaluation on holdout:
['loss', 'acc']
[0.26040190061437896, 0.94474999999999998]

fold: 1
len(val_X): 2441


Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 00023: reducing learning rate to 7.999999979801942e-07.
Epoch 24/200

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 29/200
Epoch 00029: reducing learning rate to 3.199999980552093e-08.
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 00032: reducing learning rate to 6.399999818995639e-09.
Epoch 00032: early stopping
evaluation on holdout:
['loss', 'acc']
[0.28468323675438295, 0.94625000000000004]

fold: 2
len(val_X): 2441
models_dir: out/models/run_E5/fold_2
tensorboard_dir: /tensorboard/tf-speech-v2/E5_2
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 00010: reducing learning rate to 1.9999999494757503e-05.
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 00016: reducing learning rate to 3.999999898951501e-06.
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 00019: reducing learning rate to 7.999999979801942e-07.
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 00022: reducing learning rate to 1.600000018697756e-07.
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 00025: reducing learning rate to 3.199999980552093e-08.
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 00028: reducing learning rate to 6.399999818995639e-09.
Epoch 00028: early stopping
evaluation on holdout:
['loss', 'acc']
[0.2308085257386556, 0.94750000000000001]

fold: 4
len(val_X): 2440
models_dir: out/models/run_E5/fold_4
tensorboard_dir: /tensorboard/tf-speech-v2/E5_4
Epoch 1/200
Epoch 2/200

In [None]:
# .4031 .7079 .8292