In [1]:
import os
import math
import shutil
import numpy as np
from multiprocessing import Pool
from keras.optimizers import RMSprop, SGD
from keras.losses import categorical_crossentropy
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

Using TensorFlow backend.


In [2]:
# --- run identity -----------------------------------------------------------
RUN = 'A'  # run label; interpolated into the TensorBoard and model paths below
RND = 1    # NOTE(review): presumably a random seed — not used in the visible cells, confirm
GPU = '0'  # NOTE(review): presumably the GPU device id — not used in the visible cells, confirm

# --- data layout ------------------------------------------------------------
INPUT_SIZE = (64, 64, 1)  # n_mels x width x 1ch
FOLDS = 10                # number of validation folds iterated over below

# --- output locations -------------------------------------------------------
OUT_DIR = 'out'
TENSORBOARD_DIR = '/tensorboard/tf-speech-v2/%s' % RUN
# '$fold$' is a literal placeholder; it is substituted with the fold number
# via str.replace when the per-fold model directory is created.
MODELS_DIR = '%s/models/run_%s/fold_$fold$' % (OUT_DIR, RUN)

In [3]:
# Execute the sibling notebook with IPython's %run magic.
# NOTE(review): presumably this is where LABELS (used when reshaping train_Y
# further down) gets defined — confirm against ../data-generator.ipynb.
%run '../data-generator.ipynb'

In [4]:
def choose_batch(n, train_X, train_Y, train_files, val_files):
    """Draw a random training batch that avoids the validation files.

    ``n`` indices are sampled uniformly (with replacement) from
    ``range(len(train_X))``. Any sampled index whose entry in ``train_files``
    is a member of ``val_files`` is swapped for a freshly drawn index until it
    no longer points at a validation file. An index whose initial draw is the
    ``'(silence)'`` marker is kept as-is without consulting ``val_files``.

    Args:
        n: number of samples in the batch.
        train_X: array of inputs, indexed with an integer index array.
        train_Y: array of targets, parallel to ``train_X``.
        train_files: numpy array of source file names, parallel to ``train_X``.
        val_files: ``set`` of file names that must not appear in the batch.

    Returns:
        Tuple ``(X, Y, files)``: the selected rows of ``train_X``, ``train_Y``
        and ``train_files``.

    Raises:
        AssertionError: if ``val_files`` is not a ``set``.

    Note:
        The replacement loop keeps drawing until it finds an acceptable index,
        so it does not terminate if every entry of ``train_files`` is in
        ``val_files``.
    """
    assert isinstance(val_files, set)

    # Spare indices are drawn in chunks of ~15% of the batch. int(n * 0.15) is
    # 0 for n < 7, which would produce an empty refill and make the pop below
    # raise IndexError, so always draw at least one.
    refill_size = max(1, int(n * 0.15))

    ii = np.random.randint(0, len(train_X), size=n)
    extra_ii = []

    # replace indexes whose file occurs in val_files
    for j in range(len(ii)):
        if '(silence)' != train_files[ii[j]]:
            while train_files[ii[j]] in val_files:
                if len(extra_ii) == 0:
                    extra_ii = np.random.randint(0, len(train_X), size=refill_size)
                # pop the next spare index off the front of the pool
                ii[j], extra_ii = extra_ii[0], extra_ii[1:]

    X = train_X[ii]
    Y = train_Y[ii]
    files = train_files[ii]

    return X, Y, files

In [8]:
# Memory-map the training tensors read-only and view them as
# (samples, *INPUT_SIZE) and (samples, len(LABELS)) respectively.
train_X = np.memmap(
    '%s/train_X.mem' % OUT_DIR, dtype=np.float32, mode='r'
).reshape(-1, *INPUT_SIZE)
train_Y = np.memmap(
    '%s/train_Y.mem' % OUT_DIR, dtype=np.float32, mode='r'
).reshape(-1, len(LABELS))

# File name of each training sample, loaded fully into memory.
train_files = np.load('%s/train_files.npy' % OUT_DIR)

# The three arrays are indexed in parallel, so their lengths must agree.
assert len(train_Y) == len(train_X) == len(train_files)

print('len(train_X):', len(train_X))

len(train_X): 1000000


In [10]:
# For every fold: load that fold's validation arrays, create the directory
# where its models are stored, and define a batch generator that excludes the
# fold's validation files.
for fold in range(FOLDS):

    print('fold:', fold)

    # read val data (inputs, targets and the file name of each sample)
    val_X = np.load('%s/val/val_X_%d.npy' % (OUT_DIR, fold))
    val_Y = np.load('%s/val/val_Y_%d.npy' % (OUT_DIR, fold))
    val_files = np.load('%s/val/val_files_%d.npy' % (OUT_DIR, fold))
    # the length checks must run on the array, before it is collapsed to a set
    assert len(val_X) == len(val_files)
    assert len(val_Y) == len(val_files)
    print('len(val_X):', len(val_X))
    # choose_batch asserts that val_files is a set (membership tests)
    val_files = set(val_files)

    # create dir to store models ('$fold$' placeholder -> fold number)
    models_dir = MODELS_DIR.replace('$fold$', str(fold))
    os.makedirs(models_dir, exist_ok=True)
    print('models_dir:', models_dir)

    # NOTE(review): train_generator is defined but not called in the visible
    # part of this cell. It looks up val_files when it runs, not when it is
    # defined, so a generator kept beyond this iteration would see a later
    # fold's set.
    def train_generator(n_per_batch):
        """Endlessly yield (X, Y) batches of n_per_batch samples from choose_batch."""
        while True:
            # the file names returned by choose_batch are not passed on
            X, Y, files = choose_batch(n_per_batch, train_X, train_Y, train_files, val_files)
            yield (X, Y)
            
    

fold: 0
len(val_X): 2441
models_dir: out/models/run_A/fold_0
fold: 1
len(val_X): 2441
models_dir: out/models/run_A/fold_1
fold: 2
len(val_X): 2441
models_dir: out/models/run_A/fold_2
fold: 3
len(val_X): 2440
models_dir: out/models/run_A/fold_3
fold: 4
len(val_X): 2440
models_dir: out/models/run_A/fold_4
fold: 5
len(val_X): 2440
models_dir: out/models/run_A/fold_5
fold: 6
len(val_X): 2440
models_dir: out/models/run_A/fold_6
fold: 7
len(val_X): 2440
models_dir: out/models/run_A/fold_7
fold: 8
len(val_X): 2440
models_dir: out/models/run_A/fold_8
fold: 9
len(val_X): 2440
models_dir: out/models/run_A/fold_9
