In [1]:
import numpy as np
import shutil
import os
from multiprocessing import Pool
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy

Using TensorFlow backend.


In [2]:
RND = 0
RUN = 'A'
OUT_DIR = 'out'
TRAIN_TMP_DIR = OUT_DIR + '/train'
INPUT_DIR = '/d2/caches/tf-speech/train/audio'
TENSORBOARD_DIR = '/tensorboard/tf-speech/%s' % RUN
INPUT_SIZE = (64, 64, 1)  # n_mels x width x 1ch
MSG_NORM_MEAN = 116.536
MSG_NORM_STD = 21.5913
LABELS = [
    'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go',
    'unknown', 'silence'
]

N_VAL_SAMPLES = 2500
N_TRAIN_SAMPLES = 500000  # how many training samples to generate

In [3]:
%run 'lib.ipynb'
%run 'data-generator.ipynb'
%run 'models.ipynb'

In [4]:
# remove tensorboard data
if os.path.isdir(TENSORBOARD_DIR): shutil.rmtree(TENSORBOARD_DIR)

In [5]:
# init data gen
dg = DataGenerator(input_dir=INPUT_DIR)
dg.n_mels = INPUT_SIZE[0]
dg.msg_w = INPUT_SIZE[1]
# normalization params
dg.samplewise_norm = True
dg.msg_std = MSG_NORM_STD
dg.msg_mean = MSG_NORM_MEAN

In [6]:
# generate/load val set
val_files_path = OUT_DIR + '/val_files.npy'
val_X_path = OUT_DIR + '/val_X.npy'
val_Y_path = OUT_DIR + '/val_Y.npy'

if os.path.isfile(val_files_path):
    # load val set
    dg.val_files = np.load(val_files_path)
    val_X = np.load(val_X_path)
    val_Y = np.load(val_Y_path)
else:
    # generate val set
    dg.val_files = {}
    val_X, val_Y = dg.generate_val_set(n=N_VAL_SAMPLES)
    np.save(val_files_path, dg.val_files)
    np.save(val_X_path, val_X)
    np.save(val_Y_path, val_Y)
    
assert len(val_X) == len(val_Y)
print('val samples: %d' % len(val_X))

val samples: 2500


In [7]:
# create model
model = Model_1(classes=LABELS)
model.build()
optimizer = RMSprop(lr=0.001, decay=0.0)
model.m.compile(
    optimizer=optimizer, loss=categorical_crossentropy, metrics=['accuracy']\
)

In [8]:
# generate training data

train_X_file = '%s/train_X.npy' % OUT_DIR
train_Y_file = '%s/train_Y.npy' % OUT_DIR

if os.path.isfile(train_X_file):

    train_X = np.load(train_X_file)
    train_Y = np.load(train_Y_file)

else:

    train_X = None
    train_Y = None

    def gen_training_samples(n, start_i):
        X = np.zeros((n, ) + INPUT_SIZE, dtype=np.float32)
        Y = np.zeros((n, len(LABELS)), dtype=np.float32)
        for i in range(n):
            wave, label = dg.generate_audio()
            msg = dg.normalize_msg(dg.msg(wave))
            msg = np.expand_dims(msg, 2)
            X[i] = msg
            Y[i] = dg.label_to_onehot(label)
        np.save('%s/X_%07d-%07d' % (TRAIN_TMP_DIR, start_i, n + start_i), X)
        np.save('%s/Y_%07d-%07d' % (TRAIN_TMP_DIR, start_i, n + start_i), Y)

    def generate_train_set(n_total=100, n_per_job=10, n_pools=16):
        assert n_total % n_per_job == 0

        global train_X
        global train_Y

        # create temporary dir for generated data
        if not os.path.isdir(TRAIN_TMP_DIR): os.makedirs(TRAIN_TMP_DIR)

        # launch generation in pool of workers

        n_jobs = n_total // n_per_job
        params = map(lambda x: [n_per_job, x * n_per_job], range(n_jobs))

        with Pool(n_pools) as p:
            p.starmap(gen_training_samples, list(params))

        # glue generated files together

        train_X = np.zeros((n_total, ) + INPUT_SIZE, dtype=np.float32)
        train_Y = np.zeros((n_total, len(dg.labels)), dtype=np.float32)

        for i in range(0, n_total, n_per_job):
            X_file = '%s/X_%07d-%07d.npy' % (TRAIN_TMP_DIR, i, i + n_per_job)
            Y_file = '%s/Y_%07d-%07d.npy' % (TRAIN_TMP_DIR, i, i + n_per_job)
            X = np.load(X_file)
            Y = np.load(Y_file)
            train_X[i:i + n_per_job] = X
            train_Y[i:i + n_per_job] = Y

        np.save(train_X_file, train_X)
        np.save(train_Y_file, train_Y)

    %time generate_train_set(n_total=N_TRAIN_SAMPLES, n_per_job=1000, n_pools=16)

assert len(train_X) == len(train_Y)
print('training samples: %d' % len(train_X))

training samples: 500000


In [None]:
N_BATCH = 100
N_EPOCHS = 10
N_STEPS_PER_EPOCH = 1000

model.m.fit(
    x=train_X[:50000],
    y=train_Y[:50000],
    batch_size=N_BATCH,
    epochs=N_EPOCHS,
    verbose=1,
    callbacks=None,
    validation_split=0.0,
    validation_data=(val_X, val_Y),
    shuffle=False,
    class_weight=None,
    sample_weight=None,
    initial_epoch=0)

In [15]:
def train_gen(n_per_batch):

    while True:

        X = np.zeros((n_per_batch, ) + INPUT_SIZE, dtype=np.float32)
        Y = np.zeros((n_per_batch, len(dg.labels)))

        for i in range(n_per_batch):
            wave, label = dg.generate_audio(transform=False)
            y = dg.label_to_onehot(label)
            x = dg.msg(wave)
#             x = dg.normalize_msg(dg.msg(wave))
            x = np.expand_dims(x, 2)
            X[i] = x
            Y[i] = y

        yield (X, Y)

In [None]:
model.m.fit_generator(
    generator=train_gen(50),
    steps_per_epoch=50000,
    epochs=10,
    verbose=1,
    validation_data=(val_X, val_Y),
    use_multiprocessing=True,
    max_queue_size=8)

Epoch 1/10
   42/50000 [..............................] - ETA: 96541s - loss: 14.8440 - acc: 0.0790