In [1]:
import os
import math
import shutil
import numpy as np
from multiprocessing import Pool
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau

Using TensorFlow backend.


In [2]:
# --- run configuration -------------------------------------------------------
RND = 0  # random seed value (not applied anywhere in this notebook's visible cells)
RUN = 'C'  # run identifier; namespaces the tensorboard and model output dirs

# NOTE: OUT_DIR keeps its trailing slash because other cells (and possibly the
# notebooks pulled in via %run) concatenate onto it directly.
OUT_DIR = 'out_1m/'
# Derived paths are built with os.path.join so they do not end up with a
# doubled separator ('out_1m//train') as plain string concatenation would give.
TRAIN_TMP_DIR = os.path.join(OUT_DIR, 'train')
INPUT_DIR = '/d2/caches/tf-speech/train/audio'
TENSORBOARD_DIR = '/tensorboard/tf-speech/%s' % RUN
MODELS_DIR = os.path.join(OUT_DIR, 'models', RUN)

# --- model input / normalization ---------------------------------------------
INPUT_SIZE = (64, 64, 1)  # n_mels x width x 1ch
# Handed to the data generator as msg_mean / msg_std.
# NOTE(review): presumably dataset-wide mel-spectrogram statistics — confirm.
MSG_NORM_MEAN = 116.536
MSG_NORM_STD = 21.5913

# Output classes; the model's output size is len(LABELS).
LABELS = [
    'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go',
    'unknown', 'silence'
]

# --- dataset sizes -----------------------------------------------------------
N_VAL_SAMPLES = 3000
N_TRAIN_SAMPLES = 1000000  # how many training samples to generate

In [3]:
# Execute the helper notebooks so the names they define are available to the
# cells below.
# NOTE(review): DataGenerator and Model_2 are used later but are not defined in
# this notebook — presumably they come from data-generator.ipynb and
# models.ipynb respectively; confirm. These notebooks may also read the
# constants from the config cell, so that cell has to run first.
%run 'lib.ipynb'
%run 'data-generator.ipynb'
%run 'models.ipynb'

In [4]:
# Wipe any tensorboard logs left over from a previous run with the same RUN id,
# so the new training curves are not mixed with stale ones.
if os.path.isdir(TENSORBOARD_DIR):
    shutil.rmtree(TENSORBOARD_DIR)

In [5]:
# Build the data generator over the raw audio directory and the label set.
dg = DataGenerator(input_dir=INPUT_DIR, labels=LABELS)

# Spectrogram geometry follows the model input: (n_mels, width, channels).
dg.n_mels, dg.msg_w = INPUT_SIZE[:2]

# Normalization settings.
dg.samplewise_norm = True
dg.msg_std, dg.msg_mean = MSG_NORM_STD, MSG_NORM_MEAN

In [6]:
# generate/load val data
#
# The validation set is generated once and cached under OUT_DIR as three .npy
# files; later runs reload it. The presence of val_files.npy decides which
# path is taken.

val_files_path = OUT_DIR + '/val_files.npy'
val_X_path = OUT_DIR + '/val_X.npy'
val_Y_path = OUT_DIR + '/val_Y.npy'

if not os.path.isfile(val_files_path):
    # generate, save
    # np.save does not create missing directories; make sure OUT_DIR exists
    # before the (expensive) generation so the results can actually be stored.
    if not os.path.isdir(OUT_DIR):
        os.makedirs(OUT_DIR)
    dg.val_files = {}
    val_X, val_Y = dg.generate_val_set(n=N_VAL_SAMPLES)
    np.save(val_files_path, dg.val_files)
    np.save(val_X_path, val_X)
    np.save(val_Y_path, val_Y)
else:
    # load
    # val_files is saved from a Python object (a dict above), which np.save
    # stores as a pickled 0-d object array. Loading it back therefore needs
    # allow_pickle=True, and the 0-d wrapper must be unwrapped with .item() so
    # dg.val_files has the same type as on the generate path.
    # NOTE: unpickling is only acceptable because this cache is written by this
    # notebook itself; do not point it at untrusted files.
    cached_val_files = np.load(val_files_path, allow_pickle=True)
    if cached_val_files.shape == ():
        cached_val_files = cached_val_files.item()
    dg.val_files = cached_val_files
    val_X = np.load(val_X_path)
    val_Y = np.load(val_Y_path)

assert len(val_X) == len(val_Y)
print('val samples: %d' % len(val_X))

val samples: 3000


In [7]:
# Training data lives in two raw float32 files that are memory-mapped below.
# They are generated only when the X file is not already present.

train_X_file = '%s/train_X.mem' % OUT_DIR
train_Y_file = '%s/train_Y.mem' % OUT_DIR

if not os.path.isfile(train_X_file):
    dg.generate_train_set(
        X_file=train_X_file,
        Y_file=train_Y_file,
        tmp_dir=TRAIN_TMP_DIR,
        n_total=N_TRAIN_SAMPLES,
        n_per_job=1000,
        n_pools=16)

# Open both files read-only; the shapes must match what was written.
train_X = np.memmap(
    train_X_file,
    dtype=np.float32,
    mode='r',
    shape=(N_TRAIN_SAMPLES, ) + INPUT_SIZE)
train_Y = np.memmap(
    train_Y_file,
    dtype=np.float32,
    mode='r',
    shape=(N_TRAIN_SAMPLES, len(dg.labels)))

assert len(train_X) == len(train_Y)
print('training samples: %d' % len(train_X))

training samples: 1000000


In [8]:
# Instantiate and build the network, then compile the underlying Keras model
# (exposed as model.m) with RMSprop and categorical cross-entropy.
model = Model_2(input_size=INPUT_SIZE, output_size=len(LABELS))
model.build()

optimizer = RMSprop(lr=1e-3)
model.m.compile(
    optimizer=optimizer,
    loss=categorical_crossentropy,
    metrics=['accuracy'])

In [9]:
# Start from an empty checkpoint directory for this RUN: any checkpoints from
# a previous run with the same id are deleted.
if os.path.isdir(MODELS_DIR):
    shutil.rmtree(MODELS_DIR)
os.makedirs(MODELS_DIR)

In [10]:
# LR schedule (disabled: the training cell below uses the ReduceLROnPlateau callback instead)

# def step_decay(epoch, initial_lr=0.0001, drop=.5, epochs_drop=5.):
#     lrate = initial_lr * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
#     return lrate

# plt.plot([step_decay(x) for x in range(0, 25)])

In [11]:
# train model

N_PER_BATCH = 500
N_EPOCHS = 100
# The trailing "// 10" makes one Keras "epoch" cover a tenth of the train set,
# i.e. a full pass over the data is spread across 10 epochs.
STEPS_PER_EPOCH = (len(train_X) // N_PER_BATCH) // 10

print('# samples per epoch: %d\n' % (STEPS_PER_EPOCH * N_PER_BATCH))


def train_generator(n_per_batch, X=None, Y=None):
    """Yield (batch_X, batch_Y) slices forever, cycling over the data in order.

    Batches are consecutive, non-shuffled slices. When the data length is not
    a multiple of ``n_per_batch`` the last batch of each pass is shorter; the
    next batch then restarts from index 0.

    Args:
        n_per_batch: number of samples per batch; must be a positive integer.
        X: sliceable inputs. Defaults to the notebook-level ``train_X``.
        Y: sliceable targets aligned with ``X``. Defaults to the
            notebook-level ``train_Y``.

    Yields:
        Tuples ``(X[i:i + n_per_batch], Y[i:i + n_per_batch])``.

    Raises:
        ValueError: if ``n_per_batch`` is not positive or ``X`` is empty
            (either would otherwise produce an endless stream of empty
            batches). Raised on the first ``next()`` call.
    """
    # Fall back to the notebook globals so existing calls keep working.
    if X is None:
        X = train_X
    if Y is None:
        Y = train_Y

    if n_per_batch <= 0:
        raise ValueError('n_per_batch must be positive, got %r' % (n_per_batch, ))
    n_total = len(X)
    if n_total == 0:
        raise ValueError('cannot generate batches from an empty dataset')

    start_i = 0
    while True:
        # wrap around once the previous batch reached the end of the data
        if start_i >= n_total:
            start_i = 0
        stop_i = start_i + n_per_batch
        yield (X[start_i:stop_i], Y[start_i:stop_i])
        start_i = stop_i


# Callbacks, in the order they are handed to Keras.

# Training curves for tensorboard.
tensorboard_cb = TensorBoard(log_dir=TENSORBOARD_DIR)

# A checkpoint of the full model is written after every epoch
# (save_best_only=False), with the epoch metrics baked into the file name.
checkpoint_cb = ModelCheckpoint(
    MODELS_DIR +
    '/e{epoch:03d}-l={loss:.5f}-vl={val_loss:.5f}-a={acc:.5f}-va={val_acc:.5f}.h5',
    monitor='val_acc',
    verbose=0,
    save_best_only=False,
    save_weights_only=False,
    mode='auto')

# Cut the learning rate by 10x when val_loss fails to improve (patience of one
# epoch), never going below 1e-7.
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=1,
    min_lr=1e-7,
    verbose=1)

# Stop once val_acc has not improved by at least 1e-5 for 5 epochs.
early_stopping_cb = EarlyStopping(
    monitor='val_acc',
    min_delta=0.00001,
    patience=5,
    verbose=1,
    mode='auto')

training_callbacks = [
    tensorboard_cb,
    checkpoint_cb,
    reduce_lr_cb,
    early_stopping_cb,
]

# Train from the endless batch generator; validation uses the in-memory val set.
model.m.fit_generator(
    train_generator(N_PER_BATCH),
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=N_EPOCHS,
    validation_data=(val_X, val_Y),
    callbacks=training_callbacks)

# samples per epoch: 100000

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 00004: reducing learning rate to 0.00010000000474974513.
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 00009: reducing learning rate to 1.0000000474974514e-05.
Epoch 11/100
Epoch 00010: reducing learning rate to 1.0000000656873453e-06.
Epoch 12/100
Epoch 00011: reducing learning rate to 1.0000001111620805e-07.
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 00024: early stopping


<keras.callbacks.History at 0x7fd4d0d979b0>