In [1]:
import os
import math
import shutil
import numpy as np
from multiprocessing import Pool
from keras.optimizers import RMSprop, SGD
from keras.losses import categorical_crossentropy
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

Using TensorFlow backend.


In [2]:
# --- run identity and hardware ---------------------------------------------
RUN = 'F3'  # tag of this run; embedded in the TensorBoard and checkpoint paths
GPU = '0'  # exported as CUDA_VISIBLE_DEVICES further down
RND = 1  # NOTE(review): presumably a random seed; not referenced in the cells visible here

# --- filesystem layout -----------------------------------------------------
INPUT_DIR = '/d2/caches/tf-speech/train/audio'
OUT_DIR = 'out_1_5m/'
# OUT_DIR already ends with '/', so the derived paths below contain '//'.
TRAIN_TMP_DIR = '%s/train' % OUT_DIR
MODELS_DIR = OUT_DIR + '/models/' + RUN
TENSORBOARD_DIR = '/tensorboard/tf-speech/' + RUN

# --- data dimensions -------------------------------------------------------
INPUT_SIZE = (64, 64, 1)  # n_mels x width x 1ch
N_TRAIN_SAMPLES = 1500000  # how many training samples to generate
N_VAL_SAMPLES = 3000

In [3]:
# Restrict CUDA to the single device selected by GPU; device numbering
# follows PCI bus order.
os.environ.update(
    CUDA_DEVICE_ORDER='PCI_BUS_ID',
    CUDA_VISIBLE_DEVICES=GPU,
)

In [4]:
# Execute the helper notebooks so that the names they define become available
# in this notebook's namespace.
# NOTE(review): LABELS and Model_3 are used by later cells but are not defined
# in the cells shown here; presumably they come from these notebooks. Confirm.
%run 'lib.ipynb'
%run 'data-generator.ipynb'
%run 'models.ipynb'

In [5]:
# Wipe any TensorBoard logs left by a previous run with the same RUN tag,
# so the new curves are not mixed with stale ones.
if os.path.isdir(TENSORBOARD_DIR):
    shutil.rmtree(TENSORBOARD_DIR)

In [6]:
# load val data
# Validation inputs and targets are two parallel .npy arrays under OUT_DIR.

val_X_path, val_Y_path = OUT_DIR + '/val_X.npy', OUT_DIR + '/val_Y.npy'
val_X, val_Y = np.load(val_X_path), np.load(val_Y_path)

# inputs and targets must line up one-to-one
assert len(val_X) == len(val_Y)
print('val samples: %d' % len(val_X))

val samples: 3000


In [7]:
# load training data
# The training set is memory-mapped read-only instead of being loaded into RAM.
# A raw memmap carries no shape metadata, so the shapes are supplied here from
# N_TRAIN_SAMPLES, INPUT_SIZE and the number of labels.

train_X_file = OUT_DIR + '/train_X.mem'
train_Y_file = OUT_DIR + '/train_Y.mem'

train_X = np.memmap(
    train_X_file,
    dtype=np.float32,
    mode='r',
    shape=(N_TRAIN_SAMPLES, ) + INPUT_SIZE)
train_Y = np.memmap(
    train_Y_file,
    dtype=np.float32,
    mode='r',
    shape=(N_TRAIN_SAMPLES, len(LABELS)))

# inputs and targets must line up one-to-one
assert len(train_X) == len(train_Y)
print('training samples: %d' % len(train_X))

training samples: 1500000


In [8]:
# create model
# Model_3 is sized from the input tensor shape and the number of labels.
model = Model_3(input_size=INPUT_SIZE, output_size=len(LABELS))
model.build()

# Plain SGD with an initial learning rate of 0.1.
optimizer = SGD(lr=0.1)

# NOTE(review): model.m is presumably the underlying Keras model; confirm in models.ipynb.
model.m.compile(
    loss=categorical_crossentropy,
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [9]:
# create models dir
# Any checkpoints saved by an earlier run with the same RUN tag are deleted,
# then the directory is recreated empty.
if os.path.isdir(MODELS_DIR):
    shutil.rmtree(MODELS_DIR)
os.makedirs(MODELS_DIR)

In [10]:
# train model

N_EPOCHS = 100
N_PER_BATCH = 500
# Batches per epoch: the number of full batches in the training set, divided
# by 15 so that one "epoch" covers roughly 1/15 of the data.
STEPS_PER_EPOCH = (len(train_X) // N_PER_BATCH) // 15

print('# samples per epoch: %d\n' % (STEPS_PER_EPOCH * N_PER_BATCH))


def train_generator(n_per_batch, random=False):
    """Yield ``(batch_X, batch_Y)`` training batches forever.

    Batches are read from the module-level ``train_X`` / ``train_Y`` arrays,
    using the same indices for both so inputs and targets stay paired.

    Args:
        n_per_batch: number of samples per yielded batch.
        random: if True, every batch is made of ``n_per_batch`` indices drawn
            uniformly at random (with replacement) from the whole training
            set. If False, the training set is walked sequentially in slices
            of ``n_per_batch`` and restarts from the beginning once the end
            is passed; the last slice before the restart may be shorter than
            ``n_per_batch``.

    Yields:
        Tuple ``(batch_X, batch_Y)``.
    """
    n_samples = len(train_X)
    if random:
        while True:
            # Use the caller-supplied batch size, not the N_PER_BATCH global,
            # so the parameter is honoured in random mode as well.
            ixs = np.random.randint(0, n_samples, size=n_per_batch)
            yield (train_X[ixs], train_Y[ixs])
    else:
        start_i = 0
        while True:
            # wrap around once the whole set has been consumed
            if start_i >= n_samples:
                start_i = 0
            stop_i = start_i + n_per_batch
            yield (train_X[start_i:stop_i], train_Y[start_i:stop_i])
            start_i = stop_i


# Checkpoint file name encodes the epoch number plus train/val loss and accuracy.
checkpoint_path = (
    MODELS_DIR +
    '/e{epoch:03d}-l={loss:.5f}-vl={val_loss:.5f}-a={acc:.5f}-va={val_acc:.5f}.h5')

training_callbacks = [
    # scalar logs for TensorBoard
    TensorBoard(log_dir=TENSORBOARD_DIR),
    # save_best_only=False: the full model is written after every epoch
    ModelCheckpoint(
        checkpoint_path,
        monitor='val_acc',
        verbose=0,
        save_best_only=False,
        save_weights_only=False,
        mode='auto'),
    # scale the learning rate by 0.15 when val_loss has not improved for 2 epochs
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.15,
        patience=2,
        min_lr=1e-9,
        verbose=1),
    # give up when val_loss has not improved by at least 1e-6 for 50 epochs
    EarlyStopping(
        monitor='val_loss',
        min_delta=0.000001,
        patience=50,
        verbose=1,
        mode='auto'),
]

# Train on randomly sampled batches; validate on the fixed validation set.
model.m.fit_generator(
    train_generator(N_PER_BATCH, random=True),
    STEPS_PER_EPOCH,
    epochs=N_EPOCHS,
    validation_data=(val_X, val_Y),
    callbacks=training_callbacks)

# samples per epoch: 100000

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 00013: reducing learning rate to 0.015000000223517418.
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 00017: reducing learning rate to 0.002250000089406967.
Epoch 19/100
Epoch 20/100
Epoch 00019: reducing learning rate to 0.0003375000203959644.
Epoch 21/100
Epoch 22/100
Epoch 00021: reducing learning rate to 5.062500131316483e-05.
Epoch 23/100
Epoch 24/100
Epoch 00023: reducing learning rate to 7.593749978695996e-06.
Epoch 25/100
Epoch 26/100
Epoch 00025: reducing learning rate to 1.1390625104468198e-06.
Epoch 27/100
Epoch 28/100
Epoch 00027: reducing learning rate to 1.7085937997762812e-07.
Epoch 29/100
Epoch 30/100
Epoch 00029: reducing learning rate to 2.562890699664422e-08.
Epoch 31/100
Epoch 32/100
Epoch 00031: reducing learning rate to 3.8443360494966324e-09.
Epoch

KeyboardInterrupt: 

In [None]:
# !cp out_1m/models/E/e007-l\=0.29786-vl\=0.23771-a\=0.91079-va\=0.93333.h5 models/E_e007-l\=0.29786-vl\=0.23771-a\=0.91079-va\=0.93333.h5