In [1]:
import numpy as np
import shutil
import os
from keras.optimizers import RMSprop
from keras.losses import categorical_crossentropy

Using TensorFlow backend.


In [2]:
# --- run identification ---
RUN = 'A'
RND = 0  # presumably a random seed; not referenced by the cells shown here -- confirm

# --- filesystem locations ---
INPUT_DIR = '/d2/caches/tf-speech/train/audio'
OUT_DIR = 'out'
TENSORBOARD_DIR = '/tensorboard/tf-speech/%s' % (RUN,)

# --- model input geometry ---
INPUT_SIZE = (
    64,  # n_mels
    64,  # width
    1,   # channels
)

# --- normalization constants handed to the data generator ---
MSG_NORM_MEAN, MSG_NORM_STD = 116.536, 21.5913

# --- class labels, in output order ---
LABELS = [
    'yes',
    'no',
    'up',
    'down',
    'left',
    'right',
    'on',
    'off',
    'stop',
    'go',
    'unknown',
    'silence',
]

# how many training samples to generate
TRAIN_SAMPLES = 100000


In [3]:
# Execute the helper notebooks before anything else in this notebook.
# NOTE(review): DataGenerator and Model_1, used further down, are not defined
# in this notebook, so they presumably come from these files -- verify.
%run 'lib.ipynb'
%run 'data-generator.ipynb'
%run 'models.ipynb'

In [5]:
# init data gen
# DataGenerator is not defined in this notebook; presumably it is provided by
# one of the helper notebooks executed with %run -- verify.
dg = DataGenerator(input_dir=INPUT_DIR)
# geometry taken from INPUT_SIZE (n_mels x width x 1ch)
dg.n_mels = INPUT_SIZE[0]
dg.msg_w = INPUT_SIZE[1]
# normalization params
# NOTE(review): samplewise_norm is enabled together with a fixed mean/std;
# which of the two normalize_msg actually applies is not visible here -- confirm.
dg.samplewise_norm = True
dg.msg_std = MSG_NORM_STD
dg.msg_mean = MSG_NORM_MEAN

In [None]:
# generate/load val set
# The validation set is cached as three .npy files inside OUT_DIR so that
# repeated runs evaluate on the same samples.
val_files_path = os.path.join(OUT_DIR, 'val_files.npy')
val_X_path = os.path.join(OUT_DIR, 'val_X.npy')
val_Y_path = os.path.join(OUT_DIR, 'val_Y.npy')
val_cache_paths = (val_files_path, val_X_path, val_Y_path)

# Only trust the cache when all three files are present; a partially written
# cache (e.g. an interrupted earlier run) is regenerated instead of failing
# inside np.load.
if all(os.path.isfile(path) for path in val_cache_paths):
    # load val set
    dg.val_files = np.load(val_files_path)
    val_X = np.load(val_X_path)
    val_Y = np.load(val_Y_path)
else:
    # generate val set
    # np.save does not create missing directories, so make sure OUT_DIR exists.
    os.makedirs(OUT_DIR, exist_ok=True)
    # presumably generate_val_set() selects the files and fills dg.val_files,
    # since that attribute is saved right afterwards -- confirm in DataGenerator
    dg.val_files = None
    val_X, val_Y = dg.generate_val_set(n=2000)
    np.save(val_files_path, dg.val_files)
    np.save(val_X_path, val_X)
    np.save(val_Y_path, val_Y)

In [4]:
# create model
# Instantiate and build Model_1 for the LABELS classes, then compile the
# model object it exposes as `.m`.
model = Model_1(classes=LABELS)
model.build()

optimizer = RMSprop(lr=0.001, decay=0.0)
model.m.compile(
    loss=categorical_crossentropy,
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# remove tensorboard data
# Delete the log directory of this run if it exists; nothing happens when it
# is absent.
if os.path.isdir(TENSORBOARD_DIR):
    shutil.rmtree(TENSORBOARD_DIR)

In [45]:
# Buffer with one INPUT_SIZE-shaped slot per training sample (nothing in the
# visible cells fills it yet). Allocated as float32 rather than NumPy's
# float64 default: that halves the footprint and matches the 4-bytes-per-value
# memory estimate computed at the end of this notebook.
train_X = np.zeros((TRAIN_SAMPLES,) + INPUT_SIZE, dtype=np.float32)

def gen_samples(n, global_start_index=0):
    """Run the sample-generation pipeline `n` times and discard the results.

    Every iteration asks the module-level `dg` for one (audio, label) pair,
    passes the audio through `dg.msg` and then `dg.normalize_msg`. Nothing is
    stored or returned, so the function only measures generation cost.

    Args:
        n: number of samples to generate.
        global_start_index: accepted but not used by the current body.
    """
    for _ in range(n):
        audio, _label = dg.generate_audio()
        dg.normalize_msg(dg.msg(audio))


from multiprocessing import Pool


def gen_samples_mp(n=100, pools=4):
    """Run `gen_samples` in a pool of worker processes.

    Submits `pools` tasks of `n` samples each to a pool of `pools` processes,
    i.e. `n * pools` samples in total. The results of the map are discarded.

    Args:
        n: number of samples generated per task.
        pools: number of worker processes, and also the number of tasks.
    """
    per_task_counts = [n for _ in range(pools)]
    with Pool(processes=pools) as workers:
        workers.map(gen_samples, per_task_counts)

In [65]:
# Baseline: time the single-process generator for 100 samples.
%time gen_samples(100)

CPU times: user 12.4 s, sys: 34.7 s, total: 47.1 s
Wall time: 6.44 s


In [63]:
# Time the multi-process generator: 16 tasks of 100 samples on 16 workers.
%time gen_samples_mp(n=100, pools=16)

CPU times: user 24 ms, sys: 128 ms, total: 152 ms
Wall time: 50.7 s


In [59]:
# Speedup estimate: presumably (25 samples x 16 workers in 12.6 s) divided by
# the single-process rate (100 samples in 6.68 s). The timings were entered by
# hand from runs not shown in this notebook -- confirm.
(25 * 16 / 12.6)/ (100 / 6.68) 

2.1206349206349207

In [60]:
# Speedup estimate: presumably (50 samples x 8 workers in 13.4 s) divided by
# the single-process rate (100 samples in 6.48 s). The timings were entered by
# hand from runs not shown in this notebook -- confirm.
(50 * 8 / 13.4)/ (100 / 6.48) 

1.9343283582089554

In [62]:
# Speedup estimate: presumably (25 samples x 32 workers in 24.3 s) divided by
# the single-process rate (100 samples in 6.48 s). The timings were entered by
# hand from runs not shown in this notebook -- confirm.
(25 * 32 / 24.3)/ (100 / 6.48) 

2.1333333333333333

In [64]:
# Speedup estimate for gen_samples_mp(n=100, pools=16): 1600 samples in the
# 50.7 s wall time reported for that call, divided by the single-process rate
# (100 samples in 6.48 s, presumably from an earlier baseline run -- confirm).
(100 * 16 / 50.7)/ (100 / 6.48) 

2.0449704142011833

In [66]:
# Throughput in samples per second, presumably for the 25 samples x 32 workers
# run that took 24.3 s -- confirm.
(25 * 32 / 24.3)

32.92181069958848

In [73]:
# Presumably the hours needed to generate 1,000,000 samples at roughly
# 32 samples per second (3600 seconds per hour) -- confirm.
1000000/32/3600

8.680555555555555

In [71]:
# Size in GiB of 1e6 samples of 64*64 values each; the factor 4 is presumably
# bytes per float32 value -- confirm.
1e6*64*64*4/1024/1024/1024, 'G'

(15.2587890625, 'G')