In [39]:
import os

import numpy as np
from keras.losses import categorical_crossentropy
from keras.optimizers import RMSprop

In [47]:
# --- Run identity and locations ---------------------------------------
RND = 0  # NOTE(review): presumably a random seed; not used in the visible cells — confirm
RUN = 'A'
OUT_DIR = 'out/%s' % RUN  # per-run output directory
# NOTE(review): absolute, machine-specific path to the training audio.
INPUT_DIR = '/d2/caches/tf-speech/train/audio'

# --- Input geometry and normalization statistics ----------------------
MSG_SIZE = (64, 64)  # (n_mels, width)
MSG_NORM_STD = 21.5913
MSG_NORM_MEAN = 116.536

# --- Target classes, in the order handed to the model -----------------
LABELS = [
    'yes',
    'no',
    'up',
    'down',
    'left',
    'right',
    'on',
    'off',
    'stop',
    'go',
    'unknown',
    'silence',
]

In [48]:
# Execute the helper notebooks with IPython's %run magic so that the names
# they define become available to the cells below.
# NOTE(review): presumably 'data-generator.ipynb' provides DataGenerator and
# 'models.ipynb' provides Model_1 (both are used later but not defined in the
# visible cells) — confirm.
%run 'lib.ipynb'
%run 'data-generator.ipynb'
%run 'models.ipynb'

In [49]:
# Instantiate the network for the configured label set and build it.
model = Model_1(classes=LABELS)
model.build()

# Compile the wrapped Keras model (exposed as `model.m`) with RMSprop and
# categorical cross-entropy, reporting accuracy as the metric.
optimizer = RMSprop(lr=0.001, decay=0.0)
model.m.compile(
    loss=categorical_crossentropy,
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [50]:
# Data generator reading from INPUT_DIR.
dg = DataGenerator(input_dir=INPUT_DIR)

# Input size: MSG_SIZE is (n_mels, width).
dg.n_mels, dg.msg_w = MSG_SIZE

# Normalization settings, taken from the run configuration.
dg.samplewise_norm = True
dg.msg_std = MSG_NORM_STD
dg.msg_mean = MSG_NORM_MEAN

In [51]:
# Load the cached validation set from OUT_DIR, or generate it once and cache
# it. Three files are stored: the generator's `val_files`, the validation
# inputs (val_X) and the validation targets (val_Y).
val_files_path = OUT_DIR + '/val_files.npy'
val_X_path = OUT_DIR + '/val_X.npy'
val_Y_path = OUT_DIR + '/val_Y.npy'
val_cache_paths = (val_files_path, val_X_path, val_Y_path)

# Only trust the cache when all three files are present; with a partial cache
# (e.g. left by an interrupted run) np.load would fail on the missing file.
if all(os.path.isfile(path) for path in val_cache_paths):
    dg.val_files = np.load(val_files_path)
    val_X = np.load(val_X_path)
    val_Y = np.load(val_Y_path)
else:
    # NOTE(review): presumably generate_val_set also populates dg.val_files,
    # which is saved below — confirm against the DataGenerator implementation.
    val_X, val_Y = dg.generate_val_set(n=10)
    # np.save does not create missing directories, so make sure the per-run
    # output directory exists before writing the cache.
    if not os.path.isdir(OUT_DIR):
        os.makedirs(OUT_DIR)
    np.save(val_files_path, dg.val_files)
    np.save(val_X_path, val_X)
    np.save(val_Y_path, val_Y)