# Train MST+CNN model

In [1]:
import numpy as np
import os
import librosa
import glob
import sys

sys.path.insert(0,'../..')
from sed_endtoend.cnn.model import build_custom_cnn
from sed_endtoend.mst.model import MST
from sed_endtoend.callbacks import MetricsCallback
from sed_endtoend.concatenate_models import concatenate
from sed_endtoend.data_generator import DataGenerator, Scaler
#from sed_endtoend.gen_mel_filters import mel_filters
from keras.callbacks import CSVLogger
from keras.optimizers import Adam

# Expose only GPU 0 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# Run configuration flags read by the later cells.
# Nfiles: maximum number of files to keep from each split; None keeps every file found.
Nfiles = None
# resume: when True, the training cell reloads the experiment's best weights before fitting.
resume = False
# load_subset: alias of Nfiles; the data-loading cell uses it to slice the file lists.
load_subset = Nfiles

# Pull the experiment settings into the notebook namespace. The later cells use
# names that are not defined anywhere in this notebook (e.g. sequence_time, sr,
# audio_folder, label_folder, expfolder, learning_rate), so they are presumably
# provided by params.py.
# NOTE(review): this star import runs AFTER the flags above, so if params.py
# defines Nfiles, resume or load_subset it silently overrides them — confirm.
from params import *

Using TensorFlow backend.


In [2]:
# Keyword arguments handed to every DataGenerator built in this notebook.
params = dict(
    sequence_time=sequence_time,
    sequence_hop_time=sequence_hop_time,
    label_list=label_list,
    audio_hop=audio_hop,
    audio_win=audio_win,
    n_fft=n_fft,
    sr=sr,
    mel_bands=mel_bands,
    normalize=normalize_data,
    frames=frames,
    get_annotations=get_annotations,
    dataset=dataset,
)

# Frames per sequence: sequence length in samples divided by the hop, rounded up.
# (The training cell later replaces this with the value measured on the data.)
sequence_frames = int(np.ceil((sequence_time * sr) / audio_hop))

# Datasets
def label_path_for(audio_path, subset):
    """Return the path of the annotation file that belongs to an audio file.

    Parameters
    ----------
    audio_path : str
        Path of the audio file.
    subset : str
        Sub-folder of ``label_folder`` holding the annotations of that split
        ('train' or 'validate' in this notebook).

    Returns
    -------
    str
        ``<label_folder>/<subset>/<stem>.txt``, where ``<stem>`` is the audio
        file name with its extension removed.
    """
    # splitext removes only the trailing extension, whatever its letter case,
    # whereas str.replace('.wav', '.txt') would also rewrite a '.wav' found in
    # the middle of the name and would miss an upper-case '.WAV'.
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    return os.path.join(label_folder, subset, stem + '.txt')


# Sorted so that the order (and therefore any subset taken below) is reproducible.
train_files = sorted(glob.glob(os.path.join(audio_folder, 'train', '*.wav')))
val_files = sorted(glob.glob(os.path.join(audio_folder, 'validate', '*.wav')))

# Optionally keep only the first `load_subset` files of each split.
if load_subset is not None:
    train_files = train_files[:load_subset]
    val_files = val_files[:load_subset]

# Not referenced by the cells visible in this notebook; kept so that any other
# cell expecting these names still finds them.
train_labels = {}
train_mel = {}
val_labels = {}
val_mel = {}

# Map every audio file (training first, then validation) to its annotation file.
# Generator expressions are used so that no loop variable leaks into the
# notebook namespace (the previous loops rebound the builtin `id`).
labels = {}
labels.update((audio_path, label_path_for(audio_path, 'train')) for audio_path in train_files)
labels.update((audio_path, label_path_for(audio_path, 'validate')) for audio_path in val_files)

# Training generator: built with the shared `params` as they stand at this point.
print('Making training generator')
training_generator = DataGenerator(train_files, labels, **params)

# From here on `params` carries a hop equal to the sequence length
# (to calculate F1_1s). This edits the dict in place, so it must stay after the
# training generator has been created.
params.update(sequence_hop_time=sequence_time)

print('Making validation generator')
validation_generator = DataGenerator(val_files, labels, **params)

# Materialise both splits (validation first). return_all() yields four items;
# the second one is not used in this notebook.
print('Getting data')
(x_val, _, mel_val, y_val) = validation_generator.return_all()
(x_train, _, mel_train, y_train) = training_generator.return_all()

# Fit the scaler on the training mel data only, then apply it to both splits
# (training first, validation second).
print('Founding scaler')
scaler = Scaler(normalizer=normalize_data)
scaler.fit(mel_train)
mel_train, mel_val = [scaler.transform(mel) for mel in (mel_train, mel_val)]

Founding scaler
Making training generator
Making validation generator
Getting data
0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %
0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %


In [3]:
# Model dimensions: samples per sequence from duration x sample rate, frames
# per sequence read from the second axis of the validation mel array.
sequence_samples = int(sequence_time * sr)
sequence_frames = mel_val.shape[1]

print('\nBuilding model...')

# The two sub-models are created in this order on purpose: Keras numbers
# layers/models as they are created, so swapping them would rename them.
model_mel = MST(mel_bands, sequence_samples, audio_win, audio_hop)
model_cnn = build_custom_cnn(
    n_freq_cnn=mel_bands,
    n_frames_cnn=sequence_frames,
    large_cnn=large_cnn,
)

# Initialise each sub-model from its saved best weights.
mst_weights_file = "../../sed_endtoend/mst/weights_best.hdf5"
cnn_weights_file = "../../sed_endtoend/cnn/weights_best.hdf5"
model_mel.load_weights(mst_weights_file)
model_cnn.load_weights(cnn_weights_file)

# Join both sub-models into the single model that is trained below.
model = concatenate(
    sequence_frames,
    audio_win,
    model_cnn,
    model_mel,
    sequence_samples=sequence_samples,
    frames=frames,
)

model.summary()

opt = Adam(lr=learning_rate)

# Best checkpoint of this experiment: read when resuming, written by the
# metrics callback during training.
weights_best_file = os.path.join(expfolder, 'weights_best.hdf5')

# Best validation F1 known so far. A fresh run starts from 0; a resumed run
# restores the checkpoint together with the score it achieved.
f1s_best = 0
if resume:
    print('Loading best weights and resuming...')
    model.load_weights(weights_best_file)
    f1s_best = resume_f1_best

# Fit model
print('\nFitting model...')

# Previously the callback always received the literal 0 here, so `f1s_best`
# was computed on resume but never used and the first resumed epoch could
# overwrite the best checkpoint with a worse model.
# NOTE(review): assumes the 3rd/4th positional arguments of MetricsCallback are
# (current F1, best F1 so far) — confirm against sed_endtoend/callbacks.py.
metrics_callback = MetricsCallback(x_val, [y_val, mel_val], 0, f1s_best, weights_best_file)

# Append to the existing log when resuming instead of truncating it; with
# resume=False this is the CSVLogger default (overwrite).
csv_logger = CSVLogger('training.log', append=resume)

# Two outputs, two losses, in the same order as the targets given to fit():
# binary cross-entropy on y_train (weight 0.9) and mean squared error on
# mel_train (weight 0.1).
model.compile(loss=['binary_crossentropy', 'mean_squared_error'],
              loss_weights=[0.9, 0.1],
              optimizer=opt)

# Train with twice the configured batch size. No validation split is taken
# here: the validation data is passed to metrics_callback instead.
history = model.fit(x=x_train, y=[y_train, mel_train], batch_size=2*batch_size,
                    epochs=epochs, verbose=fit_verbose,
                    validation_split=0.0,
                    shuffle=True,
                    callbacks=[metrics_callback, csv_logger])



W1003 11:38:21.972408 139944068986624 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W1003 11:38:21.987283 139944068986624 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W1003 11:38:21.989398 139944068986624 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W1003 11:38:22.040526 139944068986624 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.com


Building model...


W1003 11:38:22.232811 139944068986624 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W1003 11:38:22.242889 139944068986624 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W1003 11:38:27.990638 139944068986624 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

W1003 11:38:28.255285 139944068986624 deprecation.py:506] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with k

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 22050, 1)          0         
_________________________________________________________________
model_1 (Model)              (None, 44, 128)           1020288   
_________________________________________________________________
model_2 (Model)              (None, 10)                2481162   
Total params: 3,501,450
Trainable params: 3,498,634
Non-trainable params: 2,816
_________________________________________________________________

Fitting model...
Epoch 1/101
F1 = 0.3790, ER = 0.7390 -  Best val F1s: 0.3790 (IMPROVEMENT, saving)

Epoch 2/101
F1 = 0.4056, ER = 0.7147 -  Best val F1s: 0.4056 (IMPROVEMENT, saving)

Epoch 3/101
F1 = 0.4105, ER = 0.7062 -  Best val F1s: 0.4105 (IMPROVEMENT, saving)

Epoch 4/101
F1 = 0.4105, ER = 0.7103 - Best val F1s: 0.4105 (2)

Epoch 5/101
F1 = 0.4134, ER = 0.7078 -  Best val F1s: 0.4

F1 = 0.4724, ER = 0.5980 - Best val F1s: 0.4743 (36)

Epoch 83/101
F1 = 0.4649, ER = 0.6004 - Best val F1s: 0.4743 (36)

Epoch 84/101
F1 = 0.4659, ER = 0.5903 - Best val F1s: 0.4743 (36)

Epoch 85/101
F1 = 0.4686, ER = 0.6015 - Best val F1s: 0.4743 (36)

Epoch 86/101
F1 = 0.4697, ER = 0.5809 - Best val F1s: 0.4743 (36)

Epoch 87/101
F1 = 0.4649, ER = 0.6001 - Best val F1s: 0.4743 (36)

Epoch 88/101
F1 = 0.4743, ER = 0.5759 - Best val F1s: 0.4743 (36)

Epoch 89/101
F1 = 0.4707, ER = 0.5854 - Best val F1s: 0.4743 (36)

Epoch 90/101
F1 = 0.4567, ER = 0.6191 - Best val F1s: 0.4743 (36)

Epoch 91/101
F1 = 0.4598, ER = 0.6125 - Best val F1s: 0.4743 (36)

Epoch 92/101
F1 = 0.4688, ER = 0.5981 - Best val F1s: 0.4743 (36)

Epoch 93/101
F1 = 0.4698, ER = 0.5852 - Best val F1s: 0.4743 (36)

Epoch 94/101
F1 = 0.4777, ER = 0.5741 -  Best val F1s: 0.4777 (IMPROVEMENT, saving)

Epoch 95/101
F1 = 0.4610, ER = 0.5967 - Best val F1s: 0.4777 (93)

Epoch 96/101
F1 = 0.4736, ER = 0.5846 - Best val F1s: 0.4