# Train SMel+S-CNN model

In [1]:
%load_ext autoreload
%autoreload 2


import numpy as np
import os
import librosa
import glob
import sys
sys.path.insert(0,'../..')

from sed_endtoend.cnn.model import build_custom_cnn
from sed_endtoend.smel.model import SMel
from sed_endtoend.callbacks import MetricsCallback
from sed_endtoend.concatenate_models import concatenate
from sed_endtoend.data_generator import DataGenerator, Scaler
from sed_endtoend.optimizer import Adam_Multipliers

from keras.callbacks import CSVLogger

# Restrict CUDA device visibility for this process to the device with index "1".
# NOTE(review): this is set after the keras import above; presumably it still
# takes effect because the backend only touches the GPU later -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

# Star-import of the experiment parameters. Names used by the later cells but
# never defined in this notebook (e.g. sequence_time, sr, audio_hop,
# audio_folder, label_folder, expfolder, learning_rate, epochs) presumably
# come from here -- verify against params.py.
from params import *

# Run-level switches. These assignments come after the star-import, so they
# override any same-named values that params might define.
# Nfiles: when not None, the data cell keeps only the first Nfiles training
#         files and the first Nfiles validation files; None uses every file.
Nfiles = None
# resume: when True, the training cell reloads 'weights_best.hdf5' from
#         expfolder before fitting.
resume = False
# Alias of Nfiles that the data cell actually reads.
load_subset = Nfiles

Using TensorFlow backend.


In [2]:
# Keyword arguments shared by the training and validation DataGenerator
# instances. None of these values are defined in this notebook; they are
# expected to be provided by the star-import of `params`.
params = {'sequence_time': sequence_time, 'sequence_hop_time': sequence_hop_time,
          'label_list': label_list, 'audio_hop': audio_hop, 'audio_win': audio_win,
          'n_fft': n_fft, 'sr': sr, 'mel_bands': mel_bands, 'normalize': normalize_data,
          'frames': frames, 'get_annotations': get_annotations, 'dataset': dataset}

# Number of hops of `audio_hop` samples needed to span one sequence (rounded up).
sequence_frames = int(np.ceil(sequence_time*sr/audio_hop))

# Datasets
labels = {}  # audio file path -> annotation (.txt) file path

# Sorted so that the file order (and therefore any subset) is deterministic.
train_files = sorted(glob.glob(os.path.join(audio_folder, 'train', '*.wav')))
val_files = sorted(glob.glob(os.path.join(audio_folder, 'validate', '*.wav')))

# Optional quick run: keep only the first `load_subset` files of each split.
if load_subset is not None:
    train_files = train_files[:load_subset]
    val_files = val_files[:load_subset]

# Not read anywhere in the visible cells; kept so that any later cell relying
# on these names keeps working.
train_labels = {}
train_mel = {}
val_labels = {}
val_mel = {}


def _annotation_path(wav_path, subset):
    """Return the annotation file path that pairs with an audio file.

    Parameters
    ----------
    wav_path : str
        Path of the audio file.
    subset : str
        Sub-folder of `label_folder` that holds the annotation
        ('train' or 'validate').

    Returns
    -------
    str
        `label_folder/subset/<audio file name with its extension swapped for .txt>`.
    """
    # splitext only strips the trailing extension, whereas
    # str.replace('.wav', '.txt') would rewrite every '.wav' inside the name.
    stem, _ = os.path.splitext(os.path.basename(wav_path))
    return os.path.join(label_folder, subset, stem + '.txt')


# Comprehensions keep the loop variable out of the notebook namespace; the
# previous loops used `id`, which shadowed the builtin for the whole kernel.
labels.update({wav_path: _annotation_path(wav_path, 'train') for wav_path in train_files})
labels.update({wav_path: _annotation_path(wav_path, 'validate') for wav_path in val_files})

# Generators
print('Making training generator')
training_generator = DataGenerator(train_files, labels, **params)

# From here on the hop equals the sequence length. The training generator is
# unaffected: `**params` handed it the previous value when it was built.
params['sequence_hop_time'] = sequence_time # To calculate F1_1s

print('Making validation generator')
validation_generator = DataGenerator(val_files, labels, **params)

print('Getting data')

# return_all() yields four values; the second one is not needed here.
# NOTE(review): x_* are presumably the audio inputs, mel_* the mel-spectrogram
# targets and y_* the event labels -- confirm against DataGenerator.
x_val,_,mel_val,y_val = validation_generator.return_all()
x_train,_,mel_train,y_train = training_generator.return_all()

print('Founding scaler')
scaler = Scaler(normalizer=normalize_data)

# The scaler is fitted on the training mel data only and then applied to both
# splits, so no validation statistics enter the normalisation.
scaler.fit(mel_train)

mel_train = scaler.transform(mel_train)
mel_val = scaler.transform(mel_val)

Making training generator
Making validation generator
Getting data
0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %
0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %
Founding scaler


In [3]:
# Build model

print('\nBuilding model...')

# Length of one input sequence in audio samples.
sequence_samples = int(sequence_time*sr)

# Two sub-models: SMel (front-end) and the custom CNN, joined further below
# into a single end-to-end model.
model_mel = SMel(mel_bands, sequence_samples, audio_win, audio_hop)
model_cnn = build_custom_cnn(n_freq_cnn=mel_bands, n_frames_cnn=sequence_frames, large_cnn=large_cnn)

# Init with best weights: each sub-model starts from its own stored checkpoint
# before the two are concatenated.
model_mel.load_weights("../../sed_endtoend/smel/weights_best.hdf5")
model_cnn.load_weights("../../sed_endtoend/cnn/weights_best.hdf5")
model = concatenate(sequence_frames, audio_win, model_cnn, model_mel)

model_mel.summary()
model_cnn.summary()
model.summary()

# Only referenced by the disabled BT_strategy experiment further down.
gamma=0.75

# Per-weight learning-rate multipliers: the first 4 entries use `lr_multiplier`
# and the remaining 18 use 1.
# NOTE(review): presumably the first 4 entries map to the SMel weights and the
# other 18 to the CNN weights -- confirm against Adam_Multipliers / the model.
opt = Adam_Multipliers(lr=learning_rate, multipliers=4*[lr_multiplier]+18*[1])

if resume:
    print('Loading best weights and resuming...')
    weights_best_file = os.path.join(expfolder, 'weights_best.hdf5')
    model.load_weights(weights_best_file)
    # NOTE(review): f1s_best is assigned but never handed to MetricsCallback,
    # which always receives the literals 0, 0 below. If one of those arguments
    # is the initial best F1, a resumed run forgets the previous best -- verify
    # against MetricsCallback's signature before wiring it in.
    f1s_best = resume_f1_best

# Fit model
print('\nFitting model...')

# Validation is evaluated by this callback (not by Keras' own validation
# arguments); it is given the path of the best-weights checkpoint file.
metrics_callback = MetricsCallback(x_val, [y_val,mel_val], 0, 0, os.path.join(expfolder, 'weights_best.hdf5'))
# Append to the existing log when resuming instead of truncating it; a fresh
# run (resume=False) still starts a new file exactly as before.
csv_logger = CSVLogger('training.log', append=resume)
# Disabled experiment (dynamic loss weighting), kept for reference:
#losses_factor = K.variable(10.0)
#w1 = K.variable(0.0)
#w0 = K.variable(1.0)
#bt_callback = BT_strategy(w0,w1, alpha=8, beta=1, gamma=gamma)

# Two outputs, two losses: binary cross-entropy paired with y_train and mean
# squared error paired with mel_train, combined with fixed weights 0.9 / 0.1.
model.compile(loss=['binary_crossentropy','mean_squared_error'],loss_weights=[0.9, 0.1],optimizer=opt)

# Note the effective batch size is twice the configured `batch_size`.
history = model.fit(x=x_train, y=[y_train,mel_train], batch_size=2*batch_size,
                            epochs=epochs, verbose=fit_verbose,
                            validation_split=0.0,
                            shuffle=True,
                            callbacks=[metrics_callback,csv_logger])

W1003 14:06:48.479002 139753170519808 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.




Building model...


W1003 14:06:49.293417 139753170519808 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W1003 14:06:49.516024 139753170519808 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W1003 14:06:49.809784 139753170519808 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:1521: The name tf.log is deprecated. Please use tf.math.log instead.

W1003 14:06:50.038204 139753170519808 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 22050, 1024, 1)    0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 22050, 64, 128)    131200    
_________________________________________________________________
lambda_1 (Lambda)            (None, 22050, 64, 128)    0         
_________________________________________________________________
lambda_2 (Lambda)            (None, 22050, 128)        0         
_________________________________________________________________
lambda_3 (Lambda)            (None, 22050, 128)        0         
Total params: 131,200
Trainable params: 131,200
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inpu

F1 = 0.5242, ER = 0.5662 - Best val F1s: 0.5389 (56)

Epoch 64/101
F1 = 0.5353, ER = 0.5398 - Best val F1s: 0.5389 (56)

Epoch 65/101
F1 = 0.5389, ER = 0.5401 - Best val F1s: 0.5389 (56)

Epoch 66/101
F1 = 0.5286, ER = 0.5567 - Best val F1s: 0.5389 (56)

Epoch 67/101
F1 = 0.5241, ER = 0.5650 - Best val F1s: 0.5389 (56)

Epoch 68/101
F1 = 0.5278, ER = 0.5538 - Best val F1s: 0.5389 (56)

Epoch 69/101
F1 = 0.5257, ER = 0.5580 - Best val F1s: 0.5389 (56)

Epoch 70/101
F1 = 0.5285, ER = 0.5577 - Best val F1s: 0.5389 (56)

Epoch 71/101
F1 = 0.5271, ER = 0.5535 - Best val F1s: 0.5389 (56)

Epoch 72/101
F1 = 0.5179, ER = 0.5668 - Best val F1s: 0.5389 (56)

Epoch 73/101
F1 = 0.5245, ER = 0.5582 - Best val F1s: 0.5389 (56)

Epoch 74/101
F1 = 0.5242, ER = 0.5627 - Best val F1s: 0.5389 (56)

Epoch 75/101
F1 = 0.5343, ER = 0.5436 - Best val F1s: 0.5389 (56)

Epoch 76/101
F1 = 0.5208, ER = 0.5721 - Best val F1s: 0.5389 (56)

Epoch 77/101
F1 = 0.5365, ER = 0.5431 - Best val F1s: 0.5389 (56)

Epoch 78