# Evaluate the trained MST+CNN model: predict on the validation and test sets

In [1]:
import numpy as np
import os
import librosa
import glob
import sys

sys.path.insert(0,'../..')
from sed_endtoend.cnn.model import build_custom_cnn
from sed_endtoend.mst.model import MST
from sed_endtoend.concatenate_models import concatenate
from sed_endtoend.data_generator import DataGenerator, Scaler

# Restrict CUDA device visibility for this process to device index 0.
# NOTE(review): this is set after the sed_endtoend imports above have already
# loaded the Keras/TensorFlow backend; presumably it still takes effect because
# no GPU work has started yet — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# files parameters
# Nfiles caps how many files are taken per split; None keeps every file
# (it is copied to load_subset, which is checked where the file lists are built).
Nfiles = None
# NOTE(review): `resume` is not read by any visible cell — confirm before removing.
resume = False
load_subset = Nfiles

# load parameters
# The star import runs after the assignments above, so any name in params.py
# that matches one of them (Nfiles, resume, load_subset) would override it.
# Presumably this is where sequence_time, sr, audio_hop, audio_win, n_fft,
# mel_bands, label_list, frames, audio_folder, label_folder, expfolder,
# large_cnn, etc. come from — verify against params.py.
from params import *

Using TensorFlow backend.


In [2]:
# Keyword arguments shared by both DataGenerator instances.
# (The values are presumably provided by params.py via the star import.)
params = {'sequence_time': sequence_time, 'sequence_hop_time': sequence_hop_time,
          'label_list': label_list, 'audio_hop': audio_hop, 'audio_win': audio_win,
          'n_fft': n_fft, 'sr': sr, 'mel_bands': mel_bands, 'normalize': normalize_data,
          'frames': frames, 'get_annotations': get_annotations, 'dataset': dataset}

# Override the hop so that it equals the sequence length. To calculate F1_1s
params['sequence_hop_time'] = sequence_time

sequence_frames = int(np.ceil(sequence_time*sr/audio_hop))


def _label_path(audio_path, split):
    """Return the annotation file path that belongs to an audio file.

    Parameters
    ----------
    audio_path : str
        Path of the audio file.
    split : str
        Sub-folder of ``label_folder`` to look in ('test' or 'validate' here).

    Returns
    -------
    str
        ``label_folder/split/<audio base name without extension>.txt``
    """
    # splitext swaps only the final extension; str.replace('.wav', '.txt')
    # would also rewrite a '.wav' appearing in the middle of the file name and
    # would leave an upper-case '.WAV' extension untouched.
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    return os.path.join(label_folder, split, stem + '.txt')


# Datasets
test_files = sorted(glob.glob(os.path.join(audio_folder, 'test', '*.wav')))
val_files = sorted(glob.glob(os.path.join(audio_folder, 'validate', '*.wav')))

# Optionally keep only the first `load_subset` files of each split.
if load_subset is not None:
    test_files = test_files[:load_subset]
    val_files = val_files[:load_subset]

# NOTE(review): these four dicts are never filled or read in the visible
# cells; they are kept only in case another cell relies on them — confirm.
test_labels = {}
test_mel = {}
val_labels = {}
val_mel = {}

# Labels: map every audio file path to its annotation file path.
labels = {}
for split, files in (('test', test_files), ('validate', val_files)):
    for audio_path in files:
        labels[audio_path] = _label_path(audio_path, split)

# Generators
print('Making test generator')
test_generator = DataGenerator(test_files, labels, **params)

print('Making validation generator')
validation_generator = DataGenerator(val_files, labels, **params)

# return_all() yields four values; the second one is not needed here.
print('Getting validation data')

x_val, _, mel_val, y_val = validation_generator.return_all()

print('Getting test data')

x_test, _, mel_test, y_test = test_generator.return_all()

Making test generator
Making validation generator
Getting validation data
0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %
Getting test data
0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %


In [10]:
# Input sizes: frames per sequence are read from the loaded mel data,
# samples per sequence are computed from the sequence duration and sample rate.
sequence_samples = int(sequence_time*sr)
sequence_frames = mel_val.shape[1]

# Build model
print('\nBuilding model...')

# The two sub-models are created in this order on purpose: creation order may
# influence the automatically generated Keras layer/model names.
model_mel = MST(mel_bands, sequence_samples, audio_win, audio_hop)
model_cnn = build_custom_cnn(
    n_freq_cnn=mel_bands,
    n_frames_cnn=sequence_frames,
    large_cnn=large_cnn,
)

model = concatenate(
    sequence_frames,
    audio_win,
    model_cnn,
    model_mel,
    sequence_samples=sequence_samples,
    frames=frames,
)

# Restore the stored weights from the experiment folder.
weights_best_file = os.path.join(expfolder, 'weights_best.hdf5')
model.load_weights(weights_best_file)

model.summary()

# Only element 0 of what predict() returns is kept.
# NOTE(review): presumably that is the class-prediction output — confirm
# against sed_endtoend.concatenate_models.
y_test_predicted = model.predict(x_test)[0]
y_val_predicted = model.predict(x_val)[0]

# Write predictions and ground truth next to the weights, in a fixed order.
# NOTE(review): 'y_test_predict.npy' is named differently from
# 'y_val_predicted.npy'; kept as is because other code may load these files.
arrays_to_save = [
    ('y_val_predicted.npy', y_val_predicted),
    ('y_val.npy', y_val),
    ('y_test_predict.npy', y_test_predicted),
    ('y_test.npy', y_test),
]
for out_name, out_array in arrays_to_save:
    np.save(os.path.join(expfolder, out_name), out_array)

print("[Done]")


Building model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 22050, 1)          0         
_________________________________________________________________
model_7 (Model)              (None, 44, 128)           1020288   
_________________________________________________________________
model_8 (Model)              (None, 10)                2481162   
Total params: 3,501,450
Trainable params: 3,498,634
Non-trainable params: 2,816
_________________________________________________________________
[Done]
