# Train S-CNN baseline

In [6]:
%load_ext autoreload
%autoreload 2

import numpy as np
import os
import librosa
import glob
import sys

sys.path.insert(0,'../..')
from sed_endtoend.cnn.model import build_custom_cnn
from keras.optimizers import Adam
from sed_endtoend.callbacks import MetricsCallback
from sed_endtoend.data_generator import DataGenerator, Scaler
from keras.callbacks import CSVLogger

os.environ["CUDA_VISIBLE_DEVICES"]="1"

from params import *

# Run configuration.
# Nfiles caps how many files are taken from each split; None keeps every file.
Nfiles = None
load_subset = Nfiles
# When True, the training cell reloads 'weights_best.hdf5' before fitting.
resume = False

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Keyword arguments handed to every DataGenerator built in this notebook.
# All values come from the star-import of the experiment's `params` module.
params = dict(
    sequence_time=sequence_time,
    sequence_hop_time=sequence_hop_time,
    label_list=label_list,
    audio_hop=audio_hop,
    audio_win=audio_win,
    n_fft=n_fft,
    sr=sr,
    mel_bands=mel_bands,
    normalize=normalize_data,
    frames=frames,
    get_annotations=get_annotations,
    dataset=dataset,
)

# Number of audio hops in one sequence, rounded up to a whole frame.
hops_per_sequence = sequence_time*sr/audio_hop
sequence_frames = int(np.ceil(hops_per_sequence))

# Datasets
# `labels` maps each audio file path to the path of its annotation (.txt) file.
labels = {}

# Sorted so the file order (and therefore any subset taken below) is stable.
train_files = sorted(glob.glob(os.path.join(audio_folder, 'train', '*.wav')))
val_files = sorted(glob.glob(os.path.join(audio_folder, 'validate', '*.wav')))

# Optionally keep only the first `load_subset` files of each split.
if load_subset is not None:
    train_files = train_files[:load_subset]
    val_files = val_files[:load_subset]

# Placeholders kept from the original cell; nothing in this cell fills them.
train_labels = {}
train_mel = {}
val_labels = {}
val_mel = {}

# The annotation file shares the audio file's base name and lives under
# label_folder/<split>/. splitext swaps only the trailing extension, and the
# loop variable no longer shadows the builtin `id` for the rest of the session.
for split, split_files in (('train', train_files), ('validate', val_files)):
    for audio_path in split_files:
        stem = os.path.splitext(os.path.basename(audio_path))[0]
        labels[audio_path] = os.path.join(label_folder, split, stem + '.txt')

# Generators
# `**params` is unpacked into a fresh keyword dict at each call, so the
# overrides applied for validation below do not alter the arguments the
# training generator was built with.
params.update({'train': True})
print('Making training generator')
training_generator = DataGenerator(train_files, labels, **params)

# Validation: 'train' switched off, and the hop set equal to the sequence
# length (To calculate F1_1s).
params.update({'train': False, 'sequence_hop_time': sequence_time})

print('Making validation generator')
validation_generator = DataGenerator(val_files, labels, **params)

# return_all() yields four values per generator; only the last two are kept
# (named here as mel inputs and targets), the first two are discarded.
print('Getting validation data')
_, _, mel_val, y_val = validation_generator.return_all()

print('Getting training data')
_, _, mel_train, y_train = training_generator.return_all()

# The scaler is fitted on the training inputs only; its statistics are kept
# in `mean` and `scale` for use when initialising the model.
print('Founding standard scaler')
standard_scaler = Scaler(normalizer='standard')
standard_scaler.fit(mel_train)
standard_scaler_sklearn = standard_scaler.get_scaler()
mean, scale = standard_scaler_sklearn.mean_, standard_scaler_sklearn.scale_

Making training generator
Making validation generator
Getting validation data
0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %
Getting training data
0.0 %
10.0 %
20.0 %
30.0 %
40.0 %
50.0 %
60.0 %
70.0 %
80.0 %
90.0 %
Founding standard scaler


In [7]:
print('\nBuilding model...')

sequence_samples = int(sequence_time*sr)

model = build_custom_cnn(n_freq_cnn=mel_bands, n_frames_cnn=sequence_frames, large_cnn=large_cnn)

# Init Batchnorm
# layers[1] is the BatchNormalization layer directly after the input (see the
# model summary). Keras orders its weights as
# [gamma, beta, moving_mean, moving_variance], so the last entry must be a
# variance. `scale` is the fitted scaler's `scale_`, which is a standard
# deviation under the sklearn StandardScaler convention, hence the squaring.
# NOTE(review): assumes Scaler.get_scaler() returns a sklearn StandardScaler - confirm.
input_batchnorm = model.layers[1]
input_batchnorm.set_weights([
    np.ones_like(mean),   # gamma: identity scaling
    np.zeros_like(mean),  # beta: no offset
    mean,                 # moving_mean
    np.square(scale),     # moving_variance = std ** 2
])

model.summary()

opt = Adam(lr=learning_rate)

# Resuming: start from the best checkpoint saved in the experiment folder.
if resume:
    print('Loading best weights and resuming...')
    weights_best_file = os.path.join(expfolder, 'weights_best.hdf5')
    model.load_weights(weights_best_file)

# Fit model
print('\nFitting model...')

if resume:
    # NOTE(review): f1s_best is assigned here but never used afterwards; the
    # callback below receives literal zeros. Presumably one of those zeros is
    # meant to be the current best F1 - confirm against MetricsCallback.
    f1s_best = resume_f1_best

checkpoint_path = os.path.join(expfolder, 'weights_best.hdf5')
metrics_callback = MetricsCallback(mel_val, y_val, 0, 0, checkpoint_path)

log_path = os.path.join(expfolder, 'training.log')
csv_logger = CSVLogger(log_path)

model.compile(loss='binary_crossentropy', optimizer=opt)

# No validation split is passed to fit(); the validation arrays are handed to
# metrics_callback above instead.
training_callbacks = [metrics_callback, csv_logger]
history = model.fit(
    x=mel_train,
    y=y_train,
    batch_size=2*batch_size,
    epochs=epochs,
    verbose=fit_verbose,
    validation_split=0.0,
    shuffle=True,
    callbacks=training_callbacks,
)


Building model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 44, 128)           0         
_________________________________________________________________
batch_normalization_9 (Batch (None, 44, 128)           512       
_________________________________________________________________
lambda_3 (Lambda)            (None, 44, 128, 1)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 40, 124, 128)      3328      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 20, 62, 128)       0         
_________________________________________________________________
batch_normalization_10 (Batc (None, 20, 62, 128)       512       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 16, 58, 128)       40

F1 = 0.5276, ER = 0.5639 - Best val F1s: 0.5362 (79)

Epoch 92/101
F1 = 0.5343, ER = 0.5583 - Best val F1s: 0.5362 (79)

Epoch 93/101
F1 = 0.5341, ER = 0.5571 - Best val F1s: 0.5362 (79)

Epoch 94/101
F1 = 0.5334, ER = 0.5613 - Best val F1s: 0.5362 (79)

Epoch 95/101
F1 = 0.5301, ER = 0.5652 - Best val F1s: 0.5362 (79)

Epoch 96/101
F1 = 0.5387, ER = 0.5509 -  Best val F1s: 0.5387 (IMPROVEMENT, saving)

Epoch 97/101
F1 = 0.5249, ER = 0.5778 - Best val F1s: 0.5387 (95)

Epoch 98/101
F1 = 0.5350, ER = 0.5548 - Best val F1s: 0.5387 (95)

Epoch 99/101
F1 = 0.5321, ER = 0.5645 - Best val F1s: 0.5387 (95)

Epoch 100/101
F1 = 0.5264, ER = 0.5710 - Best val F1s: 0.5387 (95)

Epoch 101/101
F1 = 0.5191, ER = 0.5847 - Best val F1s: 0.5387 (95)

