In [5]:
%load_ext autoreload
%autoreload 2

import numpy as np
import os
import librosa
import glob

import sys
sys.path.insert(0,'..')
from mavd.data_generator import DataGenerator
from mavd.model import *
from mavd.callbacks import *

from keras.optimizers import Adam
from keras.callbacks import CSVLogger
from keras.models import Model
from keras.layers import Concatenate,Dropout

os.environ["CUDA_VISIBLE_DEVICES"]="0"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# files parameters
Nfiles = None
resume = False
load_subset = Nfiles

# audio parameters
sr = 22050
sequence_time = 1.0
sequence_hop_time = 0.5
audio_hop = 512
audio_win = 1024
n_fft = 1024
normalize_data = 'none' # para comparar loss con MST
get_annotations = True
mel_bands = 128
htk = True
normalize_energy = True

# training
learning_rate = 0.001
epochs = 101
batch_size = 64
sed_early_stopping = 100
epoch_limit = None
fit_verbose = True
fine_tuning = False

#model
large_cnn = True
frames = True

class_list1 = ['car','bus','motorcycle']
class_list2 = ['engine','brakes','wheel','compressor']
class_list3 = ['car/engine','car/wheel',
              'bus/engine', 'bus/brakes','bus/compressor','bus/wheel',
              'motorcycle/engine', 'motorcycle/brakes'] 

label_list = [class_list1,class_list2,class_list3]

# Create output folders
expfolder = '../exps/S-CNN_fine_tuning/'

audio_folder = '/data_ssd/users/pzinemanas/MAVD-traffic/audio22050'
label_folder='/data_ssd/users/pzinemanas/MAVD-traffic/annotations'

In [8]:
params = {'sequence_time': sequence_time, 'sequence_hop_time':sequence_hop_time,
          'label_list':label_list, 'audio_hop':audio_hop, 'audio_win':audio_win,
          'n_fft':n_fft,'sr':sr,'mel_bands':mel_bands,'normalize':normalize_data, 'frames':frames,
          'get_annotations':get_annotations, 'dataset': 'MAVD'}

sequence_frames = int(np.ceil(sequence_time*sr/audio_hop))

# Datasets
partition = {}# IDs
labels = {}# Labels

train_files = sorted(glob.glob(os.path.join(audio_folder,'train', '*.flac')))
val_files = sorted(glob.glob(os.path.join(audio_folder,'validate', '*.flac')))

if load_subset is not None:
    train_files = train_files[:load_subset]
    val_files = val_files[:load_subset]

train_labels = {}
train_mel = {}
val_labels = {}
val_mel = {}
mel_basis = librosa.filters.mel(sr,n_fft,mel_bands,htk=True)
print('Founding scaler')
for n,id in enumerate(train_files):
    labels[id] = os.path.join(label_folder, 'train',os.path.basename(id).replace('.flac','.txt'))
for id in val_files:
    labels[id] = os.path.join(label_folder, 'validate',os.path.basename(id).replace('.flac','.txt'))

params['train'] = True
# Generators
print('Making generators')
training_generator = DataGenerator(train_files, labels, **params)
scaler = training_generator.get_scaler()

params['scaler'] = scaler
params['train'] = False
params['sequence_hop_time'] = sequence_time

validation_generator = DataGenerator(val_files, labels, **params)

print('Getting data')


_,train_example,train_mel_example,train_y_example = training_generator.return_random()
_,val_example,val_mel_example,val_y_example = validation_generator.return_random()

_,_,x_val,y_val = validation_generator.return_all()
_,_,x_train,y_train = training_generator.return_all()

if not frames:    
    x_train = np.transpose(x_train,(0,2,1))    
    x_val = np.transpose(x_val,(0,2,1))  
    val_example = np.transpose(val_example,(0,2,1))     

y_val_level1 = y_val[0]
y_val_level2 = y_val[1]
y_val_level3 = y_val[2]

y_train_level1 = y_train[0]
y_train_level2 = y_train[1]
y_train_level3 = y_train[2]


print('train',x_train.shape)    
print('val',x_val.shape)     

sequence_frames = x_val.shape[1]
scaler2 = training_generator.get_standard_scaler()

mean= scaler2.mean_
scale = scaler2.scale_

# Build model

print('\nBuilding model...')

sequence_samples = int(sequence_time*sr)

model_folder = "../exps/S-CNN_baseline/weights_best.hdf5"

model = cnn_fine_tuned(n_freq_cnn=mel_bands, n_frames_cnn=sequence_frames,large_cnn=large_cnn,
                      n_classes1=len(class_list1),n_classes2=len(class_list2),n_classes3=len(class_list3),
                      model_baseline=model_folder)

# Change batch normalization in first layer for the new dataset
model.layers[1].set_weights([np.ones_like(mean),np.zeros_like(mean),mean,scale])

# Only train new layers
for j in range(len(model.layers)-4):
    model.layers[j].trainable=False

model.summary()

opt = Adam(lr=learning_rate*0.2)

# Fit model
print('\nFitting model...')

if resume:
    f1s_best = resume_f1_best

metrics_callback = MetricsCallback_levels(x_val, [y_val_level1,y_val_level2,y_val_level3], 0, 0, 
                                          os.path.join(expfolder, 'weights_best.hdf5'))
csv_logger = CSVLogger(os.path.join(expfolder,'training.log'))

model.compile(loss='binary_crossentropy',optimizer=opt)#, loss_weights=loss_w)

history = model.fit(x=x_train, y=[y_train_level1,y_train_level2,y_train_level3], batch_size=2*batch_size,
                            epochs=epochs, verbose=fit_verbose,
                            validation_split=0.0,
                            shuffle=True,
                            callbacks=[metrics_callback,csv_logger])

Founding scaler
Making generators
Getting data
(26356, 128)
(615648, 128)
train (13992, 44, 128)
val (1995, 44, 128)

Building model...


W0830 13:36:16.218409 139915274192640 deprecation_wrapper.py:119] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0830 13:36:16.226257 139915274192640 deprecation.py:323] From /home/pzinemanas/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 44, 128)           0         
_________________________________________________________________
batch_normalization_13 (Batc (None, 44, 128)           512       
_________________________________________________________________
lambda_4 (Lambda)            (None, 44, 128, 1)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 40, 124, 128)      3328      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 20, 62, 128)       0         
_________________________________________________________________
batch_normalization_14 (Batc (None, 20, 62, 128)       512       
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 16, 58, 128)       409728    
__________

F1 = 0.6022, ER = 0.4998, F1 = 0.7269, ER = 0.6168, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6413 (17)

Epoch 20/101
F1 = 0.5940, ER = 0.5128, F1 = 0.7269, ER = 0.6168, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6413 (17)

Epoch 21/101
F1 = 0.5932, ER = 0.5154, F1 = 0.7269, ER = 0.6168, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6413 (17)

Epoch 22/101
F1 = 0.6035, ER = 0.4985, F1 = 0.7269, ER = 0.6168, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6413 (17)

Epoch 23/101
F1 = 0.6105, ER = 0.4876, F1 = 0.7269, ER = 0.6168, F1 = 0.5892, ER = 0.4868 -  Best val F1s: 0.6422 (IMPROVEMENT, saving)

Epoch 24/101
F1 = 0.5882, ER = 0.5241, F1 = 0.7269, ER = 0.6168, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6422 (22)

Epoch 25/101
F1 = 0.6014, ER = 0.5007, F1 = 0.7269, ER = 0.6168, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6422 (22)

Epoch 26/101
F1 = 0.5761, ER = 0.5445, F1 = 0.7269, ER = 0.6168, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6422 (22)

Epoch 27/101
F1 = 0.6108, ER = 0.487

F1 = 0.5673, ER = 0.5566, F1 = 0.6971, ER = 0.3589, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6423 (26)

Epoch 81/101
F1 = 0.5672, ER = 0.5475, F1 = 0.6837, ER = 0.3686, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6423 (26)

Epoch 82/101
F1 = 0.5567, ER = 0.5614, F1 = 0.6851, ER = 0.3643, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6423 (26)

Epoch 83/101
F1 = 0.5775, ER = 0.5475, F1 = 0.6947, ER = 0.3350, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6423 (26)

Epoch 84/101
F1 = 0.5586, ER = 0.5523, F1 = 0.6843, ER = 0.3624, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6423 (26)

Epoch 85/101
F1 = 0.5710, ER = 0.5557, F1 = 0.6811, ER = 0.3192, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6423 (26)

Epoch 86/101
F1 = 0.5560, ER = 0.5531, F1 = 0.6714, ER = 0.3296, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6423 (26)

Epoch 87/101
F1 = 0.5493, ER = 0.5440, F1 = 0.6732, ER = 0.3555, F1 = 0.5892, ER = 0.4868  - Best val F1s: 0.6423 (26)

Epoch 88/101
F1 = 0.5429, ER = 0.5393, F1 = 0.6757, E