In [0]:
# Fuente: https://github.com/adanRivas/CNN-Audio-Classifier-with-Keras-Tensorflow/

# Descargamos y preparamos los datos

In [0]:
# descargamos el dataset que utilizaremos (https://github.com/karoldvl/ESC-50/)
!wget https://github.com/karoldvl/ESC-50/archive/master.zip
!unzip master.zip

In [0]:
# List the working directory (e.g. to check that master.zip and ESC-50-master/ are there)
!ls -la

In [0]:
# load python libraries
%matplotlib inline
import numpy as np
import pandas as pd
import random
from scipy.io import wavfile
from sklearn.preprocessing import scale
import librosa.display
import librosa
import matplotlib.pyplot as plt
import os

In [0]:
def save_melspectrogram(directory_path, file_name, dataset_split, label, sampling_rate=44100):
    """Render the log-mel spectrogram of one audio file and save it as a PNG.

    The image is written to
    ``./ESC-50-master/melspectrograms/<dataset_split>/<label>/<file stem>.png``
    (relative to the current working directory); the folder is created on demand.

    Args:
        directory_path: Folder that contains the audio file.
        file_name: Name of the audio file inside ``directory_path``.
        dataset_split: Name of the split sub-folder of the output tree.
        label: Class name, used as the innermost output folder.
        sampling_rate: Rate passed to ``librosa.load`` when reading the audio.

    Returns:
        None. The only result is the PNG file on disk.
    """
    path_to_file = os.path.join(directory_path, file_name)
    data, sr = librosa.load(path_to_file, sr=sampling_rate, mono=True)
    data = scale(data)

    melspec = librosa.feature.melspectrogram(y=data, sr=sr, n_mels=128)
    # Convert to log scale (dB) using the peak power (max) as reference, per the
    # librosa docs: https://librosa.github.io/librosa/generated/librosa.feature.melspectrogram.html
    log_melspec = librosa.power_to_db(melspec, ref=np.max)

    # create saving directory
    directory = './ESC-50-master/melspectrograms/{dataset}/{label}'.format(dataset=dataset_split, label=label)
    os.makedirs(directory, exist_ok=True)

    # str.strip('.wav') removes any of the characters '.', 'w', 'a', 'v' from both
    # ends (e.g. 'wave-1.wav' -> 'e-1'); splitext drops only the extension.
    base_name = os.path.splitext(file_name)[0]

    # Draw on a dedicated figure and always close it, so repeated calls in the
    # same (worker) process neither stack plots on one figure nor leak memory.
    fig = plt.figure()
    try:
        librosa.display.specshow(log_melspec, sr=sr)
        fig.savefig(os.path.join(directory, base_name + '.png'))
    finally:
        plt.close(fig)

In [0]:
def _train_test_split(filenames, train_pct):
    """Create train and test splits for ESC-50 data"""
    random.seed(2018)
    n_files = len(filenames)
    n_train = int(n_files*train_pct)
    train = np.random.choice(n_files, n_train, replace=False)
        
    # split on training indices
    training_idx = np.isin(range(n_files), train)
    training_set = np.array(filenames)[training_idx]
    testing_set = np.array(filenames)[~training_idx]
    print('\tfiles in training set: {}, files in testing set: {}'.format(len(training_set), len(testing_set)))
    
    return {'training': training_set, 'testing': testing_set}

In [0]:
# Root folder of the extracted dataset archive
dataset_dir = './ESC-50-master'

# Read the metadata table that describes the audio files
meta_data = pd.read_csv('{}/meta/esc50.csv'.format(dataset_dir))

# Category column and its distinct values
labs = meta_data['category']
unique_labels = labs.unique()
print(unique_labels)
meta_data.head()

In [0]:
# Categories this notebook works with
interesting_sounds = [
    'crying_baby',
    'sneezing',
    'clapping',
    'breathing',
    'coughing',
    'footsteps',
    'laughing',
    'brushing_teeth',
    'snoring',
    'drinking_sipping',
]
# presumably the numeric ESC-50 target ids of the categories above, in the same order — TODO confirm
interesting_sounds_idx = list(range(20, 30))

In [0]:
# usamos concurrent para hacerlo en paralelo
import concurrent
from tqdm import tqdm

def compute_and_save_mel_spectogram(data):
    """Worker entry point: save the mel spectrogram described by one task tuple.

    Args:
        data: 4-item sequence ``(directory_path, filename, dataset_split, label)``.

    Returns:
        The ``filename`` item of the task, once the spectrogram has been saved.
    """
    source_dir, wav_name, split_name, category = data
    save_melspectrogram(source_dir, wav_name, split_name, category, sampling_rate=44100)
    return wav_name

# `import concurrent` on its own does not load the `futures` submodule; import it
# explicitly so `concurrent.futures.ProcessPoolExecutor` is guaranteed to resolve.
import concurrent.futures

# File names of every spectrogram that was generated, in processing order
results = []
directory_path = dataset_dir + '/audio/'

# Create the pool of processes once (by default one worker per CPU) and reuse it
# for every label/split instead of starting and tearing down a pool each time.
with concurrent.futures.ProcessPoolExecutor() as executor:
    for label in interesting_sounds:
        print('\nProccesing {} audio files'.format(label))
        current_label_meta_data = meta_data[meta_data.category == label]
        datasets = _train_test_split(current_label_meta_data.filename, train_pct=0.8)
        for dataset_split, audio_files in datasets.items():
            # One task tuple per audio file, in the layout the worker unpacks
            data = [(directory_path, file_name, dataset_split, label) for file_name in audio_files]

            # Split the work across the process pool; tqdm shows progress per split
            for out_fn in tqdm(executor.map(compute_and_save_mel_spectogram, data), total=len(data)):
                results.append(out_fn)
        

In [0]:
# List the contents of every <split>/<label> folder under melspectrograms/
!ls -lah ./ESC-50-master/melspectrograms/*/*

# Creamos el modelo y lo entrenamos

In [0]:
%matplotlib inline
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import metrics
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from time import time
import numpy as np
import json

In [0]:
# Training parameters
batch_size = 40
epochs = 200

# Size every image is resized to before it is fed to the network
img_width = 224
img_height = 224

# Input tensor of the network, shape (224, 224, 3)
input_tensor = Input(shape=(img_height, img_width, 3))

In [0]:
# Configure the image generators: both read images from a directory tree,
# resize them to (img_height, img_width) and rescale pixel values by 1/255.
training_data_dir = './ESC-50-master/melspectrograms/training'
validation_data_dir ='./ESC-50-master/melspectrograms/testing/'

pixel_scale = 1./255
training_datagen = image.ImageDataGenerator(rescale=pixel_scale)
validation_datagen = image.ImageDataGenerator(rescale=pixel_scale)

# training generator
training_generator = training_datagen.flow_from_directory(
    training_data_dir,
    batch_size=batch_size,
    target_size=(img_height, img_width))

# validation generator (fed from the 'testing' split)
validation_generator = validation_datagen.flow_from_directory(
    validation_data_dir,
    batch_size=batch_size,
    target_size=(img_height, img_width))

In [0]:
# Take the number of images from the generators instead of hard-coding it, so the
# counts always match what is actually on disk for each split.
nb_training_samples = training_generator.samples
nb_validation_samples = validation_generator.samples

In [0]:
# Load the base model: VGG16 with ImageNet weights and without its top
# classifier, wired to the notebook's input tensor.
base_model = VGG16(include_top=False, weights='imagenet', input_tensor=input_tensor)
print('Model loaded.')
base_model.summary()

In [0]:
# Classifier head that goes on top of the convolutional base:
# flatten -> 256-unit ReLU layer -> dropout (0.5) -> 10-way softmax.
top_model = Sequential([
    Flatten(input_shape=base_model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])
top_model.summary()

In [0]:
# Chain the classifier head onto the output of the convolutional base.
# (Loading pre-trained head weights is currently disabled:
#  top_model.load_weights('bootlneck_fc_model.h5'))
head_output = top_model(base_model.output)
model = Model(inputs=base_model.input, outputs=head_output)
model.summary()

In [0]:
# Number of leading layers of `model` that are marked non-trainable below;
# also embedded in the names of the saved model JSON and the TensorBoard log dir.
num_layers_to_freeze = 15

def top_5_accuracy(y_true, y_pred):
    """Top-k categorical accuracy with k fixed to 5 (delegates to ``metrics``)."""
    top_k = 5
    return metrics.top_k_categorical_accuracy(y_true, y_pred, k=top_k)

# Mark the first `num_layers_to_freeze` layers as non-trainable
for frozen_layer in model.layers[:num_layers_to_freeze]:
    frozen_layer.trainable = False

# Optimizer: plain SGD with momentum (no Nesterov, no decay).
# Alternative kept for reference:
#   SGD(lr=1e-4, momentum=0.9, decay=1e-6, nesterov=True)
optimizer = SGD(lr=1e-4, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['acc', top_5_accuracy])

# Serialize the model architecture to a JSON file
model_filename = "vgg16_model_{}_frozen_layers.json".format(num_layers_to_freeze)
model_json = model.to_json()
with open(model_filename, "w") as json_file:
    json_file.write(model_json)

In [0]:
# TensorBoard logs go to a folder named after the number of frozen layers
tensorboard = TensorBoard(log_dir="logs/layers_frozen_{}".format(num_layers_to_freeze))

# Checkpoint: keep only the weights with the best validation accuracy so far
filepath="esc50_vgg16_stft_weights_train_last_2_base_layers.best.hdf5"
best_model_checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [best_model_checkpoint, tensorboard]

# Step counts must be whole numbers; `/` yields a float, so round up to the
# number of batches needed to cover all samples once.
steps_per_epoch = int(np.ceil(nb_training_samples / batch_size))
validation_steps = int(np.ceil(nb_validation_samples / batch_size))

# Keep the returned History object instead of echoing its repr as cell output
history = model.fit_generator(
    training_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=callbacks_list)


# Evaluamos el modelo

In [0]:
ls -lah ./ESC-50-master/melspectrograms/testing/coughing/

In [0]:
# Get top k predictions for selected test files
import json

def get_top_k_predictions(preds, label_map, k=5, print_flag=False):
    """Return the labels of the ``k`` highest-scoring predictions, best first.

    Args:
        preds: 1-D sequence of scores, one per class index.
        label_map: dict mapping label name -> class index.
        k: How many of the top predictions to return.
        print_flag: When True, also print each selected label with its score.

    Returns:
        List of label names ordered from highest to lowest score.
    """
    index_to_label = {index: name for name, index in label_map.items()}
    best_indices = np.argsort(preds)[::-1][:k]

    if print_flag:
        for class_index in best_indices:
            print("{} ({})".format(index_to_label[class_index], preds[class_index]))

    return [index_to_label[class_index] for class_index in best_indices]

# Mapping label name -> class index, as assigned by the training generator
label_map = training_generator.class_indices

# Save the mapping as JSON; the `with` block closes the file even if the write fails
json_content = json.dumps(label_map)
with open("cough_label_map.json", "w") as f:
    f.write(json_content)

# Predict one test image: load it at 224x224, add a batch dimension and apply
# the same 1/255 rescaling the generators use.
img_path = './ESC-50-master/melspectrograms/testing/coughing/1-19118-A-24.png'

img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0) * 1./255

preds = model.predict(x)[0]

get_top_k_predictions(preds, label_map, k=3)

In [0]:
# Calculate and plot confusion matrix

import itertools
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a short header and plot the confusion matrix on a new figure.

    Args:
        cm: 2-D numpy array with the confusion matrix (rows are summed for
            normalization, so rows are treated as the true labels).
        classes: Tick labels for both axes, in matrix order.
        normalize: When True, each row is divided by its sum before plotting.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the image.

    Returns:
        None. The plot is drawn on a new 24x24 figure.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows without any sample stay all-zero instead of turning into NaN
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.figure(figsize=(24,24))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Write each cell value, in white on cells above half of the maximum value
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account
    plt.tight_layout()

import os
from sklearn.metrics import confusion_matrix

testing_dir = './ESC-50-master/melspectrograms/testing/'

# Fixed class order shared by the confusion matrix and its tick labels
class_names = sorted(label_map.keys())

# Predict every image of the testing split, one at a time, and collect the
# true label (its folder name) next to the top-1 predicted label.
y_true = []
y_pred = []
for label in label_map.keys():
    file_list = os.listdir(os.path.join(testing_dir, label))
    for file_name in file_list:
        img_path = os.path.join(testing_dir, label, file_name)

        img = image.load_img(img_path, target_size=(224, 224))

        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0) * 1./255

        preds = model.predict(x)[0]

        y_true.append(label)
        y_pred.append(get_top_k_predictions(preds, label_map, k=1)[0])

# Pass the label order explicitly so rows/columns always line up with the tick
# labels, even if some class never shows up in y_true or y_pred.
cm = confusion_matrix(y_true, y_pred, labels=class_names)
plot_confusion_matrix(cm, class_names, normalize=True)