In [None]:
#Se importan las librerias

import pandas as pd
import numpy as np

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout
from tensorflow.keras.utils import to_categorical 

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

#librosa es un paquete de python para analisis de audio
#Documentacion Librosa: https://librosa.org/doc/latest/index.html

import os
import librosa
import librosa.display
import glob 
import skimage

In [None]:
# Load the UrbanSound8K metadata table (8732 labelled sounds) and preview it.
metadata_csv = "../input/urbansound8k/UrbanSound8K.csv"
df = pd.read_csv(metadata_csv)
df.head()

In [None]:
# Load one example of a human sound from the dataset (children_playing)
# and display its spectrogram in decibels.
example_path = '../input/urbansound8k/fold5/100263-2-0-121.wav'
dat1, sampling_rate1 = librosa.load(example_path)

# Magnitude of the short-time Fourier transform, converted to dB relative to its peak.
stft_magnitude = np.abs(librosa.stft(dat1))
D = librosa.amplitude_to_db(stft_magnitude, ref=np.max)

plt.figure(figsize=(20, 10))
plt.subplot(4, 2, 1)
librosa.display.specshow(D, y_axis='linear')
plt.colorbar(format='%+2.0f dB')
plt.title('children_playing')

In [None]:
# Walk the dataset metadata and extract one feature vector per clip with librosa.

# Module-level containers kept for backward compatibility: parser() fills them
# in place and returns these same two list objects.
feature = []
label = []

def parser(row):
    """Extract a mel-spectrogram feature vector and a class label for every clip.

    Parameters
    ----------
    row : pandas.DataFrame
        Metadata table with the ``fold``, ``slice_file_name`` and ``classID``
        columns. Despite the parameter name this is the whole table, not a
        single row. Any non-DataFrame value falls back to the module-level
        ``df``, which is what this function always read previously.

    Returns
    -------
    list
        ``[feature, label]``: ``feature`` holds, per clip, the mel spectrogram
        averaged along axis 0 of its transpose; ``label`` holds the matching
        ``classID`` values, in the same order as the rows of the table.
    """
    metadata = row if isinstance(row, pd.DataFrame) else df

    mel_means = []
    class_ids = []
    # Iterate positionally over the columns so the row count is never
    # hard-coded and a non-default index on the table cannot break lookups.
    clips = zip(metadata["fold"], metadata["slice_file_name"], metadata["classID"])
    for fold, slice_file_name, class_id in clips:
        file_name = '../input/urbansound8k/fold' + str(fold) + '/' + slice_file_name
        X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mels = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        mel_means.append(mels)
        class_ids.append(class_id)

    # Slice-assign instead of appending: calling parser() again (e.g. re-running
    # the cell) replaces the previous results rather than duplicating them.
    feature[:] = mel_means
    label[:] = class_ids
    return [feature, label]

In [None]:
# Build the arrays needed for training from the extracted features.

features, labels = parser(df)

# Stack the per-clip vectors directly. Going through np.array([features, labels])
# builds a ragged array, which NumPy >= 1.24 rejects with a ValueError.
X = np.asarray(features, dtype=np.float64)
Y = to_categorical(np.asarray(labels, dtype=int))

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state = 1)

# Each feature vector is reshaped to a 16x8 single-channel grid for the Conv2D input.
input_dim = (16, 8, 1)
# -1 lets NumPy infer the number of samples instead of hard-coding the split sizes.
X_train = X_train.reshape(-1, *input_dim)
X_test = X_test.reshape(-1, *input_dim)

In [None]:
# Build the Keras model used for training: two Conv2D + MaxPool2D stages,
# dropout, then a dense layer feeding a 10-way softmax output.

model = Sequential([
    Conv2D(64, (3, 3), padding="same", activation="tanh", input_shape=input_dim),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), padding="same", activation="tanh"),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.1),
    Flatten(),
    Dense(1024, activation="tanh"),
    Dense(10, activation="softmax"),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Set the number of epochs and the batch size, then train the model.
# The test split is also passed as the validation data.

n_epochs = 90
batch = 50
model.fit(X_train, Y_train, validation_data = (X_test, Y_test), epochs = n_epochs, batch_size = batch)

In [None]:
# Print the summary of the model (model.summary() describes the network itself, not the training results)

model.summary()

In [None]:
# Run the model on the test split: keep the raw predictions for later use and print whatever model.evaluate returns for (X_test, Y_test)

predictions = model.predict(X_test)
score = model.evaluate(X_test, Y_test)
print(score)

In [None]:
# Store the predicted class of every test sample in a .csv file (output).

output_csv = "PrediccionesResultado.csv"

# The index of the largest score in each prediction row is the predicted class.
# A predicted class of 2 means the sound comes from children_playing.
preds = predictions.argmax(axis = 1)
result = pd.DataFrame(preds)
result.to_csv(output_csv)