### Importing libraries  

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import os
import tqdm
import warnings
warnings.simplefilter("ignore")

### Interpreting data from dataframe

In [None]:
# Load the UrbanSound8K metadata table into the global dataframe `df`
# (createXY reads the class labels from it) and preview the first rows.
df = pd.read_csv(filepath_or_buffer='./UrbanSound8K/metadata/UrbanSound8K.csv')
df.head()

## Loading data and Extracting Features

In [None]:
def extract_features(filename, input_height=50, target_samples=88200):
    """Load an audio file and return its stacked spectral features.

    The clip is resampled so that every file ends up with roughly the same
    number of samples, whatever its duration. Three feature maps with
    ``input_height`` rows each are then computed and stacked along a new
    last axis.

    Parameters
    ----------
    filename : str
        Path to the audio file, passed to ``librosa.load``.
    input_height : int, optional
        Number of mel bands, chroma bins and MFCCs to compute.
    target_samples : int, optional
        Approximate number of samples every clip is stretched or squeezed
        to. The default, 88200, corresponds to 4 seconds at the 22050 Hz
        rate ``librosa.load`` resamples to by default.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(input_height, n_frames, 3)`` holding the mel
        spectrogram, the chroma CQT and the MFCCs, in that order.

    Raises
    ------
    ValueError
        If the file contains no audio samples.
    """
    # Loading audio file as 1D numpy array.
    # len(y) depends on the duration of the file, so it is normalized below.
    y, sr = librosa.load(filename)
    if len(y) == 0:
        # Without this guard the rate computation fails with an opaque
        # ZeroDivisionError.
        raise ValueError("No audio samples found in %r" % (filename,))

    # Pick the target rate that makes the resampled clip about
    # target_samples long, then resample. Keyword arguments are used because
    # recent librosa releases no longer accept these positionally.
    sr_normalized = sr * target_samples // len(y)
    y_resampled = librosa.resample(y, orig_sr=sr, target_sr=sr_normalized)

    # Obtaining mel-scaled spectrogram of the normalized audio
    S = librosa.feature.melspectrogram(y=y_resampled, sr=sr_normalized,
                                       n_mels=input_height, fmax=8000)

    # Obtaining chroma_cqt
    # NOTE(review): some librosa releases require bins_per_octave to be an
    # integer multiple of n_chroma - confirm this call works with the
    # installed version.
    chroma_cqt = librosa.feature.chroma_cqt(y=y_resampled, sr=sr_normalized,
                                            n_chroma=input_height)

    # Obtaining mel frequency cepstral coefficients from the dB-scaled
    # mel spectrogram
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(S), n_mfcc=input_height)

    # Stack the three 2-D feature maps depth-wise: one channel per feature
    X = np.dstack((S, chroma_cqt, mfcc))
    return X

In [None]:
def createXY(data_dirs):
    """Build the feature array X and label array Y for folders of audio files.

    Parameters
    ----------
    data_dirs : str or list of str
        A directory, or a list of directories, containing the audio files.
        A single path is treated as a one-element list.

    Returns
    -------
    X : numpy.ndarray
        The outputs of ``extract_features`` for every labelled file, stacked
        along a new first axis.
    Y : numpy.ndarray
        Column vector of shape ``(n_files, 1)`` with the ``classID`` of each
        file, taken from the global metadata dataframe ``df``.

    Notes
    -----
    Directory entries that have no row in ``df`` (for example ``.DS_Store``)
    are skipped, and files are processed in sorted name order.
    """
    # A bare path string would otherwise be iterated character by character
    if isinstance(data_dirs, (str, os.PathLike)):
        data_dirs = [data_dirs]

    # Build the file-name -> class-label mapping once instead of scanning the
    # dataframe for every file; drop_duplicates keeps the first row per name.
    labels = (df.drop_duplicates('slice_file_name')
                .set_index('slice_file_name')['classID']
                .to_dict())

    # Initialize X and Y as empty lists
    X = []
    Y = []

    # Do for all folders in the list data_dirs
    for fold in tqdm.tnrange(len(data_dirs)):
        data_dir = data_dirs[fold]

        # Audio files in this directory that have a label. Checking for the
        # label before extracting features avoids doing expensive work on a
        # stray file and then failing on the lookup. os.listdir returns names
        # in arbitrary order, hence the sort.
        audio_files = sorted(
            name for name in os.listdir(data_dir) if name in labels
        )

        for i in tqdm.tnrange(len(audio_files)):
            name = audio_files[i]

            # Extract features for the audio and append it to X
            X.append(extract_features(os.path.join(data_dir, name)))

            # Append the matching class label
            Y.append(labels[name])

    # Reframe X, Y as numpy arrays and reshape Y into a column vector
    X, Y = np.array(X), np.array(Y)
    Y = np.reshape(Y, (len(Y), 1))
    return X, Y

In [None]:
# main_path is the path to the folder containing 10 folders with audio files
main_path = './UrbanSound8K/audio'

# Keep only sub-directories (this skips stray entries such as .DS_Store) and
# sort them, because os.listdir returns names in arbitrary order.
folders = sorted(
    name for name in os.listdir(main_path)
    if os.path.isdir(os.path.join(main_path, name))
)

# Build the training set X, Y from the first nine folders in a single call;
# createXY takes a list of directories. folders[9] is loaded separately as
# the test set further down, so it is held out of the training data here.
train_dirs = [main_path + '/' + name for name in folders[:9]]
X, Y = createXY(train_dirs)

In [None]:
# scikit-learn's encoder is used to dummy encode the label column Y
from sklearn.preprocessing import OneHotEncoder

# Fit the encoder on Y, then replace Y by its dense one-hot matrix
# (the encoder's output is converted with .toarray())
enc = OneHotEncoder()
enc.fit(Y)
Y = enc.transform(Y).toarray()

In [None]:
# The network below is built for input of shape (samples, height, width,
# channels). Add a trailing channel axis only when X does not have one yet:
# forcing an X that already has several channels into a single channel
# raises ValueError.
if X.ndim == 3:
    X = X[..., np.newaxis]

In [None]:
# Display the dimensions of the feature and label arrays as a sanity check
(X.shape, Y.shape)

## CNN Model using Keras

In [None]:
# Specifying the CNN structure

# Input geometry handed to the first Conv2D layer as
# (input_height, input_width, input_channels).
input_width = 173
input_height = 50
# extract_features stacks three feature maps per sample
# (mel spectrogram, chroma CQT, MFCC), so the network needs three channels.
input_channels = 3
# NOTE(review): n_pixels is not used in any visible cell - confirm whether
# it is still needed.
n_pixels = 44288

# First convolution stage: filters, kernel size, pooling size, dropout rate
n_conv1 = 128
k_conv1 = 2
k_maxpool1 = 2
rate_dropout1 = 0.15

# Second convolution stage: filters, kernel size, pooling size, dropout rate
n_conv2 = 64
k_conv2 = 2
k_maxpool2 = 2
rate_dropout2 = 0.20

# Width of the fully connected hidden layer and of the softmax output layer
n_hidden1 = 1024
n_out = 10

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout

In [None]:
# Creating our Convolutional neural network model, and its layers.
# The layers are listed in the order in which data flows through them.

model = Sequential([
    # Stage 1: convolution -> max pooling -> dropout
    Conv2D(
        n_conv1,
        kernel_size=k_conv1,
        activation='relu',
        input_shape=(input_height, input_width, input_channels),
    ),
    MaxPooling2D(pool_size=(k_maxpool1, k_maxpool1)),
    Dropout(rate=rate_dropout1),

    # Stage 2: convolution -> max pooling -> dropout
    Conv2D(n_conv2, kernel_size=k_conv2, activation='relu'),
    MaxPooling2D(pool_size=(k_maxpool2, k_maxpool2)),
    Dropout(rate=rate_dropout2),

    # Classifier head: flatten, one sigmoid hidden layer, softmax output
    Flatten(),
    Dense(n_hidden1, activation='sigmoid'),
    Dense(n_out, activation='softmax'),
])

In [None]:
# Compiling the model: Adam optimizer with its default settings,
# categorical cross-entropy loss, accuracy reported as the metric
from keras.optimizers import SGD, Adam
opt = Adam()
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Fitting on the training data.
# X and Y are the feature and one-hot label arrays prepared in the cells
# above; X_train / Y_train are not defined in any visible cell.
hist = model.fit(X, Y, batch_size=64, epochs=50)

# Save the per-epoch metrics recorded in hist.history to a CSV file
hist_df = pd.DataFrame.from_dict(hist.history)
hist_df.to_csv('TrainingHistory.csv')

In [None]:
# Extracting test data from folders[9].
# createXY takes a list of directories, so the path is wrapped in a list.
# Only X_test / Y_test are assigned: the training arrays X and Y are no
# longer overwritten with the test data.
X_test, Y_test = createXY([main_path + '/' + folders[9]])

In [None]:
# Predicting using our model and observing accuracy
Y_pred = model.predict(X_test)

# Index of the highest-scoring class per sample, reshaped to a column so it
# has the same (n, 1) layout as the labels returned by createXY
Y_pred = np.argmax(Y_pred, axis=1)
Y_pred = np.reshape(Y_pred, [len(Y_pred), 1])

# Accuracy = number of matching predictions / number of test samples
correct_preds = np.equal(Y_test, Y_pred)
print(np.sum(correct_preds) / len(Y_test))