# <center>Environmental Sound Classification with CNN</center>
---

In [0]:
# Mount Google Drive into the Colab runtime at '/content/gdrive'.
# This is interactive: Colab asks for an authorization code on first run.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


## Expand the cell width

In [0]:
# Widen the notebook container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

## Important libraries.

In [0]:
import re
import cv2
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.io import wavfile
from IPython.display import Audio
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Flatten, Activation

from __future__ import print_function
import tensorflow as tf


## Read the training dataset (audio files)

In [0]:
# Load every audio clip in PATH together with the class number encoded in its
# file name. Each entry of `audio_files` is [waveform, class_number].
audio_files = []
skipped_files = []  # (file name, reason) for every file that could not be used
PATH = '/content/gdrive/My Drive/audio_6000'

# The class number is the one or two digits immediately before ".wav".
LABEL_PATTERN = re.compile(r'(\d{1,2})\.wav')

for file_name in tqdm(os.listdir(PATH)):
    label_match = LABEL_PATTERN.search(file_name)
    if label_match is None:
        # No class number in the name: nothing to train on, so do not load it.
        skipped_files.append((file_name, 'no class number in file name'))
        continue

    try:
        audio, sampling_rate = librosa.load(os.path.join(PATH, file_name), res_type='kaiser_fast')
    except Exception as e:
        # Best effort: keep going, but remember what was dropped and why
        # instead of silently discarding it.
        skipped_files.append((file_name, repr(e)))
        continue

    audio_files.append([audio, int(label_match.group(1))])

if skipped_files:
    print('Skipped', len(skipped_files), 'file(s); first few:', skipped_files[:5])

/content/gdrive/My Drive


## Read and Explore the classes.

In [0]:
# Read the categories and classes that were already saved in 'classes.csv'.
# The column headers are the categories; the cells of each column are the
# classes belonging to that category.
df = pd.read_csv('/content/gdrive/My Drive/classes.csv', delimiter=',')

categories = df.columns.tolist()

# Flatten the table column by column (all of category 0's classes, then
# category 1's, ...) for every column present, instead of hard-coding five.
classes = []
for category in categories:
    classes.extend(df[category].values)
df

## Explore a random training example
<b>sampling_rate</b> – the number of audio samples per second in an audio file.

In [0]:
# Show one randomly chosen clip as a mel spectrogram and a waveform, print its
# category/class, and embed an audio player for it.
number_of_training_example = len(audio_files)

# One random index
random_index = np.random.randint(0, number_of_training_example)
audio, _class = audio_files[random_index]

plt.figure(figsize=(20,5))
plt.subplot(121)

# Convert audio into spectrogram.
# `sampling_rate` is the rate returned by librosa.load when the clips were read.
# It is deliberately NOT recomputed here from the clip length: later cells pass
# the same variable to melspectrogram, so overwriting it with a guess would
# silently change the training features.
spectrogram = librosa.feature.melspectrogram(y=audio, sr=sampling_rate)
plt.title("Spectrogram")
librosa.display.specshow(spectrogram, sr=sampling_rate, y_axis='mel', x_axis='time')

# Convert into wave form
plt.subplot(122)
plt.title("Wave")
librosa.display.waveplot(audio, sr=sampling_rate)
plt.ylabel('Amplitude')
plt.show()

# `classes` lists the class table column by column, so the category of a class
# index is that index divided by the number of rows (classes per category).
classes_per_category = len(df)
print('CLASS:',categories[_class // classes_per_category])
print('SUBCLASS:',classes[_class])

#Audio
Audio(audio,rate = sampling_rate)

In [0]:
# Separate the waveforms (X, a list of 1-D arrays) from the class numbers
# (Y, an integer array). This avoids np.array() on the [waveform, label] pairs:
# those pairs are ragged, which newer NumPy rejects and older NumPy turns into
# an object array, leaving the labels with object dtype.
X = [pair[0] for pair in audio_files]

Y = np.array([pair[1] for pair in audio_files], dtype=int)

# The pair list is no longer needed; X keeps the waveforms alive.
del audio_files

## Augmentation<br>

<li><b>Data augmentation</b> is a technique to artificially create new training data from existing training data.</li>
<li>Since the available dataset is not sufficient to train the model, I added <b>white noise</b> to copies of the existing clips.</li>
<li>Now we have 6000 training examples.</li>

In [0]:
# It will take a few seconds for augmentation.
# For every clip i, append a white-noise copy to X and duplicate the labels,
# so the copy of clip i ends up at index i + number_of_audio_files.
# NOTE(review): a purely random split over the doubled data can put a clip and
# its noisy copy on different sides of the train/test split.
number_of_audio_files = len(Y)

# Seeded generator so the augmented copies are the same on every run.
noise_rng = np.random.RandomState(0)

for i in range(number_of_audio_files):
    clip = X[i]

    # Adding white noise. The noise is sized per clip (clips are not assumed
    # to be equally long) and the result keeps the clip's dtype so the
    # augmented copies are not silently upcast to float64.
    noise = 0.005 * noise_rng.randn(len(clip))
    X.append((clip + noise).astype(clip.dtype))

Y = np.r_[Y,Y]

## Split the dataset into training and testing sets

In [0]:
# Split into training (90%) and testing (10%) sets without leaking data.
# The augmentation cell stores the noisy copy of clip i at index i + n_original,
# so a plain random split over all 2 * n_original samples could train on a clip
# and test on its near-identical noisy twin. Instead the ORIGINAL clips are
# split, and each clip's noisy copy follows it to the same side.
n_original = len(Y) // 2
if len(X) != len(Y) or len(Y) % 2 != 0 or not np.array_equal(Y[:n_original], Y[n_original:]):
    raise ValueError("Expected X/Y to hold the original clips followed by their "
                     "augmented copies; run the augmentation cell exactly once first.")

train_ids, test_ids = train_test_split(np.arange(n_original), test_size = 0.1, random_state=5, shuffle = True)

# Interleave each clip with its noisy copy (clip, copy, clip, copy, ...).
# Keras' validation_split takes the LAST fraction of the training data, so
# keeping pairs adjacent also keeps them together across that boundary.
train_idx = np.column_stack([train_ids, train_ids + n_original]).ravel()
test_idx = np.column_stack([test_ids, test_ids + n_original]).ravel()

train_x = [X[i] for i in train_idx]
test_x = [X[i] for i in test_idx]
train_y = Y[train_idx]
test_y = Y[test_idx]


## Convert the input data into spectrograms to train the model

In [0]:
# It will take a few seconds to convert the audio into spectrograms.
# Replaces every training waveform in `train_x` with its mel spectrogram.
length = len(train_y)

# Only convert while `train_x` still holds raw 1-D waveforms, so re-running
# this cell does not try to take the spectrogram of a spectrogram.
if len(train_x) and np.ndim(train_x[0]) == 1:
    # Keyword arguments: newer librosa releases no longer accept the waveform
    # and sampling rate positionally; older releases accept the keywords too.
    train_x = [librosa.feature.melspectrogram(y=clip, sr=sampling_rate) for clip in train_x]

In [0]:
# Get the input dimensions and add the trailing channel axis the CNN expects:
# (samples, SPEC_H, SPEC_W, 1).
# The sample count is taken from `train_x` itself rather than from the global
# `length`, which a later cell rebinds to the size of the test set. Reading the
# dimensions from axes 1 and 2 also makes this cell safe to re-run.
train_x = np.asarray(train_x)
SPEC_H, SPEC_W = train_x.shape[1:3]
train_x = train_x.reshape(len(train_x), SPEC_H, SPEC_W, 1)
print(train_x.shape)

## Create, compile and train the model

In [0]:
# Build the CNN in one go: four Conv2D + MaxPooling2D stages, then a dense
# classifier head ending in a 15-way softmax.
model = Sequential([
    Conv2D(64, kernel_size=3, activation="relu", input_shape=(SPEC_H, SPEC_W, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, kernel_size=3, activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(256, kernel_size=3, activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(256, kernel_size=3, activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation = 'relu'),
    Dropout(0.5),
    Dense(128, activation = 'relu'),
    Dense(15, activation="softmax"),
])

# Labels are integer class numbers, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train; 10% of the training data is held out for validation, and the
# per-epoch metrics are kept in `hist`.
hist = model.fit(
    train_x,
    train_y,
    batch_size = 30,
    epochs=30,
    validation_split=0.1,
)

In [0]:
# Plot the training curves, evaluate the model on the test and training sets,
# report the accuracies and save the trained model to Drive.
fig, loss_ax = plt.subplots()

acc_ax = loss_ax.twinx()

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')

# Depending on the Keras version the accuracy metric is recorded as
# 'acc' or 'accuracy'; use whichever key this run produced.
acc_key = 'acc' if 'acc' in hist.history else 'accuracy'
acc_ax.plot(hist.history[acc_key], 'b', label='train acc')
acc_ax.plot(hist.history['val_' + acc_key], 'g', label='val acc')

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')
plt.show()

# Convert the test waveforms to the same (samples, SPEC_H, SPEC_W, 1) layout
# as the training data. Skipped when `test_x` has already been converted, so
# the cell can be re-run.
length = len(test_y)
if not (isinstance(test_x, np.ndarray) and test_x.ndim == 4):
    test_x = np.reshape(
        [librosa.feature.melspectrogram(y=clip, sr=sampling_rate) for clip in test_x],
        (length, SPEC_H, SPEC_W, 1))

test_result = model.evaluate(test_x, test_y)
train_result = model.evaluate(train_x, train_y)

print("Change the model change dense layer(128) plus dropout layer(0.5)")
# Report the values actually used in this run instead of hard-coded numbers.
print("epoch: {}, sampling rate = {}, sample label {}, total sample = {}".format(
    len(hist.history['loss']), sampling_rate, model.output_shape[-1], len(train_y) + len(test_y)))
print("Test Accuracy",round(test_result[1],4))
print("Train Accuracy",round(train_result[1],4))

model.save("/content/gdrive/My Drive/train_label15_6000_final.h5")

In [0]:
# Reload the saved model from Drive and check that it reproduces the
# test/train accuracy of the in-memory model.
from keras.models import load_model
model2 = load_model('/content/gdrive/My Drive/train_label15_6000_final.h5')

length = len(test_y)

# The evaluation cell above has already replaced `test_x` with the
# (samples, SPEC_H, SPEC_W, 1) spectrogram array, so it must not be fed through
# melspectrogram a second time. Convert only if it still holds raw waveforms.
if not (isinstance(test_x, np.ndarray) and test_x.ndim == 4):
    test_x = np.reshape(
        [librosa.feature.melspectrogram(y=clip, sr=sampling_rate) for clip in test_x],
        (length, SPEC_H, SPEC_W, 1))

test_result = model2.evaluate(test_x, test_y)
train_result = model2.evaluate(train_x, train_y)


print("Test Accuracy",round(test_result[1],4))
print("Train Accuracy",round(train_result[1],4))

In [0]:
# Print Keras' layer-by-layer summary of the trained model.
model.summary()