In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display as ipd
import librosa
import os
import librosa.display

import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

In [None]:
# Read the UrbanSound8K metadata CSV into `df` and preview the first rows.
metadata_csv = "../input/urbansound8k/UrbanSound8K.csv"
df = pd.read_csv(metadata_csv)
df.head()

# Check whether the dataset is imbalanced

In [None]:
# Count how many metadata rows carry each class label.
df['class'].value_counts()

In [None]:
filename1 = "../input/urbansound8k/fold1/101415-3-0-2.wav"
plt.figure(figsize=(14,5))
# librosa.load returns the decoded samples together with their sampling rate.
data,sample_rate=librosa.load(filename1)
# `waveplot` was removed in librosa 0.10 in favour of `waveshow`; prefer the new
# function and only fall back to the old name on installs that lack it.
plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
plot_waveform(data,sr=sample_rate)
# Last expression of the cell: renders an inline audio player for the same file.
ipd.Audio(filename1)

In [None]:
# Display the array currently bound to `data` (the samples returned by the
# librosa.load call in the previous cell when the notebook is run in order).
data

In [None]:
filename2 = "../input/urbansound8k/fold5/100032-3-0-0.wav"
plt.figure(figsize=(14,5))
# librosa.load returns the decoded samples together with their sampling rate.
data1,sample_rate1=librosa.load(filename2)
# `waveplot` was removed in librosa 0.10 in favour of `waveshow`; prefer the new
# function and only fall back to the old name on installs that lack it.
plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
plot_waveform(data1,sr=sample_rate1)
# Last expression of the cell: renders an inline audio player for the same file.
ipd.Audio(filename2)

# Observation

Here librosa converts the signal to mono by default, meaning the number of channels will always be 1.

# Extract Features

Here we will be using Mel-Frequency Cepstral Coefficients (MFCCs) extracted from the audio samples. MFCCs summarise the frequency distribution across each analysis window, so it is possible to analyse both the frequency and time characteristics of the sound. These audio representations will allow us to identify features for classification.

In [None]:
def features_extractor(file):
    """Return a fixed-length MFCC summary vector for one audio file.

    Parameters
    ----------
    file : path-like
        Audio file to load with ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The 40 MFCC coefficients, each averaged over all frames of the clip.
    """
    # Load from the `file` argument. The previous version read the notebook
    # global `file_name`, which silently ignored the parameter.
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    # Transpose so frames are on axis 0, then average them away: one value
    # per coefficient regardless of clip length.
    mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)

    return mfccs_scaled_features

# Now we iterate through every audio file and extract features using Mel-Frequency Cepstral Coefficients


In [None]:
# Build one [mfcc_vector, class_label] record per metadata row.
extracted_features=[]
# Walk the rows that are actually in `df` rather than a hard-coded 8732, so the
# cell also works on a subset and does not depend on a 0..N-1 index.
for fold, slice_file_name, final_class_labels in zip(df["fold"], df["slice_file_name"], df["class"]):
    # Each clip lives at <root>/fold<k>/<slice_file_name>.
    file_name = '../input/urbansound8k/fold' + str(fold) + '/' + slice_file_name
    mfcc_vector=features_extractor(file_name)
    extracted_features.append([mfcc_vector,final_class_labels])

In [None]:
# Wrap the [feature, class] records in a two-column DataFrame and preview it.
feature_columns = ['feature', 'class']
extracted_features_df = pd.DataFrame(data=extracted_features, columns=feature_columns)
extracted_features_df.head()

# Split the dataset into independent (features) and dependent (labels) variables

In [None]:
# Pull each column out as a plain Python list first, then convert to arrays:
# X holds the feature vectors, y the class labels.
feature_rows = extracted_features_df['feature'].tolist()
class_labels = extracted_features_df['class'].tolist()
X = np.array(feature_rows)
y = np.array(class_labels)

In [None]:
# Shape of the feature matrix.
X.shape

# Label Encoding (one-hot)

In [None]:
# One-hot encode the class labels (one column per class).
# Derive from the source column rather than from `y` itself so re-running this
# cell does not re-encode an already encoded array, and request a float dtype
# explicitly because recent pandas versions return boolean dummies by default.
y=np.array(pd.get_dummies(extracted_features_df['class'], dtype=np.float32))

In [None]:
# Shape of the encoded label matrix.
y.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the samples, stratified on the labels, with a fixed seed so
# the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=0,
    stratify=y,
)

In [None]:
# Shape of the training feature matrix.
X_train.shape

In [None]:
# Shape of the held-out feature matrix.
X_test.shape

In [None]:
# Number of output classes = number of columns in the encoded label matrix.
num_labels=y.shape[1]
num_labels

In [None]:
# Fully connected classifier over the 40-value feature vector: three hidden
# blocks (Dense -> Activation -> Dropout) followed by a softmax output with
# one unit per class.
model = Sequential([
    # first hidden block
    Dense(100, input_shape=(40,)),
    Activation('relu'),
    Dropout(0.5),
    # second hidden block
    Dense(200),
    Activation('relu'),
    Dropout(0.5),
    # third hidden block
    Dense(100),
    Activation('tanh'),
    Dropout(0.5),
    # output layer
    Dense(num_labels),
    Activation('softmax'),
])

In [None]:
# Print the layer-by-layer summary of `model`.
model.summary()


In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss on the
# one-hot targets, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
num_epochs = 100
num_batch_size = 32

# Make sure the checkpoint directory exists before training starts; some Keras
# versions do not create it and would fail on the first save.
checkpoint_path = 'saved_models/audio_classification.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Keep only the weights from the epoch with the best monitored validation value.
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)
start = datetime.now()

# NOTE(review): the held-out test split doubles as the validation set here, so
# checkpoint selection sees the test data and the later test score is
# optimistic. Consider carving a separate validation split from X_train.
history = model.fit(
    X_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(X_test, y_test),
    callbacks=[checkpointer],
    verbose=1,
)

duration = datetime.now() - start
print("Training completed in time: ", duration)

In [None]:
# Plot training vs. validation accuracy per epoch.
# tf.keras records the metric under 'accuracy'/'val_accuracy', while older
# standalone Keras used 'acc'/'val_acc'; pick whichever key is present so the
# cell does not raise KeyError.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# evaluate() returns the loss followed by the compiled metrics, so despite its
# name this variable holds [loss, accuracy] for the held-out split.
test_accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)
test_accuracy

In [None]:
# Predict class probabilities for the held-out samples, reduce each row to the
# index of its most probable class, and write those indices to CSV.
predictions = model.predict(X_test)
preds = predictions.argmax(axis=1)
result = pd.DataFrame(preds)
result.to_csv("UrbanSound8kResults.csv")