In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file that Kaggle mounted under the input directory.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import os 
import matplotlib.pyplot as plt 
import tensorflow as tf
from tqdm import tqdm # to see process
# Audio signal processing library
import IPython.display as ipd
import librosa
import librosa.display

from sklearn.model_selection import train_test_split

from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Activation, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization
from sklearn import metrics
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Inspect the dataset layout: report how many files sit in each of the ten fold directories.
path = [f"/kaggle/input/urbansound8k/fold{fold_no}" for fold_no in range(1, 11)]
for i, fold_dir in enumerate(path):
    # os.walk yields one (directory, sub-directories, files) triple per directory it visits.
    for dirpath, subdirs, files in os.walk(fold_dir):
        print(f"this is {i+1}st folder having {len(files)} sound file in '{dirpath}'.")

## Analysis of One Particular Audio File

In [None]:
filename = "/kaggle/input/urbansound8k/fold1/102842-3-0-1.wav"

plt.figure(figsize = (14,5))

# librosa.load resamples the clip to one common sample rate (22050 Hz by default) and
# mixes multi-channel audio down to a single mono channel, so `sound_data` is 1-D.
# The samples are floating point, nominally in [-1, 1] (not 0 to 1).
sound_data, sample_rate = librosa.load(filename) # sr = number of samples per second
print("sample_rate : ",sample_rate)
print("data : ",sound_data)

# `librosa.display.waveplot` no longer exists in recent librosa releases; `waveshow`
# is its replacement. Prefer the new name and fall back on older installs so the cell
# runs on either.
plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
plot_waveform(sound_data, sr = sample_rate) # plot the mono waveform
plt.title("SINGLE Channel audio signal using LIBROSA")
ipd.Audio(filename) # play the audio

#This is a noisy dog sound

## The DataSet

In [None]:
# Load the UrbanSound8K metadata CSV; later cells read its "fold", "slice_file_name"
# and "class" columns. The trailing expression previews the first rows.
data = pd.read_csv("/kaggle/input/urbansound8k/UrbanSound8K.csv")
data.head()

In [None]:
# (number of rows, number of columns) of the metadata table
data.shape

In [None]:
# Check whether the dataset is imbalanced by counting the rows in each target class.
data['class'].value_counts()
# NOTE(review): the original author judged the classes close enough to balanced to ignore;
# confirm against the counts displayed above.

## Data PreProcessing

**Feature Extraction**

Take each audio signal and convert it into a numeric feature vector that represents the audio data.

**Feature Extraction Method : MFCC**

We use Mel-Frequency Cepstral Coefficients(MFCC) from the audio samples. The MFCC summarises the frequency distribution across the window size, so it is possible to analyse both the frequency and time characteristics of the sound. These audio representations will allow us to identify features for classification.

In [None]:
def features_extract(file_name, n_mfcc = 40):
    """Return a fixed-length MFCC summary vector for one audio file.

    The clip is loaded with ``librosa.load`` (``res_type='kaiser_fast'``), its
    Mel-frequency cepstral coefficients are computed, and every coefficient is
    averaged over all frames, so clips of any duration map to a vector of the
    same length.

    Parameters
    ----------
    file_name : str or path-like
        Audio file handed to ``librosa.load``.
    n_mfcc : int, default 40
        Number of MFCC coefficients to compute; also the length of the result.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_mfcc`` holding the per-coefficient mean.
    """
    audio, sample_rate = librosa.load(file_name, res_type = 'kaiser_fast')
    # One row per coefficient, one column per frame.
    mfccs_features = librosa.feature.mfcc(y = audio, sr = sample_rate, n_mfcc = n_mfcc)
    # Average across frames. This is a plain mean, not a scaling step.
    mfccs_mean = np.mean(mfccs_features.T, axis = 0)

    return mfccs_mean

In [None]:
# Iterate over every row of the metadata table, locate the matching audio file and
# extract its Mel-Frequency Cepstral Coefficient (MFCC) feature vector.
audio_dataset_path = '/kaggle/input/urbansound8k/'
extracted_features = []
# `total` lets tqdm show a real progress bar; iterrows() alone has no length.
for index_num, row in tqdm(data.iterrows(), total = len(data)):
    # Files live in <dataset>/fold<k>/<slice_file_name>.
    file_name = os.path.join(os.path.abspath(audio_dataset_path), 'fold' + str(row["fold"]), str(row["slice_file_name"]))
    final_class_labels = row["class"]
    # Use a dedicated name for the feature vector. Assigning it to `data` would replace
    # the metadata DataFrame with an array and make this cell fail when re-run.
    mfcc_vector = features_extract(file_name)
    extracted_features.append([mfcc_vector, final_class_labels])

In [None]:
# Turn the list of [feature_vector, class_label] pairs into a DataFrame with one row per
# audio file: column 'feature' holds the MFCC vector, column 'class' the label.
features_df=pd.DataFrame(extracted_features,columns=['feature','class'])
features_df.head()

## Split the DataSet

In [None]:
# Separate the independent variables (X) from the dependent variable (y).
# X stacks the per-file feature vectors into a single array (later cells index X.shape[1],
# so it is expected to be 2-D); y holds the class label of each file.
X = np.array(features_df['feature'].tolist())
y = np.array(features_df['class'].tolist())

In [None]:
# Sanity check: X and y should have the same number of rows.
X.shape, y.shape

In [None]:
# Class labels as extracted from the metadata, before any encoding.
y

In [None]:
# Label Encoding 
# using label encoder to get back the class name using inverse label encoder 
labelencoder = LabelEncoder()
y = to_categorical(labelencoder.fit_transform(y)) # tranform class label 
y

In [None]:
# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Report the shape of each of the four resulting arrays.
for split_name, split in (("X_train", X_train), ("X_test", X_test),
                          ("y_train", y_train), ("y_test", y_test)):
    print(f'{split_name} shape is {split.shape}')

## Model Architecture

In [None]:
labels = y.shape[1] # width of the one-hot target matrix = number of classes
input_size = X.shape[1] # length of each feature vector (the n_mfcc value used during extraction)
print(f"number of total class label '{labels}'")
print(f"number of features used '{input_size}' ")

In [None]:
# Fully connected classifier, stacked sequentially (each layer feeds the next):
# three Dense -> ReLU -> Dropout(0.2) blocks of width 1024, 512 and 256,
# followed by a softmax layer with one unit per class.
layer_stack = [
    # first block: also declares the input shape (one feature vector per sample)
    Dense(units = 1024, input_shape = (input_size,)),
    Activation('relu'),
    Dropout(0.2),
]

# second and third blocks
for width in (512, 256):
    layer_stack += [Dense(units = width), Activation('relu'), Dropout(0.2)]

# output head; the Flatten layer sits between the last hidden block and the softmax layer
layer_stack += [Flatten(), Dense(units = labels, activation="softmax")]

model = Sequential(layer_stack)

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# Multi-class setup: categorical cross-entropy on the one-hot targets, Adam optimizer,
# and accuracy tracked as the reporting metric.
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

## Train the Model


In [None]:
# Training hyper-parameters
num_epochs = 100
num_batch_size = 32

# Fit on the training split; the held-out split is evaluated after every epoch and
# reported as val_loss / val_accuracy.
model.fit(
    x = X_train,
    y = y_train,
    validation_data = (X_test, y_test),
    epochs = num_epochs,
    batch_size = num_batch_size,
    verbose = 1,
)

## Result Visualization

In [None]:
# Per-epoch training history (one column per tracked quantity) as a DataFrame.
loss = pd.DataFrame(model.history.history)

# Draw loss and accuracy curves side by side in the top row of a 2x2 grid.
fig = plt.figure(figsize=(10,10))

loss_ax = fig.add_subplot(2,2,1)
loss_ax.plot(loss["loss"], label ="Loss")
loss_ax.plot(loss["val_loss"], label = "Validation_loss")
loss_ax.legend()
loss_ax.set_title("Training and Validation Loss")

acc_ax = fig.add_subplot(2,2,2)
acc_ax.plot(loss['accuracy'],label = "Training Accuracy")
acc_ax.plot(loss['val_accuracy'], label ="Validation_ Accuracy ")
acc_ax.legend()
acc_ax.set_title("Training-Validation Accuracy")

## Prediction

In [None]:
# Predicted class probabilities for every held-out sample.
prediction = model.predict(X_test)

# Reduce each probability row / one-hot row to the index of its largest entry,
# i.e. the integer class id.
y_pred = prediction.argmax(axis = 1)   # predicted class ids
y_test_orig = y_test.argmax(axis = 1)  # true class ids
print(y_pred)

In [None]:
# Collect the distinct class names found in the feature table.
# NOTE(review): Series.unique() lists values in order of first appearance, which is presumably
# NOT the order of the integer ids assigned by LabelEncoder - do not use this list to name
# encoded classes; confirm against labelencoder.classes_.
class_label_lst = np.array(features_df['class'].unique().tolist())
print(class_label_lst)

In [None]:
# Take the display names from the fitted encoder: entry i of `labelencoder.classes_` is the
# class name that was encoded as integer id i. LabelEncoder assigns ids in sorted order, so
# a hand-typed list in order of appearance would attach the wrong name to every row.
class_name = labelencoder.classes_.tolist()

# Passing `labels` pins the report to all class ids, so names and rows stay aligned even if
# some class happens to be absent from the test split.
print(classification_report(y_test_orig, y_pred,
                            labels = list(range(len(class_name))),
                            target_names = class_name))

In [None]:
# Label the matrix from the fitted encoder so that row/column i carries the name that was
# encoded as integer id i (rows: true class, columns: predicted class).
encoded_names = labelencoder.classes_.tolist()
confusion_df = pd.DataFrame(
    # `labels` forces an n_classes x n_classes matrix even if a class is missing from the split.
    confusion_matrix(y_test_orig, y_pred, labels = list(range(len(encoded_names)))),
    columns = encoded_names,
    index = encoded_names,
)
print("\n")
print("**************************** CONFUSION METRIX *********************************")
print("\n")
confusion_df