# This notebook deals with audio classification using librosa and tensorflow

## There are many ways to process an audio file before feeding it to a neural network. They all rest on a single principle: transform the raw data into the form we need (a tabular, numerical representation) and then feed that to the network.

### In this notebook I explain how to convert audio files into a tabular form that can be used to build a model.

### Importing the required modules

In [None]:
import IPython.display as ipd 
import librosa
import pandas as pd
import os
import numpy as np
from tqdm import tqdm


from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation , Dropout

### Reading the data

In [None]:
# Root directory of the dataset; the per-fold audio folders are resolved
# relative to it later on.
audio_dataset_path = '../input/urbansound8k/'

# Metadata table: its rows are used later to look up each clip's fold,
# file name and class.
metadata_csv_path = '../input/urbansound8k/UrbanSound8K.csv'
meta_data = pd.read_csv(metadata_csv_path)

# Preview the first rows of the metadata.
meta_data.head()

### Extracting features using librosa

In [None]:
# do feature extraction using librosa
def features_extract(file):
    """Return a fixed-length MFCC summary vector for one audio file.

    The clip is decoded with librosa, 50 MFCCs are computed, and the
    coefficients are averaged over time so that every clip maps to a
    vector of the same length regardless of its duration.

    Args:
        file: Path of the audio file to load.

    Returns:
        1-D NumPy array with the per-coefficient mean of the MFCCs
        (one value per requested coefficient, i.e. ``n_mfcc`` = 50).
    """
    # Load from the ``file`` argument rather than a module-level variable,
    # so the function gives the right result for any caller.
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')

    # MFCC matrix for the whole clip.
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=50)

    # Average over the time axis. This is temporal pooling, not scaling:
    # the values are not normalised in any way.
    return np.mean(mfcc.T, axis=0)

# list containing one [feature_vector, class_label] pair per audio clip
extracted = []

# for each row in the csv; ``total`` is passed explicitly because tqdm cannot
# infer the length of the iterrows() generator, and without it the bar shows
# neither percentage nor ETA
for index_num, row in tqdm(meta_data.iterrows(), total=len(meta_data)):

    # build the path <dataset root>/fold<N>/<slice_file_name>
    file_name = os.path.join(
        os.path.abspath(audio_dataset_path),
        'fold' + str(row["fold"]) + '/',
        str(row['slice_file_name']),
    )

    # get file label
    final_class_labels = row['class']

    # extract the feature vector for this clip
    data = features_extract(file_name)

    # store the pair in the list
    extracted.append([data, final_class_labels])


### Create a new dataframe from the extracted features

In [None]:
# Turn the list of [feature, label] pairs into a two-column dataframe.
column_names = ['feature', 'class']
extracted_df = pd.DataFrame(data=extracted, columns=column_names)

# Display the first five rows of the dataframe.
extracted_df.head()

### Distribute the data to X and Y

In [None]:
# Pull each column out as a plain Python list first, then let np.array()
# build the arrays from those lists.
feature_rows = extracted_df['feature'].tolist()
class_rows = extracted_df['class'].tolist()

x = np.array(feature_rows)  # model inputs
y = np.array(class_rows)    # class labels, encoded in the next step

### Use encoding to encode the string labels to an integer

In [None]:
# Fit a label encoder that maps every class name to an integer id.
le = LabelEncoder()
class_ids = le.fit_transform(y)

# One-hot encode the integer ids so they can be used as categorical targets.
Y = to_categorical(class_ids)

### Split the data into train and test sets

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(x, Y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split_parts

# Report how many samples ended up in each subset.
print("Number of training samples = ", x_train.shape[0])
print("Number of testing samples = ", x_test.shape[0])

### Build the model

In [None]:
# Construct model
num_labels = Y.shape[1]

# Take the input width from the training data instead of hard-coding it, so
# the network keeps working if the number of extracted features changes.
num_features = x_train.shape[1]

model = Sequential()

# First hidden block: Dense -> ReLU -> Dropout
model.add(Dense(256, input_shape=(num_features,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))

# Two more hidden blocks with the same layout
for hidden_block in range(2):
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

# The 128-unit layer needs its own non-linearity: without it, this layer and
# the output layer are two stacked linear maps that collapse into a single
# one, so the extra layer would add parameters but no modelling capacity.
model.add(Dense(128))
model.add(Activation('relu'))

# Output layer: one probability per class
model.add(Dense(num_labels))
model.add(Activation('softmax'))
model.summary()

### Compile the model 
#### optimizer-> adam
#### loss function -> Categorical Cross Entropy

In [None]:
# Adam optimizer with its default settings.
adam_optimizer = tf.keras.optimizers.Adam()

# Categorical cross-entropy loss.
cross_entropy_loss = tf.keras.losses.CategoricalCrossentropy()

# Compile the model and track accuracy as the metric.
model.compile(optimizer=adam_optimizer, loss=cross_entropy_loss, metrics=['accuracy'])

### Fitting the model with a batch size of 32 for 150 epochs

In [None]:
# Training hyper-parameters.
num_batch_size = 32
num_epochs = 150

# Train on the training split and pass the held-out split as validation data.
model.fit(
    x=x_train,
    y=y_train,
    epochs=num_epochs,
    batch_size=num_batch_size,
    validation_data=(x_test, y_test),
)

## Testing the model

### Create a function to extract features from a test audio file

In [None]:
# build the model input for a single audio file
def extract_feature(file_name):
    """Return the time-averaged MFCC vector of ``file_name`` as a batch of one.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        NumPy array with a single row holding the per-coefficient mean of
        the 50 MFCCs computed for the clip.
    """
    # decode the clip
    signal, sr = librosa.load(file_name, res_type='kaiser_fast')

    # MFCCs for the whole clip
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=50)

    # average over time, then wrap the vector in an outer list so the
    # result carries a leading batch dimension
    return np.array([np.mean(mfcc.T, axis=0)])

### Print the prediction for a test audio file by feeding it to the model

In [None]:
# classify one audio file and print the predicted label
def print_prediction(file_name):
    """Print the class that the trained model predicts for ``file_name``.

    Args:
        file_name: Path of the audio file to classify.
    """
    # single-row feature batch for the clip
    features = extract_feature(file_name)

    # class scores from the model; the index of the highest score is the class id
    scores = model.predict(features)
    class_ids = np.argmax(scores, axis=-1)

    # map the integer id back to its original string label
    labels = le.inverse_transform(class_ids)

    # display the result
    print("The predicted class is:", labels[0], '\n')

### Testing an audio file

In [None]:
# Path of the audio clip to classify
file_name = '../input/urbansound8k/fold8/103076-3-0-0.wav'

# print the class predicted by the model for this clip
print_prediction(file_name)

# create an audio player for the clip; kept as the last expression of the
# cell so that the notebook displays it
ipd.Audio(file_name)