# Deep Learning for Audio Classification in Python

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile

In [None]:
import python_speech_features
from python_speech_features import mfcc

In [None]:
from tqdm import tqdm

In [None]:
from keras.layers import Conv2D, MaxPool2D, Flatten, Dropout, Dense
from keras.layers import LSTM, TimeDistributed

from keras.models import Sequential

from keras.utils import to_categorical

from sklearn.utils.class_weight import compute_class_weight

In [None]:
import pickle

from keras.callbacks import ModelCheckpoint

In [None]:
class Config:
    """Settings and artefact paths shared by feature extraction and training.

    Args:
        mode: Pipeline variant; used to name the model and pickle files and
            to pick the array layout in ``build_rand_feat``.
        nfilt: Passed to ``mfcc`` as ``nfilt``.
        nfeat: Passed to ``mfcc`` as ``numcep``.
        nfft: Passed to ``mfcc`` as ``nfft``.
        rate: Nominal sample rate; one tenth of it defines ``step``.
    """

    def __init__(self, mode= 'conv', nfilt=26, nfeat=13, nfft = 2048, rate = 16000):
        self.mode = mode
        self.nfilt = nfilt
        self.nfeat = nfeat
        self.nfft = nfft
        self.rate = rate
        # Window length in samples: a tenth of a second at `rate`.
        self.step = int(rate/10)
        self.model_path = os.path.join('models', mode + '.model')
        self.p_path = os.path.join('pickles', mode + '.p')
        # Populated later by build_rand_feat(); declared here so every
        # instance exposes the same attributes even before features exist.
        self.min = None
        self.max = None
        self.data = None

    def __repr__(self):
        # The (potentially huge) feature arrays in `data` are left out on purpose.
        return ('{}(mode={!r}, nfilt={!r}, nfeat={!r}, nfft={!r}, rate={!r}, '
                'step={!r}, model_path={!r}, p_path={!r})').format(
                    type(self).__name__, self.mode, self.nfilt, self.nfeat,
                    self.nfft, self.rate, self.step, self.model_path, self.p_path)

In [None]:
def check_data():
    """Load the object pickled at ``config.p_path``.

    Reads the module-level ``config``.

    Returns:
        The unpickled object when the cache file exists, otherwise ``None``.
    """
    cache_path = config.p_path
    if not os.path.isfile(cache_path):
        return None

    print('Loading existing data for {} model'.format(config.mode))
    with open(cache_path, 'rb') as cache_file:
        return pickle.load(cache_file)

In [None]:
def build_rand_feat():
    """Build the min-max normalised MFCC training set, or load it from cache.

    Draws ``n_samples`` random windows of ``config.step`` audio samples, each
    from a randomly chosen class and file, converts every window to MFCC
    features, scales all features to [0, 1] with the global min/max, and
    pickles the populated ``config`` to ``config.p_path``.

    Relies on module-level state: ``config``, ``df``, ``classes``,
    ``class_dist``, ``prob_dist``, ``n_samples`` and ``dataset_directory``.

    Returns:
        tuple: ``(X, y)`` where ``X`` holds the normalised features (with a
        trailing channel axis in ``'conv'`` mode) and ``y`` the one-hot labels.

    Raises:
        ValueError: If a selected recording is shorter than one window.
    """
    cached = check_data()
    if cached is not None:
        # Carry the normalisation bounds over to the live config so later
        # code can scale new audio the same way as the cached features.
        for bound in ('min', 'max'):
            if hasattr(cached, bound):
                setattr(config, bound, getattr(cached, bound))
        return cached.data[0], cached.data[1]

    # File names per class, resolved once instead of re-filtering df per draw.
    files_by_class = {label: df[df.Class == label].index
                      for label in class_dist.index}

    X = []
    y = []

    _min, _max = float('inf'), -float('inf')

    for _ in tqdm(range(n_samples)):
        rand_class = np.random.choice(class_dist.index, p=prob_dist)
        fname = np.random.choice(files_by_class[rand_class])

        wav_path = os.path.join(dataset_directory, str(rand_class), str(fname))
        rate, wav = wavfile.read(wav_path)

        n_audio = wav.shape[0]
        if n_audio < config.step:
            raise ValueError(
                '{} has {} samples, fewer than one window of {}'.format(
                    wav_path, n_audio, config.step))

        # randint's upper bound is exclusive: the +1 keeps the last full
        # window reachable and lets a clip of exactly one window through.
        rand_index = np.random.randint(0, n_audio - config.step + 1)

        sample = wav[rand_index:rand_index + config.step]
        # NOTE(review): the window length comes from config.rate while mfcc
        # gets the file's own rate - confirm all files share config.rate.
        X_sample = mfcc(sample, rate, numcep=config.nfeat,
                        nfilt=config.nfilt, nfft=config.nfft)

        _min = min(np.amin(X_sample), _min)
        _max = max(np.amax(X_sample), _max)

        X.append(X_sample)
        # The class was drawn above; using it directly avoids a df lookup
        # that is ambiguous when two classes contain the same file name.
        y.append(classes.index(str(rand_class)))

    config.min = _min
    config.max = _max

    X, y = np.array(X), np.array(y)
    X = (X - _min) / (_max - _min)

    if config.mode == 'conv':
        # Append a single channel axis for the convolutional layers.
        X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
    elif config.mode == 'time':
        X = X.reshape(X.shape[0], X.shape[1], X.shape[2])

    # Kept at 2 to match the two-unit output layer of the model.
    y = to_categorical(y, num_classes=2)

    config.data = (X, y)

    # Create the cache folder first so the extraction work above is not lost
    # to a missing directory.
    cache_dir = os.path.dirname(config.p_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    with open(config.p_path, 'wb') as handle:
        pickle.dump(config, handle, protocol=2)

    return X, y

In [None]:
def get_reccurent_model():
    """Assemble, summarise and compile the stacked-LSTM classifier.

    Reads the module-level ``input_shape``; the data fed to the network is
    expected as (n, time, freq).

    Returns:
        The compiled ``Sequential`` model.
    """
    net = Sequential()

    # Two recurrent layers, both emitting the full sequence.
    net.add(LSTM(128, return_sequences=True, input_shape=input_shape))
    net.add(LSTM(128, return_sequences=True))

    # Per-time-step dense stack of shrinking width.
    for width in (64, 32, 16):
        net.add(TimeDistributed(Dense(width, activation='relu')))

    # Classification head on the flattened sequence.
    net.add(Flatten())
    net.add(Dropout(0.5))
    for width in (16, 8):
        net.add(Dense(width, activation='relu'))
    net.add(Dense(2, activation='sigmoid'))

    net.summary()
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

    return net

## Data Extraction

In [None]:
# Show the top-level entries of the working dataset folder as cell output.
os.listdir('Temp_Dataset/')

In [None]:
# Class labels are the sub-directory names of the training folder.
# Sorting makes the label -> index mapping reproducible across runs
# (directory listing order is arbitrary), and only directories are kept so a
# stray file in the folder is not mistaken for a class.
# NOTE(review): classes are read from 'Dataset/train/' while the audio is read
# from 'Temp_Dataset/train/' (dataset_directory) - confirm both hold the same
# class folders.
classes = sorted(entry.name for entry in os.scandir('Dataset/train/') if entry.is_dir())

print("Number of Classes in the Data Set:", len(classes), "Classes")
print("The classes of the dataset are   :", " , ".join(classes))

In [None]:
# Empty file index: one row per recording with its file name, class label
# and length; the rows are filled in further down.
column_names = ['Fname','Class', 'Length']
df = pd.DataFrame(columns = column_names)
df.info()

In [None]:
# Root folder holding one sub-directory of wav files per class.
# Previously pointed at: 'Dataset/Train/'
dataset_directory = 'Temp_Dataset/train/'

In [None]:
# Report how many files each class folder contains.
for class_name in classes:
    n_files = len(os.listdir(dataset_directory + class_name))
    print('Number of files in the directory \'{}\' are {}'.format(class_name, n_files))

In [None]:
# Collect one record per recording and build the frame in a single step:
# appending to a DataFrame row by row copies the whole frame each time, and
# DataFrame.append no longer exists in pandas 2.x.
rows = []
for c in classes:
    class_dir = os.path.join(dataset_directory, str(c))
    # Iterating the listing directly gives tqdm a length, so it can draw a
    # real progress bar.
    for f in tqdm(os.listdir(class_dir)):
        rate, signal = wavfile.read(os.path.join(class_dir, str(f)))
        rows.append({
            "Fname": str(f),
            "Class": str(c),
            # Number of samples divided by the sample rate = seconds.
            "Length": signal.shape[0] / rate})

# Rebuilding from scratch keeps the cell idempotent when it is re-run.
df = pd.DataFrame(rows, columns=column_names)

In [None]:
# Summarise the populated file index.
df.info()

In [None]:
# Mean recording length per class; normalised into sampling probabilities
# (prob_dist) further down.
class_dist = df.groupby(['Class'])['Length'].mean()
class_dist

In [None]:
# Key the file index by file name so rows can be looked up by Fname.
df = df.set_index('Fname')
df.info()

# RNN Model using LSTM

In [None]:
# Twice the number of 0.1-unit windows that fit into the summed Length column
# (Length is samples / sample rate, i.e. seconds).
n_samples = 2 * int(df['Length'].sum()/0.1)
# Per-class sampling probabilities, proportional to each class's mean length.
prob_dist = class_dist / class_dist.sum()
# A single draw from that distribution; `choices` is not used by the cells
# visible in this notebook.
choices = np.random.choice(class_dist.index, p= prob_dist)

In [None]:
# Select the 'time' mode; every other setting keeps its default.
config = Config(mode = 'time')
config

In [None]:
# Features and one-hot labels: loaded from the pickle cache when present,
# otherwise extracted from the audio files.
X,y = build_rand_feat()

In [None]:
# Integer class index per sample, recovered from the one-hot labels.
y_flat = np.argmax(y, axis =1)

In [None]:
# Per-sample shape (axes 1 and 2 of X), read by get_reccurent_model() for its
# first LSTM layer.
input_shape = (X.shape[1], X.shape[2])

In [None]:
# Build and compile the LSTM network; this also prints the layer summary.
model = get_reccurent_model()

## Adding Checkpoints

In [None]:
# Make sure the checkpoint folder exists before training starts.
os.makedirs(os.path.dirname(config.model_path) or '.', exist_ok=True)

# Keep only the model with the best validation accuracy seen so far.
# The deprecated `period` argument is dropped: saving is checked every epoch
# by default, which is what period=1 requested.
checkpoint = ModelCheckpoint(config.model_path, monitor='val_acc', verbose=1, mode='max',
                             save_best_only=True, save_weights_only=False)

In [None]:
# Train for 250 epochs, holding out 10% of the samples for validation; the
# checkpoint callback saves the best model along the way.
model.fit(X, y, epochs=250, batch_size=32, shuffle = True, validation_split=0.1, callbacks=[checkpoint])

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=1, sharex=False, sharey=True, figsize=(20,8))

# Training vs. validation accuracy, limited to the first 50 recorded epochs.
history = model.history.history
for metric in ('acc', 'val_acc'):
    axes.plot(history[metric][:50], label=metric)
axes.legend()

axes.set_title('Custom Built LSTM RNN Model\'s Training Analysis on the sickness and non-sickness Audio Data', size=16)
axes.set_xlabel("Epochs")
axes.set_ylabel("accuracy reached")

plt.show()