## Importing packages

In [None]:
%matplotlib
import glob
import random
import itertools
import math
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
from python_speech_features import mfcc, fbank, logfbank
from pydub import AudioSegment
from collections import Counter
from multiprocessing import Pool
from tqdm import tqdm

## Reading files' paths

In [None]:
# Expected layout: ./reciters/<reciter_id>/<clip>.
# The listing is sorted because glob returns entries in arbitrary
# (filesystem-dependent) order, while itertools.groupby only merges *adjacent*
# items that share a key and the seeded splits below depend on the input order.
# Sorting therefore keeps each reciter's clips together and makes the
# train/test/validation assignment reproducible across machines.
reciters_paths = sorted(glob.glob('./reciters/*/*'))

# Each dictionary maps an integer reciter id to a list of clip paths.
train_paths_dictionary = {}
validation_paths_dictionary = {}
test_paths_dictionary = {}

# The grouping key is the name of the directory that contains the clip.
for reciter_dir, reciter_files in itertools.groupby(reciters_paths, key=lambda path: path.split('/')[-2]):
    files = list(reciter_files)
    # First split: 70% of the reciter's clips for training, 30% held out.
    train_paths, holdout_paths = train_test_split(files, test_size=0.30, random_state=13)
    # Second split: 70% of the held-out clips become the test set and the
    # remaining 30% the validation set.
    test_paths, validation_paths = train_test_split(holdout_paths, test_size=0.30, random_state=13)

    reciter_id = int(reciter_dir)  # directory names are parsed as integer labels
    train_paths_dictionary[reciter_id] = train_paths
    test_paths_dictionary[reciter_id] = test_paths
    validation_paths_dictionary[reciter_id] = validation_paths

In [None]:
# Fix the state of Python's built-in RNG so that later `random` calls in this
# notebook draw the same sequence on every run.
random.seed(13)

## Preprocessing

In [None]:
# %%time
# NOTE: this whole cell is intentionally commented out. It is the MFCC
# feature-extraction step; the notebook instead loads precomputed arrays from
# ./features/*_26.npy in a later cell.
#
# What get_data_and_labels does when enabled:
#   * for every reciter, randomly picks at most 10 clips;
#   * keeps the first mono channel of each clip and cuts it into fixed-length
#     slices via clip[::5000] (presumably 5000 ms each -- confirm against the
#     pydub documentation), dropping the trailing slice;
#   * randomly picks at most 5 slices per clip and computes MFCCs with
#     numcep=26, keeping the first `number_of_segments` (254) frames;
#   * returns the stacked features `x` and the matching reciter ids `y`.
#
# NOTE(review): `number_of_labels` and `number_of_features` are assigned but
# never used, and `number_of_features = 13` disagrees with `numcep=26` below.
# NOTE(review): np.array(x) presumably relies on every slice yielding at least
# 254 MFCC frames so that all entries share one shape -- confirm.
# def get_data_and_labels(paths_dictionary):
#     number_of_labels = len(paths_dictionary)
#     number_of_segments = 254
#     number_of_features = 13
#     x = []
#     y = []
#     for reciter_id, reciter_path in tqdm(paths_dictionary.items()):
#         for clip_path in random.sample(reciter_path, min(10, len(reciter_path))):
#             clip = AudioSegment.from_mp3(clip_path).split_to_mono()[0]
#             clip_sample_rate = clip.frame_rate
#             nfft = 2 ** math.ceil(np.log2(0.025 * clip_sample_rate))
#             clip_sliced = list(clip[::5000])[:-1] # to avoid the last incomplete slice
#             samples = random.sample(clip_sliced, min(5, len(clip_sliced)))
#             for sample in samples:
#                 features = mfcc(np.array(sample.get_array_of_samples()), 
#                                 samplerate=clip_sample_rate, nfft=nfft, numcep=26)
#                 x.append(features[:number_of_segments, :])
#                 y.append(reciter_id)
#     x = np.array(x)
#     y = np.array(y)
    
#     return x, y

# x_train, y_train = get_data_and_labels(train_paths_dictionary)
# x_test, y_test = get_data_and_labels(test_paths_dictionary)
# x_val, y_val = get_data_and_labels(validation_paths_dictionary)

## Saving MFCC features

In [None]:
# NOTE: disabled on purpose. When enabled, these lines write the six feature
# and label arrays to ./features/ under the same file names that the loading
# cell of this notebook reads back with np.load.
# np.save('./features/x_train_26.npy', x_train)
# np.save('./features/x_test_26.npy', x_test)
# np.save('./features/x_val_26.npy', x_val)
# np.save('./features/y_train_26.npy', y_train)
# np.save('./features/y_test_26.npy', y_test)
# np.save('./features/y_val_26.npy', y_val)

In [None]:
# Read the cached feature arrays (x_*) for the train / test / validation splits.
x_train, x_test, x_val = map(np.load, (
    './features/x_train_26.npy',
    './features/x_test_26.npy',
    './features/x_val_26.npy',
))

# Read the matching label arrays (y_*) in the same split order.
y_train, y_test, y_val = map(np.load, (
    './features/y_train_26.npy',
    './features/y_test_26.npy',
    './features/y_val_26.npy',
))

## Benchmark algorithm

In [None]:
# NOTE: disabled baseline -- a logistic-regression classifier scored with
# accuracy_score on the test split.
# NOTE(review): `x_train_` and `x_test_` (with trailing underscore) are not
# defined anywhere in the visible notebook; presumably they were reshaped 2-D
# versions of x_train / x_test -- they must be recreated before this cell can
# be re-enabled.
# from sklearn.linear_model import LogisticRegression

# clf = LogisticRegression()
# clf.fit(x_train_, y_train)
# pred = clf.predict(x_test_)
# accuracy_score(y_test, pred)

## Classification algorithm

In [None]:
from keras.models import Model, Sequential, load_model
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, TensorBoard
from keras.layers import Input, Dense, LSTM, Dropout, Embedding, Bidirectional, SpatialDropout1D, Flatten, MaxPool1D
from keras.layers import Concatenate, Average, Add, GlobalAveragePooling1D, GlobalMaxPooling1D, CuDNNLSTM, CuDNNGRU, GRU
from keras.layers import BatchNormalization, GaussianNoise, GaussianDropout, AlphaDropout
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils.np_utils import to_categorical
from keras import backend as K

In [None]:
class accuracy_callback(Callback):
    """Keras callback that stores multi-class validation accuracy in ``logs``.

    At the end of every epoch the model is run on the validation inputs, the
    predicted and the one-hot encoded true classes are reduced with ``argmax``
    and the resulting ``accuracy_score`` is written to ``logs['val_accuracy']``
    so that callbacks running after this one can read it.
    """

    def __init__(self):
        super(accuracy_callback, self).__init__()

    def on_train_begin(self, logs=None):
        """Register ``val_accuracy`` in ``self.params['metrics']`` if it is missing."""
        if 'val_accuracy' not in self.params['metrics']:
            self.params['metrics'].append('val_accuracy')

    def on_epoch_end(self, epoch, logs=None):
        """Compute validation accuracy and store it under ``logs['val_accuracy']``.

        ``logs`` defaults to ``None`` rather than ``{}``: a dict default is
        created once and shared by every call, so writing into it would leak
        values between calls.
        """
        if logs is None:
            logs = {}

        # Sentinel used when no validation data is available.
        logs['val_accuracy'] = float('-inf')
        if self.validation_data:
            y_pred = self.model.predict([self.validation_data[0]], batch_size=self.params['batch_size'])
            # Row-wise argmax turns per-class scores into class indices.
            y_pred = np.argmax(y_pred, axis=1)
            y_true = np.argmax(self.validation_data[1], axis=1)

            logs['val_accuracy'] = accuracy_score(y_true, y_pred)

In [None]:
K.clear_session()

# Width of the softmax output layer; the one-hot targets below are forced to
# the same width so that the two can never disagree.
NUM_CLASSES = 50

# Network: spatial dropout -> bidirectional GRU over the 254 x 26 input ->
# max over time -> dense head with dropout -> softmax over the classes.
inp = Input(shape=(254, 26))
x = SpatialDropout1D(0.05)(inp)
x = Bidirectional(CuDNNGRU(512, return_sequences=True, name='bidirectional_gru'))(x)
x = GlobalMaxPooling1D()(x)
x = Dropout(0.2)(x)
x = Dense(256, activation="relu", name='dense_1')(x)
x = Dropout(0.2)(x)
x = Dense(NUM_CLASSES, activation="softmax", name='output')(x)

# Both callbacks monitor 'val_accuracy', the value that accuracy_callback
# writes into `logs`; accuracy_callback therefore has to come first in the
# callbacks list passed to fit().
early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.0, patience=5, verbose=1, mode='max')
checkpoint = ModelCheckpoint('model.h5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
tb = TensorBoard(log_dir='./logs', write_graph=True, write_grads=True, write_images=True)


model = Model(inputs=inp, outputs=x)
# Single-label multi-class problem with a softmax output and one-hot targets:
# the matching loss is categorical cross-entropy. Binary cross-entropy would
# score every output unit as an independent yes/no decision.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])

# num_classes is passed explicitly: without it to_categorical infers the width
# from the largest label present, which may differ between splits and from the
# output layer.
history = model.fit(x_train, to_categorical(y_train, num_classes=NUM_CLASSES),
                    epochs=50, batch_size=128, verbose=1,
                    validation_data=(x_val, to_categorical(y_val, num_classes=NUM_CLASSES)),
                    callbacks=[accuracy_callback(), early_stopping, checkpoint, tb])

## Results

In [None]:
# Reset the backend session, then restore the model stored in 'model.h5'.
K.clear_session()
model = load_model('model.h5')

# Score the test inputs and keep, for every row, the index of its largest output.
pred = list(np.argmax(model.predict(x_test), axis=1))

In [None]:
# Test-set accuracy expressed as a percentage.
100 * accuracy_score(y_test, pred)