### Preprocessing

In [1]:
import numpy
import os
import pandas as pd
import librosa
import librosa.display
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sns
from tensorflow.keras.models import load_model

In [2]:
# Every data directory is resolved relative to the current working directory.
dir_path = os.getcwd()
x_data, y_data, mfcc_data = (
    os.path.join(dir_path, relative)
    for relative in ("validation-dataset/X", "validation-dataset/Y", "MFCC")
)

### Preprocessing Data

In [4]:
def generateMFCC(x_data, y_data, mfcc_data):
    """Compute a standardized MFCC matrix for every array file in ``x_data``.

    Each file is loaded with ``numpy.load``, handed to
    ``librosa.feature.mfcc`` as a precomputed spectrogram (``S=``) with 20
    coefficients, standardized with the mean and standard deviation taken
    over the whole MFCC matrix, and saved into ``mfcc_data`` under the same
    file name.

    Parameters
    ----------
    x_data : str
        Directory holding the input spectrogram files.
    y_data : str
        Not used by this function; kept so existing calls keep working.
    mfcc_data : str
        Output directory. It is created if it does not exist yet.

    Returns
    -------
    None
    """
    # numpy.save does not create missing directories, so make sure the
    # destination exists before writing the first file.
    os.makedirs(mfcc_data, exist_ok=True)

    # Sorted so that processing (and the printed progress) is deterministic.
    for file in sorted(os.listdir(x_data)):
        mel_spectrogram = numpy.load(os.path.join(x_data, file))
        print(file)
        # NOTE(review): librosa documents ``S`` as a log-power mel
        # spectrogram -- confirm the stored arrays are already log-scaled.
        mfcc = librosa.feature.mfcc(S=mel_spectrogram, sr=16000, n_mfcc=20)
        mean = numpy.mean(mfcc)
        std = numpy.std(mfcc)
        if std == 0:
            # A constant matrix has zero spread; use a tiny divisor instead
            # of dividing by zero (the centred values are all 0 anyway).
            std = 1e-25
        mfcc = (mfcc - mean) / std
        numpy.save(os.path.join(mfcc_data, file), mfcc)

In [6]:
# Unique input file names in lexicographic order.
x = sorted(set(os.listdir(x_data)))

In [7]:
# Load every spectrogram file and stack them into a single array with a
# trailing channel axis: (num_files, n, o, 1).
mel_spectrograms = []
for file in x:
    arr = numpy.load(os.path.join(x_data, file))
    # Unpacking requires a 3-D array; the reshape below only succeeds when
    # the leading dimension m is 1.
    m, n, o = arr.shape
    # NOTE(review): an earlier version also computed
    # librosa.power_to_db(arr, ref=numpy.max) here but never used the result,
    # so the stored values were always the raw loaded ones. That dead
    # computation (and a round-trip through tf.convert_to_tensor) has been
    # removed without changing the output. If dB scaling is actually wanted,
    # apply it to `arr` before the reshape.
    mel_spectrograms.append(arr.reshape(n, o, 1))
mel_spectrograms = numpy.array(mel_spectrograms)

In [8]:
def eventroll_to_multihot_vector(eventroll, threshold=0.5, silence_index=8):
    """
    Collapse an eventroll over time into a multihot label vector.

    A class is marked active (1.0) when the sum of its row along axis 1 is
    at least ``threshold``; the entry at ``silence_index`` is then dropped.

    Parameters
    ----------
    eventroll : np.array
        Eventroll matrix of shape=(11, 1000).
    threshold : float, optional
        Minimum row sum for a class to count as active. Defaults to 0.5.
    silence_index : int, optional
        Row index of the silence class to remove. Defaults to 8.

    Returns
    -------
    np.array
        A multihot vector of shape=(10,)
    """

    # find out which events are active at any point in the clip:
    active_events = (eventroll.sum(axis=1) >= threshold).astype('float')

    # remove silence class:
    return numpy.delete(active_events, silence_index)

In [9]:
# Unique label file names in lexicographic order.
y_list = sorted(set(os.listdir(y_data)))

In [10]:
# One multihot label vector per label file, stacked in y_list order.
y = numpy.array([
    eventroll_to_multihot_vector(numpy.load(os.path.join(y_data, label_file)))
    for label_file in y_list
])

#### Assemble and save the test data

In [11]:
# The whole validation set is used for evaluation; no train/test split is
# performed in this notebook.
X_test = mel_spectrograms
y_test = y

# Inputs and labels are matched purely by their position in the two sorted
# file listings, so differing counts mean the pairs are misaligned.
if len(X_test) != len(y_test):
    raise ValueError(
        "Number of spectrograms ({}) does not match number of label vectors ({})".format(
            len(X_test), len(y_test)
        )
    )

In [12]:
# Only the test arrays are written out; no training split exists in this notebook.
X_test_n = os.path.join(dir_path, 'X_test_logspec')
y_test_n = os.path.join(dir_path, 'y_test_logspec')
for target, payload in ((X_test_n, X_test), (y_test_n, y_test)):
    numpy.save(target, payload)