In [5]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
import datetime
import matplotlib.pyplot as plt
import os


In [6]:
# Root folder of the dataset; input and output paths below are built from it.
BASE_PATH = "../Dataset/"
# strftime pattern (day, month, year, hour, minute, second, microsecond)
# used to time-stamp the names of the generated image files.
dateFormat = "%d%m%Y%H%M%S%f"

In [7]:
# datetime.datetime.now().strftime('%d%m%Y%H%M%S%f')

In [8]:
# The metadata CSV is read without a header row, so columns are addressed by
# position: 0 holds the wav file name and 1 holds its label.
csvPath = f"{BASE_PATH}audio_meta.csv"
metadata = pd.read_csv(filepath_or_buffer=csvPath, header=None)
metadata.head()

Unnamed: 0,0,1
0,Kupwara_14062021081522191884.wav,Kupwara
1,Kupwara_14062021081522204129.wav,Kupwara
2,Kupwara_14062021081522211612.wav,Kupwara
3,Kupwara_14062021081522222216.wav,Kupwara
4,Kupwara_14062021081522230464.wav,Kupwara


# Extract the MFCCs and scale them

In [9]:
# Compute MFCC matrices at three coefficient resolutions
def extractMfcc(audioData, sampleRate):
    """Return the MFCCs of one audio signal at 24, 48 and 96 coefficients.

    Args:
        audioData: Audio time series, forwarded to ``librosa.feature.mfcc`` as ``y``.
        sampleRate: Sampling rate of ``audioData``, forwarded as ``sr``.

    Returns:
        A 3-tuple ``(mfcc24, mfcc48, mfcc96)`` holding the results of
        ``librosa.feature.mfcc`` for ``n_mfcc`` = 24, 48 and 96, in that order.
    """
    coefficientCounts = (24, 48, 96)
    return tuple(
        librosa.feature.mfcc(y=audioData, sr=sampleRate, n_mfcc=count)
        for count in coefficientCounts
    )

In [15]:
# Render an MFCC matrix as a heat-map image (this is not the mel spectrogram)
def getMfccSpect(mfcc, sampleRate, folName, nameLabel):
    """Save an MFCC matrix as a borderless heat-map PNG.

    The image is written to ``{BASE_PATH}mfcc/{folName}/`` and named
    ``{nameLabel}_{timestamp}.png``, where the timestamp is the current time
    formatted with ``dateFormat``.

    Args:
        mfcc: MFCC matrix to draw, passed to ``librosa.display.specshow``.
        sampleRate: Sampling rate forwarded to ``specshow`` as ``sr``.
        folName: Sub-folder under ``{BASE_PATH}mfcc/`` that receives the image.
        nameLabel: Prefix of the output file name.
    """
    px = 1 / plt.rcParams['figure.dpi']  # size of one pixel in inches
    cmap = plt.get_cmap('inferno')
    outDir = f"{BASE_PATH}mfcc/{folName}"
    # makedirs (unlike mkdir) also creates the intermediate "mfcc" folder and
    # does not fail when the directory already exists.
    os.makedirs(outDir, exist_ok=True)
    currentStamp = datetime.datetime.now().strftime(dateFormat)
    fig = plt.figure(figsize=(128*px, 128*px))
    try:
        # NOTE(review): MFCC rows are cepstral coefficients, not frequencies, so
        # y_axis='log' is questionable; kept so generated images stay unchanged.
        librosa.display.specshow(mfcc, sr=sampleRate, x_axis='time', y_axis='log', cmap=cmap)
        plt.axis('off')
        plt.savefig(f"{outDir}/{nameLabel}_{currentStamp}.png", bbox_inches='tight', pad_inches=0.0, dpi=95)
        print(f"Saving MFCC Heat Map: {nameLabel}")
    finally:
        # Always release the figure, even if drawing or saving fails, so a long
        # batch run does not accumulate open figures.
        plt.close(fig)


In [19]:
# Plot and save the mel spectrogram of an audio signal
def melSpectrograms(audio, sampleRate, nameLabel):
    """Save the log-scaled mel spectrogram of an audio signal as a PNG.

    The image is written to ``{BASE_PATH}spectrograms/`` and named
    ``{nameLabel}_{timestamp}.png``, where the timestamp is the current time
    formatted with ``dateFormat``.

    Args:
        audio: Audio time series to analyse.
        sampleRate: Sampling rate of ``audio``.
        nameLabel: Prefix of the output file name.
    """
    cmap = plt.get_cmap('inferno')
    # The signal must be passed as ``y=``: librosa >= 0.10 made the arguments of
    # melspectrogram keyword-only, so the positional form raises a TypeError.
    melSpectrogram = librosa.feature.melspectrogram(y=audio, sr=sampleRate, n_mels=128)
    logMel = librosa.power_to_db(melSpectrogram)
    outDir = f"{BASE_PATH}spectrograms"
    # makedirs tolerates an already-existing folder and creates missing parents.
    os.makedirs(outDir, exist_ok=True)
    currentStamp = datetime.datetime.now().strftime(dateFormat)
    # Draw on a dedicated default-sized figure so exactly this figure is closed.
    fig = plt.figure()
    try:
        # NOTE(review): the rows are mel bands, so y_axis='mel' may be the
        # intended scale; kept as 'log' so generated images stay unchanged.
        librosa.display.specshow(logMel, sr=sampleRate, x_axis="time", y_axis='log', cmap=cmap)
        plt.axis('off')
        plt.savefig(f"{outDir}/{nameLabel}_{currentStamp}.png", bbox_inches='tight', pad_inches=0.0, dpi=95)
        print(f"Saving Mel Spectrogram of : {nameLabel}")
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

In [12]:
# filename = '/content/drive/MyDrive/Masters Project/Dataset/Audio/Final Data/006258102M61l55fP180.wav'
# data, sampleRate = librosa.load(filename)
# spectrogram(data)


# Store the MFCCs and their corresponding labels in the features list

In [20]:
def getFigures(folder):
    """Generate MFCC heat maps and a mel spectrogram for every .wav under ``folder``.

    The directory tree is walked recursively. For each file whose name ends
    with ".wav", the text before the first underscore in the file name is used
    as the label prefix of the saved images.

    Args:
        folder: Root directory to search for .wav files.
    """
    mfccFolders = ('mfcc24', 'mfcc48', 'mfcc96')
    for dirPath, _subDirs, fileNames in os.walk(folder):
        for wavName in fileNames:
            if not wavName.endswith(".wav"):
                continue
            label = wavName.split("_")[0]
            signal, rate = librosa.load(os.path.join(dirPath, wavName))
            # extractMfcc returns the matrices in the same order as mfccFolders.
            for outFolder, coefficients in zip(mfccFolders, extractMfcc(signal, rate)):
                getMfccSpect(coefficients, rate, outFolder, label)
            melSpectrograms(signal, rate, label)

In [21]:
# Walk the dataset root from the configuration cell rather than repeating the
# path literal, so the input location and the output folders (which are built
# from BASE_PATH) cannot drift apart.
folder = BASE_PATH
getFigures(folder)

Saving Mel Spectrogram of : Kupwara
Saving Mel Spectrogram of : Shopian
Saving Mel Spectrogram of : Kupwara
Saving Mel Spectrogram of : Kupwara
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Kupwara
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Kupwara
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Kupwara
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Shopian
Saving Mel Spectrogram of : Shopian
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Shopian
Saving Mel Spectrogram of : Kupwara
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Shopian
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Kupwara
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Islamabad
Saving Mel Spectrogram of : Bandipora


In [None]:
import matplotlib.pyplot as plt
# Build one (feature vector, label) record per audio clip listed in the metadata.
features = []
for index, row in metadata.iterrows():
    # Column 0 of the metadata is the wav file name, column 1 its label.
    filename = f"{BASE_PATH}Audio/Final Data/{row[0]}"
    data, sampleRate = librosa.load(filename)
    currentLabel = row[1]
    mfcc24, mfcc48, mfcc96 = extractMfcc(data, sampleRate)
    # getMfccSpect takes the file-name prefix as a required fourth argument;
    # calling it with only three arguments raises a TypeError.
    getMfccSpect(mfcc24, sampleRate, 'mfcc24', currentLabel)
    getMfccSpect(mfcc48, sampleRate, 'mfcc48', currentLabel)
    getMfccSpect(mfcc96, sampleRate, 'mfcc96', currentLabel)
    # Average each coefficient over time to get one fixed-length vector per
    # clip. 20 coefficients matches the feature width recorded further down
    # (trainx.shape == (27, 20)).
    mfcc20 = librosa.feature.mfcc(y=data, sr=sampleRate, n_mfcc=20)
    mfccScaled = np.mean(mfcc20.T, axis=0)
    # Without this append the DataFrame built from `features` would be empty.
    features.append([mfccScaled, currentLabel])
    

# Create the DataFrame from the extracted features

In [None]:
    # One row per collected record: the feature vector and its class label.
    featureColumns = ['mfcc_features', 'label']
    mfccDf = pd.DataFrame(data=features, columns=featureColumns)
    mfccDf.head()

# Training a deep learning model

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Convert the feature and label columns of mfccDf into NumPy arrays.
X = np.asarray(mfccDf["mfcc_features"].to_list())
y = np.asarray(mfccDf["label"].to_list())

In [None]:
# Sanity check: X was built with np.array, so this should report an ndarray.
type(X)

numpy.ndarray

In [None]:
# Hold out 10% of the samples for testing; the fixed random_state makes the
# split repeatable between runs.
trainx, testx, trainy, testy = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=1,
)

In [None]:
# Inspect the training matrix dimensions (rows, columns).
trainx.shape

(27, 20)

In [None]:
import tensorflow as tf

In [None]:
# Show the TensorFlow version this run is using.
tf.__version__

'2.5.0'

In [None]:
from sklearn.preprocessing import LabelEncoder
# Encode the class labels as integers. The encoder is fitted on the training
# labels only and then reused, unchanged, for the test labels.
le = LabelEncoder()
le.fit(trainy)
trainy = le.transform(trainy)
testy = le.transform(testy)

In [None]:
# Turn the label vector into a single-column matrix. Using -1 lets NumPy infer
# the row count, so the cell keeps working when the number of training samples
# changes (a hard-coded 27 raises a ValueError for any other size).
trainy = trainy.reshape(-1, 1)

In [None]:
# Confirm the labels are now a single-column matrix.
trainy.shape

(27, 1)

In [None]:
# Display the integer-encoded test labels.
testy

array([1, 0, 1, 1])

In [None]:
# Start from an empty Sequential model; layers are appended in the next cell.
model = tf.keras.models.Sequential()

In [None]:
# Two hidden ReLU layers followed by a single-unit output.
# The input width is read from the training matrix rather than hard-coded.
model.add(tf.keras.layers.Dense(units=100, activation="relu", input_shape=(trainx.shape[1],)))
model.add(tf.keras.layers.Dense(units=200, activation='relu'))
# The output activation must be sigmoid: softmax normalises across units, so
# with a single unit it always outputs 1.0 and the network cannot learn.
# A single sigmoid unit yields the probability of class 1 and is meant to be
# trained with a binary cross-entropy loss on 0/1 labels.
model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               2100      
_________________________________________________________________
dense_1 (Dense)              (None, 200)               20200     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 201       
Total params: 22,501
Trainable params: 22,501
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The targets are integer 0/1 labels in a single column and the network has a
# single output unit, so the matching loss is binary cross-entropy.
# categorical_crossentropy expects one-hot targets across several output units
# and gives a meaningless loss in this setup.
model.compile(optimizer='adam', loss="binary_crossentropy", metrics=['accuracy'])

In [None]:
# Display the encoded, column-shaped training labels before fitting.
trainy

array([[1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0]])

In [None]:
# Train for 100 passes over the training set; the returned History object is
# left as the cell's displayed value.
model.fit(
    x=trainx,
    y=trainy,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f6ca7d822d0>