### EXTACTION OF DATA

In [1]:
#extract_data
  
import json
import os
import math
import librosa
import numpy as np

In [2]:
DATASET_PATH = "Sounds"
JSON_PATH = "data_10.json"
SAMPLE_RATE = 22050
TRACK_DURATION = 1 # measured in seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION

In [3]:
data = {
        "mapping": [],
        "labels": [],
        "mfcc": []
    }
for i, (dirpath, dirnames, filenames) in enumerate(os.walk(DATASET_PATH)):

        # ensure we're processing a genre sub-folder level
        if dirpath is not DATASET_PATH:

            # save genre label (i.e., sub-folder name) in the mapping
            semantic_label = dirpath.split("/")[-1]
            data["mapping"].append(semantic_label)
            print("\nProcessing: {}".format(semantic_label))
            for f in filenames:
                file_path = os.path.join(dirpath, f)
                print("{},".format(file_path))


Processing: Sounds\ME
Sounds\ME\a-bush-in-kazakhstan-will-laugh-when-we-hear-the-name-becaus-it-bush-em-in-the-hair-around-the-testes-satchel.wav,
Sounds\ME\a-little-better-tell-me-if-you-like-to-make-a-sex-crime-high-five.wav,
Sounds\ME\a-little.wav,
Sounds\ME\a-very-nice-he-have-a-like-a-tool-shaved-horses-bladder-over-river.wav,
Sounds\ME\a-very-nice.wav,
Sounds\ME\all-day-long-when-i-see-you-i-think-of-you-know-clothes-wow-we-were-but-is-a-very-wonderful.wav,
Sounds\ME\and-best-thing-of-all.wav,
Sounds\ME\and-he-is-a-strong-man-he-will-crush-his-opponents.wav,
Sounds\ME\and-he-will-be-powerful-alike-styling-and-not-to-tolerate-people-who-are-bad.wav,
Sounds\ME\and-here-will-be-powerful-and-like-a-styling-and-the-note-to-tolerate-people-who-are-bad.wav,

Processing: Sounds\UK
Sounds\UK\after-three-years-of-unfounded-self-doubt-it-is-time-to-change-the-record.wav,
Sounds\UK\against-the-pluck-a-nerve-and-ambition-of-this-country.wav,
Sounds\UK\all-this-and-more-we-can-do-now-and-only

In [4]:
def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):

    # dictionary to store mapping, labels, and MFCCs
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": []
    }

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    # loop through all genre sub-folder
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

        # ensure we're processing a genre sub-folder level
        if dirpath is not dataset_path:

            # save genre label (i.e., sub-folder name) in the mapping
            semantic_label = dirpath.split("/")[-1]
            data["mapping"].append(semantic_label)
            print("\nProcessing: {}".format(semantic_label))

            # process all audio files in genre sub-dir
            for f in filenames:

		# load audio file
                file_path = os.path.join(dirpath, f)
                signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

                # process all segments of audio file
                for d in range(num_segments):

                    # calculate start and finish sample for current segment
                    start = samples_per_segment * d
                    finish = start + samples_per_segment

                    # extract mfcc
                    mfcc = librosa.feature.mfcc(signal[start:finish], sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                    mfcc = mfcc.T

                    # store only mfcc feature with expected number of vectors
                    if len(mfcc) == num_mfcc_vectors_per_segment:
                        data["mfcc"].append(mfcc.tolist())
                        data["labels"].append(i-1)
                        print("{}, segment:{}".format(file_path, d+1))

    # save MFCCs to json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        


In [5]:
save_mfcc(DATASET_PATH, JSON_PATH, num_segments=1)


Processing: Sounds\ME
Sounds\ME\a-bush-in-kazakhstan-will-laugh-when-we-hear-the-name-becaus-it-bush-em-in-the-hair-around-the-testes-satchel.wav, segment:1
Sounds\ME\a-little-better-tell-me-if-you-like-to-make-a-sex-crime-high-five.wav, segment:1
Sounds\ME\a-little.wav, segment:1
Sounds\ME\a-very-nice-he-have-a-like-a-tool-shaved-horses-bladder-over-river.wav, segment:1
Sounds\ME\a-very-nice.wav, segment:1
Sounds\ME\all-day-long-when-i-see-you-i-think-of-you-know-clothes-wow-we-were-but-is-a-very-wonderful.wav, segment:1
Sounds\ME\and-best-thing-of-all.wav, segment:1
Sounds\ME\and-he-is-a-strong-man-he-will-crush-his-opponents.wav, segment:1
Sounds\ME\and-he-will-be-powerful-alike-styling-and-not-to-tolerate-people-who-are-bad.wav, segment:1
Sounds\ME\and-here-will-be-powerful-and-like-a-styling-and-the-note-to-tolerate-people-who-are-bad.wav, segment:1

Processing: Sounds\UK
Sounds\UK\after-three-years-of-unfounded-self-doubt-it-is-time-to-change-the-record.wav, segment:1
Sounds\UK\

### TRAINING MODEL

In [6]:

import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras

In [7]:
DATA_PATH = "data_10.json"

def load_data(data_path):
    """Loads training dataset from json file.
        :param data_path (str): Path to json file containing data
        :return X (ndarray): Inputs
        :return y (ndarray): Targets
    """

    with open(data_path, "r") as fp:
        data = json.load(fp)

    # convert lists to numpy arrays
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])

    print("Data succesfully loaded!")

    return  X, y

In [8]:
    # load data
    X, y = load_data(DATA_PATH)

    # create train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # build network topology
    model = keras.Sequential([

        # input layer
        keras.layers.Flatten(input_shape=(X.shape[1], X.shape[2])),

        # 1st dense layer
        keras.layers.Dense(512, activation='relu'),

        # 2nd dense layer
        keras.layers.Dense(256, activation='relu'),

        # 3rd dense layer
        keras.layers.Dense(64, activation='relu'),

        # output layer
        keras.layers.Dense(10, activation='softmax')
    ])

Data succesfully loaded!


In [9]:
# compile model
optimiser = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimiser,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

In [10]:
history = model.fit(X_train, y_train, batch_size=32, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
print("Evaluate on test data")
results, labels = model.evaluate(X_test, y_test, batch_size=32)
print("test loss, test acc:", results)
print(labels)

Evaluate on test data
test loss, test acc: 3.4797005653381348
0.8888888955116272
