In [6]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt

# Path to the JSON dataset file read by load_data(); the file must provide
# the "mfcc" (inputs) and "labels" (targets) keys.
DATASET_PATH = "data_10.json"

# Loads the features and labels stored in a JSON dataset file
def load_data(dataset_path):
    """Load inputs and targets from a JSON dataset file.

    Args:
        dataset_path: Path to a JSON file whose top-level object has the
            keys "mfcc" (inputs) and "labels" (targets).

    Returns:
        A tuple ``(X, y)`` of numpy arrays built from ``data["mfcc"]`` and
        ``data["labels"]`` respectively.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the top-level object lacks "mfcc" or "labels".
    """
    # JSON is UTF-8 by specification, so do not depend on the platform's
    # default text encoding when reading it.
    with open(dataset_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    # Convert the nested lists into numpy arrays
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])

    return X, y

# Prepares the dataset for validation/training/testing
def prepare_datasets(test_size, validation_size, dataset_path=None, random_state=None):
    """Load the dataset and split it into train, validation and test sets.

    Args:
        test_size: Passed as ``test_size`` to the first ``train_test_split``
            call, which splits the full dataset into train and test.
        validation_size: Passed as ``test_size`` to the second split, which
            is applied to the *remaining training data* only. It is therefore
            relative to the training portion, not to the whole dataset.
        dataset_path: Path of the JSON dataset to load. ``None`` (the
            default) falls back to the module-level ``DATASET_PATH``.
        random_state: Forwarded to both ``train_test_split`` calls. ``None``
            (the default) keeps the original non-deterministic shuffling;
            pass an int to make the splits reproducible.

    Returns:
        ``(X_train, X_validation, X_test, y_train, y_validation, y_test)``.
        Each ``X_*`` array has one extra trailing axis of size 1 added.
    """
    if dataset_path is None:
        dataset_path = DATASET_PATH

    # load data
    X, y = load_data(dataset_path)

    # create the train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # create train/validation split (carved out of the training portion)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state
    )

    # The conv layers expect a 3D array per sample, so append a channel axis
    # of depth 1, e.g. (num_samples, 130, 13) -> (num_samples, 130, 13, 1).
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test

# Builds the model
def build_model(input_shape, num_classes=10):
    """Build the (uncompiled) CNN classifier.

    The network is three conv stages, each made of Conv2D (32 kernels, relu),
    MaxPool2D (stride 2, 'same' padding) and BatchNormalization. These are
    followed by Flatten, a 64-unit relu Dense layer, Dropout(0.3) and a
    softmax output layer.

    Args:
        input_shape: Shape of a single sample without the batch axis,
            e.g. (130, 13, 1) where the trailing 1 is the channel depth.
        num_classes: Number of units in the softmax output layer.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        A ``keras.Sequential`` model; the caller is responsible for
        compiling it.
    """
    # (conv kernel grid size, pooling window) for each conv stage
    conv_stages = (
        ((3, 3), (3, 3)),
        ((3, 3), (3, 3)),
        ((2, 2), (2, 2)),
    )

    # create the model
    model = keras.Sequential()

    # Declare the input shape once. Only the first layer of a Sequential
    # model needs it, so the later conv layers must not repeat it.
    model.add(keras.Input(shape=input_shape))

    for kernel_size, pool_size in conv_stages:
        # Conv2D params: number of kernels, kernel grid size, activation
        model.add(keras.layers.Conv2D(32, kernel_size, activation='relu'))
        model.add(keras.layers.MaxPool2D(pool_size, strides=(2, 2), padding='same'))
        # standardizes the activations in the current layer, speeds up training
        model.add(keras.layers.BatchNormalization())

    # flatten the output and feed it into dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    # output layer that uses softmax: one probability per class
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    return model

# Function to predict on a single sample
def predict(model, X, y):
    """Predict the class of one sample, print the comparison and return it.

    Args:
        model: Object exposing ``predict(batch)`` that returns a 2D array of
            per-class scores, one row per sample in the batch.
        X: A single sample without the batch axis, e.g. (130, 13, 1).
        y: The expected label index for ``X``; only used in the printed
            message.

    Returns:
        The predicted class index as a plain ``int``.
    """
    # The model needs a batch axis in front: (130, 13, 1) -> (1, 130, 13, 1).
    # The leading 1 is the number of samples to predict on.
    X = X[np.newaxis, ...]
    prediction = model.predict(X)

    # The prediction is a 2D array [[0.1, 0.2, ...]] holding one score per
    # class for the given sample.

    # Extract index with max value
    predicted_index = np.argmax(prediction, axis=1) # [4] --> the index mapped to a genre label

    print("Expected index: {}, Predicted index: {}".format(y, predicted_index))

    # Hand the result back so callers can use it programmatically
    return int(predicted_index[0])
    

# Steps:
if __name__ == "__main__":
    # Create train, validation, and test sets.
    # Validation set --> evaluating our model while we tweak the hyperparameters
    # Test set --> only used after all the training and parameter tweaking
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # build the CNN net
    # per-sample shape without the batch axis, e.g. (130, 13, 1)
    input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])
    model = build_model(input_shape)

    # compile the network
    optimizer = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    # train the CNN
    model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=32, epochs=30)

    # evaluate the CNN on the test set
    # (evaluate returns the loss first, then the compiled metrics)
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy on test set is: {}".format(test_accuracy))

    # Make prediction on a sample. Index 100 is used when available; clamp it
    # so a test split with 100 or fewer samples does not raise IndexError.
    sample_index = min(100, len(X_test) - 1)
    X = X_test[sample_index]
    y = y_test[sample_index]

    predict(model, X, y)
    

Epoch 1/30
188/188 ━━━━━━━━━━━━━━━━━━━━ 3s 14ms/step - accuracy: 0.1967 - loss: 2.5140 - val_accuracy: 0.3758 - val_loss: 1.7536
Epoch 2/30
188/188 ━━━━━━━━━━━━━━━━━━━━ 3s 15ms/step - accuracy: 0.3624 - loss: 1.8157 - val_accuracy: 0.4526 - val_loss: 1.4882
Epoch 3/30
188/188 ━━━━━━━━━━━━━━━━━━━━ 3s 15ms/step - accuracy: 0.4298 - loss: 1.6129 - val_accuracy: 0.4960 - val_loss: 1.3788
Epoch 4/30
188/188 ━━━━━━━━━━━━━━━━━━━━ 3s 15ms/step - accuracy: 0.4668 - loss: 1.5023 - val_accuracy: 0.5214 - val_loss: 1.3124
Epoch 5/30
188/188 ━━━━━━━━━━━━━━━━━━━━ 3s 15ms/step - accuracy: 0.5017 - loss: 1.3862 - val_accuracy: 0.5374 - val_loss: 1.2644
Epoch 6/30
188/188 ━━━━━━━━━━━━━━━━━━━━ 3s 16ms/step - accuracy: 0.5068 - loss: 1.3788 - val_accuracy: 0.5487 - val_loss: 1.2327
Epoch 7/30
[1m188/188