In [25]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt

# Path to the JSON dataset; a relative path is resolved against the kernel's working directory.
# This notebook reads the "mfcc", "labels" and "mapping" keys from this file.
DATA_PATH = "data.json"

In [2]:
# load data
def load_data(dataset_path):
    """Load features and labels from a JSON dataset file.

    Args:
        dataset_path: Path to a JSON file whose top-level object contains
            the keys "mfcc" (features) and "labels" (targets).

    Returns:
        tuple: ``(inputs, targets)`` - numpy arrays built from ``data["mfcc"]``
        and ``data["labels"]`` respectively.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If "mfcc" or "labels" is missing from the file.
    """
    # JSON text is UTF-8; be explicit instead of relying on the platform's
    # default encoding (which differs between operating systems).
    with open(dataset_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    # convert lists to numpy arrays
    inputs = np.array(data["mfcc"])
    targets = np.array(data["labels"])

    return inputs, targets

In [3]:
# create train, validation and test sets
def prepare_datasets(test_size, validation_size, random_state=None):
    """Load the dataset and split it into train, validation and test sets.

    Args:
        test_size: Passed to ``train_test_split`` as ``test_size`` for the
            first split (full dataset -> train + test).
        validation_size: Passed to ``train_test_split`` as ``test_size`` for
            the second split. Note that this split is applied to the training
            portion left over after the test split, not to the full dataset.
        random_state: Optional seed forwarded to both ``train_test_split``
            calls. ``None`` (the default) keeps the previous behaviour of a
            different shuffle on every call; pass an int for reproducible
            splits.

    Returns:
        tuple: ``(X_train, X_validation, X_test, y_train, y_validation, y_test)``.
        Each ``X_*`` array has one extra trailing axis of length 1 appended.
    """
    # load data
    X, y = load_data(DATA_PATH)

    # create train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # create train/validation split (carved out of the remaining training data)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state
    )

    # Conv2D layers expect a trailing channel/depth axis, so append one of size 1.
    # NOTE(review): the original comment states each sample is (130, 13) -> (130, 13, 1);
    # the actual shape depends on the contents of the dataset file - confirm.
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test


# Fixed seed so that "Restart Kernel -> Run All" reproduces the same partition.
SPLIT_SEED = 42

X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(
    0.25, 0.2, random_state=SPLIT_SEED
)  # test size, validation size

In [4]:
# build CNN networks
def build_model(input_shape, num_classes=10):
    """Build (but do not compile) the CNN classifier.

    The network is three blocks of Conv2D -> MaxPool2D -> BatchNormalization,
    followed by Flatten -> Dense(64, relu) -> Dropout(0.3) -> Dense(softmax).

    Args:
        input_shape: Shape of a single input sample (without the batch axis),
            given to the first convolutional layer.
        num_classes: Number of units in the softmax output layer. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        keras.Sequential: The uncompiled model.
    """
    # create model
    model = keras.Sequential()

    # (convolution kernel size, pooling window size) for each conv block
    conv_blocks = [
        ((3, 3), (3, 3)),
        ((3, 3), (3, 3)),
        ((2, 2), (2, 2)),
    ]

    for block_index, (kernel_size, pool_size) in enumerate(conv_blocks):
        # Only the first layer of a Sequential model declares the input shape;
        # later layers take their input shape from the previous layer's output.
        first_layer_kwargs = {"input_shape": input_shape} if block_index == 0 else {}

        model.add(keras.layers.Conv2D(32, kernel_size, activation="relu", **first_layer_kwargs))
        model.add(keras.layers.MaxPool2D(pool_size, strides=(2, 2), padding="same"))
        # normalizes the activations, which tends to speed up and stabilise training
        model.add(keras.layers.BatchNormalization())

    # flatten the feature maps and feed them to a dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation="relu"))
    model.add(keras.layers.Dropout(0.3))

    # output layer: one softmax unit per class
    model.add(keras.layers.Dense(num_classes, activation="softmax"))

    return model
              
    
# per-sample shape: axes 1, 2 and 3 of X_train (axis 0 is the sample axis)
input_shape = tuple(X_train.shape[axis] for axis in (1, 2, 3))
model = build_model(input_shape)

In [5]:
# compile the network
# Adam with a small learning rate; the sparse categorical cross-entropy loss
# takes integer class labels rather than one-hot vectors.
optimizer = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [6]:
# train the CNN; keep the returned History object so the per-epoch loss/accuracy
# values remain available afterwards (and its repr is not echoed as cell output)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_validation, y_validation),
    batch_size=32,
    epochs=30,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f94602d6910>

In [7]:
# evaluate CNN on the test set
# model.evaluate returns the loss first, followed by the compiled metrics (accuracy here)
test_error, test_accuracy = model.evaluate(x=X_test, y=y_test, verbose=1)
print(
    "accuracy on test set is {}".format(test_accuracy)
)

accuracy on test set is 0.709083616733551


In [10]:
# make prediction on a sample
def predict(model, X, y):
    """Predict the class index of a single sample and print it next to the expected one.

    Args:
        model: Object exposing ``predict(batch)`` that returns one row of
            class scores per sample in the batch.
        X: A single sample, without a leading batch axis.
        y: The expected class index for ``X``; only used in the printed message.

    Returns:
        The predicted class index (position of the largest score).
    """
    # model.predict works on batches, so wrap the sample in a batch of size one
    batch = X[np.newaxis, ...]

    # one row of class scores per sample, i.e. [[score_0, score_1, ...]]
    scores = model.predict(batch)

    # position of the highest score in each row - yields e.g. [3]
    predicted_index = np.argmax(scores, axis=1)

    print("expected index: {}, predicted: {}".format(y, predicted_index))
    return predicted_index[0]

expected index: 5, predicted: [5]


In [21]:
# pick one test sample (position 500) together with its expected label
X, y = X_test[500], y_test[500]

# run the trained model on that single sample
index = predict(model, X, y)

expected index: 8, predicted: [8]


In [17]:
# Re-read the raw dataset JSON so that keys other than "mfcc"/"labels" are available.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform default.
with open(DATA_PATH, "r", encoding="utf-8") as fp:
    data = json.load(fp)

In [22]:
# Look up the "mapping" entry at the predicted index.
# NOTE(review): presumably "mapping" lists the class names in label order - confirm against the dataset.
data["mapping"][index]

'country'

NameError: name 'pt_musicutils' is not defined