What we will do:


* create train, validation and test sets

* build the CNN

* compile the network

* train the CNN

* evaluate the CNN on the test set

* make a prediction on a single sample






In [11]:
import json
import numpy as np
from sklearn.model_selection import train_test_split

import tensorflow.keras as keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense

In [2]:
# Path of the JSON dataset file that prepare_datasets() hands to load_data().
# NOTE(review): absolute path, presumably the Colab working directory -- adjust
# it when running the notebook anywhere else.
DATA_PATH = "/content/data.json"

In [12]:
def load_data(data_path):
  '''Read the features and labels stored in a JSON dataset file.

  Args:
    data_path: path of a JSON file whose top-level object has the keys
      "mfcc" and "labels".

  Returns:
    A pair of numpy arrays: the "mfcc" entries first, the "labels" second.
  '''
  with open(data_path, "r") as json_file:
    dataset = json.load(json_file)

  # Convert both entries to numpy arrays, keeping the (features, labels) order.
  features, targets = (np.array(dataset[key]) for key in ("mfcc", "labels"))
  return features, targets


In [13]:
## dataset preparation

def prepare_datasets(test_size, validation_size, random_state=None, data_path=None):
  '''Load the dataset and split it into train, validation and test sets.

  Args:
    test_size: forwarded to train_test_split for the first split, i.e. the
      share of the whole dataset held out for testing.
    validation_size: forwarded to train_test_split for the second split. It is
      applied to what is left after the test split, not to the whole dataset
      (e.g. test_size=0.25, validation_size=0.2 -> 15% of all samples).
    random_state: optional seed forwarded to both splits so the partition can
      be reproduced between runs. The default (None) keeps the splits unseeded.
    data_path: optional path of the JSON dataset; defaults to DATA_PATH.

  Returns:
    X_train, X_val, X_test, y_train, y_val, y_test. The three feature arrays
    get one extra trailing axis of length 1 (the channel axis Conv2D expects).
  '''
  if data_path is None:
    data_path = DATA_PATH

  ## load data
  x, y = load_data(data_path)

  ## train/test split
  X_train, X_test, y_train, y_test = train_test_split(
      x, y, test_size=test_size, random_state=random_state)

  ## train/validation split (carved out of the remaining training data)
  X_train, X_val, y_train, y_val = train_test_split(
      X_train, y_train, test_size=validation_size, random_state=random_state)

  ## Conv2D works on (rows, cols, channels) samples, so append a channel axis
  ## of length 1: (num_samples, d1, d2) -> (num_samples, d1, d2, 1).
  ## `[..., np.newaxis]` means "keep every existing axis, add one at the end".
  X_train = X_train[..., np.newaxis]
  X_val = X_val[..., np.newaxis]
  X_test = X_test[..., np.newaxis]

  return X_train, X_val, X_test, y_train, y_val, y_test


In [14]:
### building the CNN model architecture

def build_model(input_shape, num_classes=10):
  '''Build the (uncompiled) CNN classifier.

  Architecture: three blocks of Conv2D(32 filters, relu) -> MaxPool2D ->
  BatchNormalization, followed by Flatten, Dense(64, relu), Dropout(0.3) and a
  softmax output layer.

  Args:
    input_shape: shape of a single sample, without the batch axis.
    num_classes: number of units in the softmax output layer. Defaults to 10,
      one per genre.

  Returns:
    The Sequential model; compiling it is left to the caller.
  '''
  # Every layer is taken from the same `keras` alias so the model is not
  # assembled from two different Keras namespaces.
  layers = keras.layers
  model = keras.Sequential()

  # (kernel_size, pool_size) of the three convolutional blocks.
  conv_blocks = [((3, 3), (3, 3)),
                 ((3, 3), (3, 3)),
                 ((2, 2), (2, 2))]

  for block_index, (kernel_size, pool_size) in enumerate(conv_blocks):
    # Only the first layer of a Sequential model needs the input shape; later
    # layers infer theirs from the previous layer's output.
    first_layer_kwargs = {"input_shape": input_shape} if block_index == 0 else {}

    model.add(layers.Conv2D(32, kernel_size, activation='relu', **first_layer_kwargs))
    # strides: how far the pooling window moves; padding='same' pads the edges.
    model.add(layers.MaxPool2D(pool_size, strides=(2, 2), padding='same'))
    model.add(layers.BatchNormalization())

  # flatten the feature maps and feed them into the dense layer
  model.add(layers.Flatten())
  model.add(layers.Dense(64, activation='relu'))
  model.add(layers.Dropout(0.3))  # to reduce overfitting

  # output layer: one softmax score per class
  model.add(layers.Dense(num_classes, activation='softmax'))

  return model


In [198]:
if __name__ == "__main__":

  # Split the data: hold out 25% for testing, then 20% of the remainder for validation.
  X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(
      test_size=0.25, validation_size=0.2)

  # Build the CNN; its input shape is the sample shape without the leading batch axis.
  input_shape = tuple(X_train.shape[axis] for axis in (1, 2, 3))
  model = build_model(input_shape)

  # Compile with Adam; the sparse loss takes integer class labels directly.
  optimizer = keras.optimizers.Adam(learning_rate=0.0001)
  model.compile(
      loss='sparse_categorical_crossentropy',
      optimizer=optimizer,
      metrics=['accuracy'],
  )
  model.summary()

  # Train, reporting validation metrics after every epoch.
  model.fit(
      X_train, y_train,
      validation_data=(X_validation, y_validation),
      epochs=30,
      batch_size=32,
  )

  # Evaluate on the held-out test set; evaluate() returns the loss followed by
  # the compiled metrics, so `test_error` holds the test loss.
  test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
  print("Accuracy on test set is :{}".format(test_accuracy))



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 128, 11, 32)       320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 64, 6, 32)         0         
 D)                                                              
                                                                 
 batch_normalization (Batch  (None, 64, 6, 32)         128       
 Normalization)                                                  
                                                                 
 conv2d_1 (Conv2D)           (None, 62, 4, 32)         9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 31, 2, 32)         0         
 g2D)                                                            
                                                        

In [199]:
## make prediction on sample
def predict(model, X, y):
  '''Classify a single sample and print expected vs. predicted class index.

  Args:
    model: object exposing predict(batch) that returns one row of class
      scores per sample.
    X: one sample without the batch axis; a leading axis of length 1 is added
      here because model.predict() works on batches.
    y: expected class index, used only for the printed comparison.

  Returns:
    The result of np.argmax over axis 1 of the scores: an array with one
    predicted class index per sample (a single entry here).
  '''
  batch = X[np.newaxis, ...]          ## (d1, d2, d3) --> (1, d1, d2, d3)
  scores = model.predict(batch)       ## 2D: [[score_class_0, score_class_1, ...]]

  ## the predicted class is the position of the highest score in each row
  best = np.argmax(scores, axis=1)
  print("Expected index :{} \nPredicted index: {}".format(y, best))
  return best
## classify one held-out example and compare it with its true label
X, y = X_test[29], y_test[29]

per_i = predict(model, X, y)

## genre names; indexed below by the label / predicted class index
maps = ["blues", "classical", "country", "disco", "hiphop",
        "jazz", "metal", "pop", "reggae", "rock"]

print("\n\nExpected Genre : {} \nPredicted Genre: {}".format(maps[y], maps[per_i[0]]))


Expected index :1 
Predicted index: [1]


Expected Genre : classical 
Predicted Genre: classical


In [200]:
## classify another held-out example (index 101 of the test set)
X, y = X_test[101], y_test[101]

per_i = predict(model, X, y)
print("\n\nExpected Genre : {} \nPredicted Genre: {}".format(maps[y], maps[per_i[0]]))


Expected index :8 
Predicted index: [6]


Expected Genre : reggae 
Predicted Genre: metal


In [201]:
## classify another held-out example (index 100 of the test set)
X, y = X_test[100], y_test[100]

per_i = predict(model, X, y)
print("\n\nExpected Genre : {} \nPredicted Genre: {}".format(maps[y], maps[per_i[0]]))


Expected index :6 
Predicted index: [6]


Expected Genre : metal 
Predicted Genre: metal
