<a href="https://colab.research.google.com/github/murilomatutino/cnn-arm-alfa/blob/main/ArmAlfa_cnn_genre_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras

DATA_PATH = "drive/MyDrive/IA_ArmAlfa_dataset/data.json"

def load_data(data_path):
  """Load MFCC matrices and labels from a JSON dataset file.

  The file must hold a JSON object with an "mfcc" entry (a list of 2-D
  matrices, which may differ in size) and a "labels" entry. Every matrix is
  zero-padded at the bottom and on the right up to the largest row and
  column counts found in the dataset, so that all of them can be stacked
  into a single array.

  Args:
    data_path: Path of the JSON file to read.

  Returns:
    A tuple (x, y). x has shape (num_samples, max_rows, max_cols); y is an
    array built from the "labels" list.

  Raises:
    ValueError: If the "mfcc" list is empty.
  """
  with open(data_path, "r", encoding="utf-8") as fp:
    data = json.load(fp)

  # Convert every matrix exactly once; the arrays are reused below both for
  # measuring the target size and for padding.
  matrices = [np.array(mfcc) for mfcc in data["mfcc"]]
  if not matrices:
    raise ValueError("no MFCC entries found in {}".format(data_path))

  max_rows = max(matrix.shape[0] for matrix in matrices)
  max_cols = max(matrix.shape[1] for matrix in matrices)

  # Pad only after the existing values (bottom/right) so the original
  # coefficients keep their positions.
  padded = [
      np.pad(
          matrix,
          ((0, max_rows - matrix.shape[0]), (0, max_cols - matrix.shape[1])),
          mode='constant',
      )
      for matrix in matrices
  ]

  x = np.stack(padded)
  y = np.array(data["labels"])

  return x, y



def create_sets(test_size, validation_size, random_state=None):
  """Load the dataset and split it into train, validation and test sets.

  The data is read from DATA_PATH via load_data. The test set is carved out
  first; the validation set is then taken from what remains, so
  validation_size is relative to the post-test-split training data, not to
  the full dataset.

  Args:
    test_size: Passed to train_test_split as test_size for the train/test
      split.
    validation_size: Passed to train_test_split as test_size for the
      train/validation split.
    random_state: Optional seed forwarded to both train_test_split calls so
      the splits can be reproduced. The default (None) keeps the original
      unseeded behaviour.

  Returns:
    A tuple (x_train, x_validation, x_test, y_train, y_validation, y_test).
    Each x array has a trailing axis of length 1 appended.
  """
  # load data
  x, y = load_data(DATA_PATH)

  # create train/test split
  x_train, x_test, y_train, y_test = train_test_split(
      x, y, test_size=test_size, random_state=random_state)

  # create train/validation split
  x_train, x_validation, y_train, y_validation = train_test_split(
      x_train, y_train, test_size=validation_size, random_state=random_state)

  # Append a single-channel axis: (num_samples, rows, cols) becomes
  # (num_samples, rows, cols, 1), the layout the Conv2D layers consume.
  x_train = x_train[..., np.newaxis]
  x_validation = x_validation[..., np.newaxis]
  x_test = x_test[..., np.newaxis]

  return x_train, x_validation, x_test, y_train, y_validation, y_test




def build_model(input_shape, num_classes=10):
  """Build the (uncompiled) convolutional classifier.

  The network is three Conv2D -> MaxPool2D -> BatchNormalization stages,
  followed by Flatten, a 64-unit ReLU dense layer with 30% dropout, and a
  softmax output layer.

  Args:
    input_shape: Shape of one input sample, without the batch axis.
    num_classes: Number of units in the softmax output layer. Defaults to
      10, the value that was previously hard-coded.

  Returns:
    A keras.Sequential model; the caller is responsible for compiling it.
  """
  # create model
  model = keras.Sequential()

  # Declare the input once, up front. Passing input_shape= to individual
  # layers is redundant for every layer but the first, and recent Keras
  # versions warn about it in Sequential models.
  model.add(keras.Input(shape=input_shape))

  # (kernel size, pooling window) for the 1st, 2nd and 3rd conv stages.
  conv_stages = (
      ((3, 3), (3, 3)),
      ((3, 3), (3, 3)),
      ((2, 2), (2, 2)),
  )
  for kernel_size, pool_size in conv_stages:
    model.add(keras.layers.Conv2D(32, kernel_size, activation='relu'))
    model.add(keras.layers.MaxPool2D(pool_size, strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

  # flatten the output and feed it into a dense layer
  model.add(keras.layers.Flatten())
  model.add(keras.layers.Dense(64, activation='relu'))
  model.add(keras.layers.Dropout(0.3))

  # output layer
  model.add(keras.layers.Dense(num_classes, activation='softmax'))

  return model



def predict(model, x, y):
  """Classify a single sample and print the expected and predicted indices.

  Args:
    model: Object exposing a predict(batch) method that returns one row of
      scores per sample in the batch.
    x: One input sample, without the batch axis.
    y: Expected class index for x; only used in the printed message.
  """
  # model.predict works on batches, so wrap the sample in a batch of one.
  single_batch = x[np.newaxis]
  scores = model.predict(single_batch)

  # Position of the highest score in each row of the batch.
  predicted_index = np.argmax(scores, axis=1)

  print("Expected index: {}, predicted index: {}".format(y, predicted_index))



if __name__ == "__main__":

  # Hold out 25% of the data for testing, then 20% of the remainder for
  # validation.
  (x_train, x_validation, x_test,
   y_train, y_validation, y_test) = create_sets(0.25, 0.2)

  # Shape of one sample (everything after the batch axis of x_train).
  input_shape = (x_train.shape[1], x_train.shape[2], x_train.shape[3])
  model = build_model(input_shape)

  # Labels are used as-is with the sparse categorical cross-entropy loss.
  optimizer = keras.optimizers.Adam(learning_rate=0.0001)
  model.compile(
      optimizer=optimizer,
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy'],
  )

  # Train, monitoring the validation split after each epoch.
  # (Alternative setting noted by the author: batch_size=32, epochs=30.)
  model.fit(
      x_train,
      y_train,
      validation_data=(x_validation, y_validation),
      batch_size=256,
      epochs=72,
  )

  # model.evaluate returns the loss followed by the compiled metrics, so
  # test_error holds the loss value on the test set.
  test_error, test_accuracy = model.evaluate(x_test, y_test, verbose=1)
  print("Accuracy on test set is: {}".format(test_accuracy))
  print("Error on test set is: {}".format(test_error))

  # Spot-check the trained model on one held-out sample.
  x = x_test[100]
  y = y_test[100]
  predict(model, x, y)


Epoch 1/72


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 414ms/step - accuracy: 0.1648 - loss: 2.6694 - val_accuracy: 0.1206 - val_loss: 3.1656
Epoch 2/72
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 351ms/step - accuracy: 0.2834 - loss: 2.1426 - val_accuracy: 0.1786 - val_loss: 2.7080
Epoch 3/72
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 357ms/step - accuracy: 0.3584 - loss: 1.8961 - val_accuracy: 0.2107 - val_loss: 2.4937
Epoch 4/72
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 369ms/step - accuracy: 0.4083 - loss: 1.7297 - val_accuracy: 0.2786 - val_loss: 2.2209
Epoch 5/72
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 380ms/step - accuracy: 0.4368 - loss: 1.6243 - val_accuracy: 0.3298 - val_loss: 1.9817
Epoch 6/72
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 405ms/step - accuracy: 0.4880 - loss: 1.5263 - val_accuracy: 0.3656 - val_loss: 1.8092
Epoch 7/72
[1m21/21[0m [32m━━━━

KeyboardInterrupt: 