In [None]:
!pip install tensorflow.io

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_io as tfio
import numpy as np
import matplotlib.pyplot as plt

# Load NSynth through TFDS (files kept under ./data) and pick out the three splits.
datasets = tfds.load("nsynth", data_dir="data", try_gcs=True)
train_dataset = datasets["train"]
test_dataset = datasets["test"]
valid_dataset = datasets["valid"]

# filters out the synth-lead class
def filter_fn(x):
    """Predicate for ``Dataset.filter``: keep an example unless its family id is 9.

    ``x`` is indexed as ``x["instrument"]["family"]``; the result is truthy for
    every family id other than 9.
    """
    return x["instrument"]["family"] != 9

# downsamples the data and moves the 10th class to 9th
def preProcessing(inputs):
  """Turn one raw dataset element into an ``(audio, label)`` training pair.

  * ``inputs["audio"]`` is resampled from 16000 to 8000 (rate arguments of
    ``tfio.audio.resample``) and gets a new axis at position 1.
  * ``inputs["instrument"]["family"]`` equal to 10 is rewritten to 9; any
    other value is passed through unchanged.

  The incoming ``inputs`` dict is left untouched (the previous version
  overwrote its entries in place).

  Returns:
    Tuple ``(audio, family)``.
  """
  # One-element int64 constant: tf.where broadcasts against it, so the label
  # comes out with a leading axis of size 1. Other cells index labels as
  # ``x[1][0].numpy()[0]`` and depend on that, so it is kept on purpose.
  remapped_label = tf.constant([9], dtype=tf.int64)

  audio = tf.expand_dims(
      tfio.audio.resample(inputs["audio"], 16000, 8000),
      1)

  family = inputs["instrument"]["family"]
  family = tf.where(family == 10, remapped_label, family)
  return (audio, family)

def processDataset(dataset, batch_size=50, shuffle_buffer=1024):
  """Build the input pipeline: filter -> shuffle -> preprocess -> batch.

  Args:
    dataset: object exposing ``filter``, ``shuffle``, ``map`` and ``batch``
      (a ``tf.data.Dataset`` in this notebook).
    batch_size: number of examples per batch; incomplete final batches are
      dropped. Defaults to 50, the previously hard-coded value.
    shuffle_buffer: buffer size passed to ``shuffle``. Defaults to 1024, the
      previously hard-coded value.

  Returns:
    The transformed dataset.
  """
  return (
      dataset
      .filter(filter_fn)          # drop examples rejected by filter_fn
      .shuffle(shuffle_buffer)
      .map(preProcessing)         # -> (audio, label) pairs
      .batch(batch_size, drop_remainder=True)
  )

# Learning curve
def print_accuracy(history, path=None, model_name="LSTM"):
  """Plot training and validation accuracy per epoch.

  Args:
    history: object whose ``history`` attribute is a mapping containing the
      ``'accuracy'`` and ``'val_accuracy'`` series (e.g. the value returned by
      ``model.fit``).
    path: directory in which to save ``<model_name>_accuracy.png``. When
      ``None`` (default) the figure is only displayed. The previous version
      read an undefined global ``path`` and therefore raised ``NameError``.
    model_name: prefix of the saved file name; the default reproduces the
      original ``LSTM_accuracy.png``.
  """
  curves = history.history
  plt.plot(curves['accuracy'], label='accuracy')
  plt.plot(curves['val_accuracy'], label = 'val_accuracy')
  plt.xlabel('Epoch')
  plt.ylabel('Accuracy')
  plt.ylim([0, 1])
  plt.legend(loc='lower right')
  if path is not None:
    pltPath = path + '/' + model_name + '_accuracy.png'
    plt.savefig(pltPath, bbox_inches='tight')
  plt.show()

# Run the training and validation splits through the shared input pipeline.
train_dataset, valid_dataset = (
    processDataset(split) for split in (train_dataset, valid_dataset)
)

In [None]:
# Model 2 (CNN-LSTM)
def createLSTMModel():
  """Build and compile the CNN-LSTM classifier.

  Architecture (in order): Conv1D(32, 3) -> MaxPooling1D(2) -> Dropout(0.2)
  -> Conv1D(64, 3) -> MaxPooling1D(2) -> Dropout(0.4) -> LSTM(32, returning
  the full sequence) -> Flatten -> Dense(64, relu) -> Dropout(0.2)
  -> Dense(10, softmax). Input shape is (32000, 1).

  Returns:
    The compiled ``tf.keras`` Sequential model (Adam, learning rate 0.0003,
    sparse categorical cross-entropy on probabilities, accuracy metric).
  """
  model = tf.keras.models.Sequential()

  model.add(tf.keras.layers.Conv1D(32, 3, activation="relu", input_shape=(32000, 1), padding='same'))
  model.add(tf.keras.layers.MaxPooling1D(2))
  model.add(tf.keras.layers.Dropout(0.2))

  model.add(tf.keras.layers.Conv1D(64, 3, activation="relu", padding='same'))
  model.add(tf.keras.layers.MaxPooling1D(2))
  model.add(tf.keras.layers.Dropout(0.4))

  model.add(tf.keras.layers.LSTM(32, return_sequences=True))

  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(64, activation="relu"))
  model.add(tf.keras.layers.Dropout(0.2))

  model.add(tf.keras.layers.Dense(10, activation="softmax"))

  # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=["accuracy"])
  # The builder previously fell off the end and returned None.
  return model

# Keep the original (misspelled) name importable for any existing caller.
createLSTMMOdel = createLSTMModel

LSTM_model = createLSTMModel()
# summary() prints the table itself and returns None, so it is not wrapped in print().
LSTM_model.summary()
LSTM_history = LSTM_model.fit(train_dataset, epochs=10, validation_data=valid_dataset)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_2 (Conv1D)            (None, 32000, 32)         128       
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 16000, 32)         0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 16000, 32)         0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 16000, 64)         6208      
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 8000, 64)          0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 8000, 64)          0         
_________________________________________________________________
lstm (LSTM)                  (None, 8000, 32)         

In [None]:
# Model 1 (CNN)
def createCNNModel():
  """Build and compile the plain CNN classifier.

  Architecture (in order): Conv1D(32, 3) -> MaxPooling1D(2) -> Dropout(0.2)
  -> Conv1D(64, 3) -> MaxPooling1D(2) -> Dropout(0.5) -> Flatten
  -> Dense(64, relu) -> Dropout(0.2) -> Dense(10, softmax).
  Input shape is (32000, 1).

  Returns:
    The compiled ``tf.keras`` Sequential model (Adam, learning rate 0.0001,
    sparse categorical cross-entropy on probabilities, accuracy metric).
  """
  model = tf.keras.models.Sequential()

  model.add(tf.keras.layers.Conv1D(32, 3, activation="relu", input_shape=(32000, 1), padding='same'))
  model.add(tf.keras.layers.MaxPooling1D(2))
  model.add(tf.keras.layers.Dropout(0.2))

  model.add(tf.keras.layers.Conv1D(64, 3, activation="relu", padding='same'))
  model.add(tf.keras.layers.MaxPooling1D(2))
  model.add(tf.keras.layers.Dropout(0.5))

  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(64, activation="relu"))
  model.add(tf.keras.layers.Dropout(0.2))

  # 10 output units: preprocessing leaves labels 0-9, and the CNN-LSTM model,
  # the confusion matrix and the class-name table all use 10 classes.
  # An 11th unit could yield a predicted index with no corresponding class.
  model.add(tf.keras.layers.Dense(10, activation="softmax"))

  # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                metrics=["accuracy"])
  # The builder previously fell off the end and returned None.
  return model

CNN_Model = createCNNModel()

CNN_history = CNN_Model.fit(train_dataset, epochs=10, validation_data=valid_dataset)


In [None]:
# Prepare the test split: same filter and preprocessing as training,
# but unshuffled and in batches of a single example.
test_dataset = (
    test_dataset
    .filter(filter_fn)
    .map(preProcessing)
    .batch(1, drop_remainder=True)
)

# separating the data and labels from the test dataset
def mapping(x):
  """Return a list holding the first element of every item in ``x``."""
  return [item[0] for item in x]
# One pass per list over the single-example batches:
# X_test holds each example's samples as a flat Python list, Y_test its label.
X_test = [mapping(batch[0].numpy()[0]) for batch in test_dataset]
Y_test = [batch[1][0].numpy()[0] for batch in test_dataset]

def predictFromModel(model, dataset=None):
  """Run ``model`` over a dataset and derive the predicted class per row.

  Args:
    model: object with a Keras-style ``predict(dataset, verbose=...)`` method.
      (The previous version ignored this argument and always used the global
      ``LSTM_model``.)
    dataset: data to predict on; defaults to the notebook-level
      ``test_dataset`` when omitted.

  Returns:
    ``(predictions, Y_pred)`` where ``predictions`` is whatever
    ``model.predict`` returned and ``Y_pred`` is its argmax over the last axis.
  """
  if dataset is None:
    dataset = test_dataset
  predictions = model.predict(dataset, verbose=2)
  # Previously read the not-yet-defined global `LSTM_predictions`.
  Y_pred = np.argmax(predictions, axis=-1)
  return predictions, Y_pred

# Class probabilities and predicted labels on the test split for both models.
CNN_predictions, CNN_Y_pred = predictFromModel(CNN_Model)
# The trained CNN-LSTM model is bound to `LSTM_model` (lower-case "m").
LSTM_predictions, LSTM_Y_pred = predictFromModel(LSTM_model)



In [None]:
# Loss and accuracy of the CNN-LSTM model on the test split.
test_loss, test_acc = LSTM_model.evaluate(x=test_dataset, verbose=2)
print(test_acc)

4096/4096 - 282s - loss: 4.1661 - accuracy: 0.6118
0.61181640625


In [None]:
# Loss and accuracy of the CNN model on the test split.
# The trained CNN is bound to `CNN_Model` (capital "M"); `CNN_model` was never defined.
test_loss, test_acc = CNN_Model.evaluate(test_dataset, verbose=2)
print(test_acc)

In [None]:

import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

# Confusion matrix
def plotConfusionMatrix(Y_test, Y_pred, path, model_Name, num_classes=10, class_labels=None):
  """Print, plot and save the confusion matrix of a model's predictions.

  Args:
    Y_test: true class indices.
    Y_pred: predicted class indices, aligned with ``Y_test``.
    path: existing directory; the figure is written to
      ``<path>/ConfusionMatrix_<model_Name>.png``.
    model_Name: tag used in the output file name.
    num_classes: size of the (square) matrix. Defaults to 10, the previously
      hard-coded value.
    class_labels: row/column tick labels, one per class. Defaults to the
      letters "A", "B", ... as before (numeric strings beyond 26 classes).
  """
  if class_labels is None:
    letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    if num_classes <= len(letters):
      class_labels = list(letters[:num_classes])
    else:
      class_labels = [str(k) for k in range(num_classes)]

  confusion_matrix = tf.math.confusion_matrix(Y_test, Y_pred, num_classes).numpy()
  print(confusion_matrix)
  df_cm = pd.DataFrame(confusion_matrix, index=class_labels, columns=class_labels)
  plt.figure(figsize = (12,7))
  # fmt="d": show integer counts in full instead of the default ".2g"
  # formatting, which renders counts of 100 or more in scientific notation.
  sn.heatmap(df_cm, annot=True, fmt="d", cmap="YlGnBu")
  plt.ylabel('True Values')
  plt.xlabel('Predicted Values')
  plt.savefig(path+"/ConfusionMatrix_"+model_Name+".png")
  plt.show()

# Draw and save one confusion matrix per model.
# NOTE(review): LSTM_path and CNN_path are first assigned in the later
# "saving model to google drive" cell, so on a fresh kernel that cell has to
# run before this one, otherwise these calls raise NameError.
plotConfusionMatrix(Y_test,LSTM_Y_pred, LSTM_path, "LSTM")
plotConfusionMatrix(Y_test,CNN_Y_pred, CNN_path, "CNN")

In [None]:
# saving model to google drive
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

# Each model is saved to its own Drive folder; the same folders are reused
# as output directories by the plotting cells.
LSTM_path = "/content/gdrive/My Drive/LSTM"
# The trained CNN-LSTM model is bound to `LSTM_model` (lower-case "m").
LSTM_model.save(LSTM_path)
# Report the path actually written (the global `path` was never defined).
print('Saved trained model at %s ' % LSTM_path)

CNN_path = "/content/gdrive/My Drive/CNN"
CNN_Model.save(CNN_path)
print('Saved trained model at %s ' % CNN_path)

In [None]:
import numpy as np


# Class index -> instrument family name, for the ten classes used by the models.
instrument_families = dict(enumerate([
    "bass", "brass", "flute", "guitar", "keyboard",
    "mallet", "organ", "reed", "string", "vocal",
]))

# Visualizations for correct class probabilities and frequency(histogram) of correct classifications and mis-classifications
def plotCorrectClassProbabilitiesAndHistograms(predictions, path, modelName):
  """Plot, per class, the correctly classified test example with the highest probability.

  For every class in ``instrument_families`` the function looks for the test
  row whose true label is that class, whose argmax prediction is also that
  class, and whose predicted probability for it is the largest. For each row
  found it saves a waveform plot and a histogram of sample values.
  Classes without any correctly classified row are skipped.

  Reads the notebook-level ``test_dataset`` (for true labels), ``X_test``
  (for the signals) and ``instrument_families``.

  Args:
    predictions: per-row class probabilities, indexable as
      ``predictions[row][class]`` and aligned with ``test_dataset`` order.
    path: base output directory; figures go under ``<path>/Visualizations/``.
    modelName: tag appended to the histogram file names.
  """
  import os  # local import: only needed to create the output folders

  # Find, per class, the correctly classified row with the highest probability.
  highProbabilityRows = {}
  highProbabilities = {}
  for i, r in enumerate(test_dataset):
    # Labels carry extra singleton axes from preprocessing/batching; flatten to a scalar.
    actual = int(np.asarray(r[1]).ravel()[0])
    if actual not in instrument_families:
      continue
    if int(np.argmax(predictions[i])) != actual:
      continue  # misclassified rows are not candidates
    if predictions[i][actual] > highProbabilities.get(actual, 0):
      highProbabilities[actual] = predictions[i][actual]
      highProbabilityRows[actual] = i

  # Plotting the correct class probabilities (one waveform per class).
  signal_dir = path+"/Visualizations/correctClassProbabilities/"
  os.makedirs(signal_dir, exist_ok=True)
  for class_index, row_index in sorted(highProbabilityRows.items()):
    class_name = instrument_families[class_index]
    plt.figure(1)
    plt.clf()
    plt.title("Signal Wave - "+class_name + ", probability - "+ str(predictions[row_index][class_index]))
    plt.plot(X_test[row_index])
    plt.savefig(signal_dir+class_name)
    plt.show()

  # Plot the histograms for the correctly classified ones.
  plt.rcParams.update({'figure.figsize':(7,5), 'figure.dpi':100})

  hist_dir = path+"/Visualizations/correctClassHistograms/"
  os.makedirs(hist_dir, exist_ok=True)
  for class_index, row_index in sorted(highProbabilityRows.items()):
    class_name = instrument_families[class_index]
    plt.clf()
    # Histogram of sample values, leaving out the last 16000 samples of the signal.
    plt.hist(X_test[row_index][:-16000], bins=100)
    plt.gca().set(title=class_name+': Correct Class Frequency Histogram', ylabel='Frequency')
    plt.savefig(hist_dir+"Histograms_"+class_name+"_"+ modelName)
    plt.show()

# Produce the per-class waveform plots and histograms for each trained model.
for _preds, _out_dir, _tag in (
    (CNN_predictions, CNN_path, "CNN"),
    (LSTM_predictions, LSTM_path, "LSTM"),
):
  plotCorrectClassProbabilitiesAndHistograms(_preds, _out_dir, _tag)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import wave
import sys
# Instruments near decision boundary - visualization
#
# All bookkeeping lives inside the function. The earlier module-level
# accumulators (and `np.copy(predictions)` on an undefined name) made the
# cell fail on a fresh kernel and leaked results from one model's call into
# the next.
def plotInstrumentsNearDecisionBoundaries(predictions, path, ModelName):
  """Plot, per class, the test example closest to the decision boundary.

  For each row the top class (argmax) and the runner-up class are found and
  their probability gap is computed. For every class in
  ``instrument_families`` the row where that class is the runner-up with the
  smallest gap is selected; its waveform and a histogram of its sample values
  are saved. Classes that are never the runner-up are skipped.

  NOTE(review): the figure titles call the runner-up class the "correct"
  class, but true labels are never consulted here - confirm that this
  approximation is intended.

  Reads the notebook-level ``X_test`` (signals) and ``instrument_families``.

  Args:
    predictions: 2-D array-like of per-row class probabilities, aligned with
      ``X_test``.
    path: base output directory; figures go under ``<path>/Visualizations/``.
    ModelName: tag appended to the histogram file names.
  """
  import os  # local import: only needed to create the output folders

  predictions = np.asarray(predictions)
  if len(predictions) == 0:
    return
  sample_ids = np.arange(len(predictions))

  # Top class per row, then the best of the remaining classes. Masking the
  # winner (instead of searching for the runner-up's value) keeps the two
  # indices distinct even when probabilities tie.
  decision_classes = np.argmax(predictions, axis=1)
  masked = predictions.astype(float)  # astype returns a copy
  masked[sample_ids, decision_classes] = -np.inf
  runner_up_classes = np.argmax(masked, axis=1)
  margins = (predictions[sample_ids, decision_classes]
             - predictions[sample_ids, runner_up_classes])

  # Per class: the row where it is runner-up with the smallest gap.
  neardecisionBoundaries_indices = {}
  for class_index in sorted(instrument_families):
    candidates = np.flatnonzero(runner_up_classes == class_index)
    if candidates.size:
      neardecisionBoundaries_indices[class_index] = int(candidates[np.argmin(margins[candidates])])

  signal_dir = path+"/Visualizations/nearDecisionClassProbabilities/"
  os.makedirs(signal_dir, exist_ok=True)
  for class_index, i in neardecisionBoundaries_indices.items():
    class_name = instrument_families[class_index]
    decisionClass = int(decision_classes[i])
    decisionProbability = predictions[i][decisionClass]
    # Fall back to the bare index if the model has more outputs than named classes.
    decisionClass_name = instrument_families.get(decisionClass, str(decisionClass))
    plt.figure(1)
    plt.clf()
    plt.suptitle("Correct Class: "+class_name + ", probability = "+ str(predictions[i][class_index]))
    plt.title('Near by: '+ decisionClass_name+ ", probabilitty = "+str(decisionProbability) )
    plt.plot(X_test[i])
    plt.savefig(signal_dir+class_name)
    plt.show()

  plt.rcParams.update({'figure.figsize':(7,5), 'figure.dpi':100})
  # Plot Histogram for misclassified ones
  hist_dir = path+"/Visualizations/missClassificationHistograms/"
  os.makedirs(hist_dir, exist_ok=True)
  for class_index, row_index in neardecisionBoundaries_indices.items():
    class_name = instrument_families[class_index]
    predicted_class = int(decision_classes[row_index])
    predicted_name = instrument_families.get(predicted_class, str(predicted_class))
    plt.clf()
    # Histogram of sample values, leaving out the last 16000 samples of the signal.
    plt.hist(X_test[row_index][:-16000], bins=100)
    plt.gca().set(title='Histogram: '+class_name+' misclassified as '+ predicted_name, ylabel='Frequency')
    plt.savefig(hist_dir+"Histogram_MisClassified"+class_name+"_"+ModelName)
    plt.show()

# Produce the near-decision-boundary plots for each trained model.
for _preds, _out_dir, _tag in (
    (CNN_predictions, CNN_path, "CNN"),
    (LSTM_predictions, LSTM_path, "LSTM"),
):
  plotInstrumentsNearDecisionBoundaries(_preds, _out_dir, _tag)