# Information about the notebook
The notebook is split into 4 parts: <br>
1: Basics <br>
2: Load the model to test it <br>
3: Prepare the test data <br>
4: Test the model <br>


# Basics

In [0]:
#Python packages for preparing the data
import os
# NOTE(review): the alias bound on the next line is overwritten by the plain
# "import sys" two lines further down, so shutil ends up unreachable under
# any name in this notebook.
import shutil as sys
import glob
import sys
import csv

#Python packages for audio files
import wave
import librosa   #for audio processing
import numpy as np
from scipy.io import wavfile #for audio processing
import pandas as pd
# NOTE(review): sklearn.preprocessing.normalize is used further down; a bare
# "import sklearn" presumably does not load that submodule on its own - confirm.
import sklearn
import tensorflow as tf


In [0]:
from google.colab import drive

# Make the Google Drive contents available below /content/drive
drive.mount(mountpoint='/content/drive')

# Load the final model

In [0]:
from tensorflow.keras.models import load_model

# Restore the saved network from Google Drive and print its layer overview
model = load_model(filepath='/content/drive/My Drive/test_data/final_model.h5')
model.summary()


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 20, 1024)          3197952   
_________________________________________________________________
dropout (Dropout)            (None, 20, 1024)          0         
_________________________________________________________________
gru_1 (GRU)                  (None, 20, 512)           2360832   
_________________________________________________________________
dropout_1 (Dropout)          (None, 20, 512)           0         
_________________________________________________________________
gru_2 (GRU)                  (None, 20, 256)           590592    
_________________________________________________________________
dropout_2 (Dropout)          (None, 20, 256)           0         
_________________________________________________________________
gru_3 (GRU)                  (None, 20, 128)          

# Prepare the data

Unzip the test data

In [0]:
#for Google Drive
# -u: only extract files that are missing or older than the archived copy,
# -q: quiet output. No -d target is given, so the archive is extracted into
# the current working directory.
!unzip -uq '/content/drive/My Drive/test_data/comparison_data.zip'

Delete the `__MACOSX` folder

In [0]:
# Remove the __MACOSX folder (path is relative to the current working
# directory); -f keeps the command silent when the folder does not exist.
!rm -rf __MACOSX/

Create a label list

In [0]:
# Every sub-folder of the extracted data set is one label.
comparison_dir = '/content/comparison_data'
label_list = os.listdir(comparison_dir)
print("Original:", label_list)
# Keep only real label folders: stray files such as '.DS_Store' are dropped
# without failing when they are absent (list.remove would raise ValueError),
# and the '_background_noise_' folder is excluded because it is not a label.
label_list = [
    entry for entry in label_list
    if entry != '_background_noise_'
    and os.path.isdir(os.path.join(comparison_dir, entry))
]
print("Removed:",label_list)

Original: ['_background_noise_', 'balls_screw_drive', 'error_logs', '.DS_Store', 'air_pressure_alert', 'stop', 'reutlingen', 'cnc', 'measurement_system', 'alert', 'nc_alert']
Removed: ['balls_screw_drive', 'error_logs', 'air_pressure_alert', 'stop', 'reutlingen', 'cnc', 'measurement_system', 'alert', 'nc_alert']


Define a function to convert the WAV files into MFCC data

In [0]:
def create_array_with_samples(label_list, audio_path):
    """Load the labelled WAV clips and convert them into normalized MFCC features.

    For every label in ``label_list`` the ``.wav`` files found in
    ``audio_path/<label>`` are loaded at 8000 Hz, forced to a length of 8000
    samples and converted into an MFCC matrix that is normalized with
    ``sklearn.preprocessing.normalize``.

    Args:
        label_list: Names of the sub-folders of ``audio_path``. Each name is
            used as the label of the clips stored in that folder.
        audio_path: Directory that contains one sub-folder per label.

    Returns:
        A tuple ``(x_seq_train, all_label, length)`` where ``x_seq_train`` is
        the list of normalized MFCC matrices, ``all_label`` is the list of
        labels in the same order, and ``length`` is the number of ``.wav``
        files that were found (including rejected ones).
    """
    length = 0
    all_wave = []
    all_label = []

    for label in label_list:
        print("Label: ", label)
        label_dir = audio_path + '/' + label
        waves = [f for f in os.listdir(label_dir) if f.endswith('.wav')]
        # Count every wav file, including the ones rejected further down
        length += len(waves)

        for wav in waves:
            wav_path = label_dir + '/' + wav
            print(wav_path)

            # Decode the clip at a fixed sample rate of 8000 Hz
            samples, sample_rate = librosa.load(wav_path, sr=8000)
            # Passing the clip length as orig_sr makes resample return 8000
            # samples, i.e. the clip is stretched/compressed to one second.
            # NOTE(review): kept as-is because the model was presumably
            # trained on identically prepared data - confirm.
            # Keyword arguments are required by current librosa releases.
            samples = librosa.resample(y=samples, orig_sr=len(samples), target_sr=8000)

            if len(samples) == 8000:
                all_wave.append(samples)
                all_label.append(label)
            else:
                print("Wrong Sample Rate:", len(samples), " File:", label + "/" + wav)

    # Build the features once, after all clips are collected. This loop used
    # to sit inside the per-file loop, which recomputed every MFCC for each
    # new file and left x_seq_train undefined when no file was found.
    x_seq_train = []
    for samples in all_wave:
        add = librosa.feature.mfcc(y=samples.astype(float), sr=8000)
        # Normalize the MFCC matrix (sklearn defaults)
        add = sklearn.preprocessing.normalize(add)
        # Add the mfcc data to the RNN input list
        x_seq_train.append(add)

    return x_seq_train, all_label, length

Call the function

In [0]:
# NOTE: despite its name, all_wave receives the MFCC feature matrices (the
# first return value of create_array_with_samples), not raw waveforms.
all_wave, all_label, length = create_array_with_samples(label_list, '/content/comparison_data')
#Check the sizes
#Print the length of the arrays
# Use the first element and guard against an empty result, so that the check
# itself does not raise IndexError when fewer than two clips were accepted.
first_shape = all_wave[0].shape if len(all_wave) > 0 else None
print("Length all_wave array", type(all_wave), len(all_wave), first_shape)
print("Length all_label", type(all_label), len(all_label))
print("Loops: ", length)

Label:  balls_screw_drive
/content/comparison_data/balls_screw_drive/20_no_hash_1.wav
/content/comparison_data/balls_screw_drive/20_no_hash_0.wav
/content/comparison_data/balls_screw_drive/3_no_hash_1.wav
/content/comparison_data/balls_screw_drive/3_no_hash_0.wav
Label:  error_logs
/content/comparison_data/error_logs/14_no_hash_1.wav
/content/comparison_data/error_logs/34_no_hash_0.wav
/content/comparison_data/error_logs/14_no_hash_0.wav
/content/comparison_data/error_logs/34_no_hash_1.wav
Label:  air_pressure_alert
/content/comparison_data/air_pressure_alert/4_no_hash_0.wav
/content/comparison_data/air_pressure_alert/57_no_hash_1.wav
/content/comparison_data/air_pressure_alert/4_no_hash_1.wav
/content/comparison_data/air_pressure_alert/57_no_hash_0.wav
Label:  stop
/content/comparison_data/stop/19_no_hash_1.wav
/content/comparison_data/stop/45_no_hash_0.wav
/content/comparison_data/stop/19_no_hash_0.wav
/content/comparison_data/stop/45_no_hash_1.wav
Label:  reutlingen
/content/compari

Save the labels for every record in a CSV file

In [0]:
# One true label per row, written without index column and without header
my_df = pd.DataFrame(data=all_label)
my_df.to_csv(path_or_buf='test_data_true.csv', header=False, index=False)

Encode the labels

In [0]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

# Fit on the complete label list instead of on the loaded samples: the class
# indices then always span len(label_list) classes, even if every clip of one
# label was rejected while loading.
le.fit(label_list)
output = le.transform(all_label)
classes= list(le.classes_)
print(output)
print(output[1])

#Note: variables must be int!
# One-hot encode with the Keras bundled in TensorFlow (already imported as tf);
# keras.utils.np_utils no longer exists in current Keras releases and mixing
# standalone Keras with the tf.keras model is avoided this way.
output = tf.keras.utils.to_categorical(output, num_classes=len(label_list))

print(output)

[2 2 2 2 4 4 4 4 0 0 0 0 8 8 8 8 7 7 7 7 3 3 3 3 5 5 5 5 1 1 1 1 6 6 6 6]
2
[[0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0

Using TensorFlow backend.


Save the encoded labels to a CSV file

In [0]:
# One one-hot encoded label per row, written without index column or header
my_df = pd.DataFrame(data=output)
my_df.to_csv(path_or_buf='test_data_output.csv', header=False, index=False)

# Test the model


In [0]:
# Stack the per-clip feature matrices into a single array for the model
all_wave = np.array(all_wave)

# Run the model on all clips, then show the result shape followed by the
# raw predictions
y_pred = model.predict(x=all_wave)
print(y_pred.shape, y_pred, sep='\n')

(36, 9)
[[6.39926657e-06 1.06687858e-05 9.99735415e-01 2.01892385e-06
  2.95113227e-06 4.52028216e-05 6.40150392e-05 3.85191597e-05
  9.48870365e-05]
 [3.56733523e-07 8.56019653e-07 9.99969006e-01 2.03621710e-07
  1.89178806e-07 3.79312155e-06 3.15611533e-06 4.66809388e-06
  1.77975380e-05]
 [2.42884744e-05 2.73544774e-05 9.99344647e-01 2.92693812e-06
  9.00172017e-06 6.81202000e-05 3.40743631e-04 5.42681737e-05
  1.28695843e-04]
 [1.59931247e-07 5.76081277e-07 9.99972939e-01 1.22561943e-07
  1.13959871e-07 4.34691992e-06 2.31949184e-06 6.59879470e-06
  1.28313786e-05]
 [9.73927617e-01 7.68517843e-04 2.09579768e-04 1.34717426e-04
  9.45490971e-03 4.31565708e-03 9.34860390e-03 4.82868316e-04
  1.35762896e-03]
 [8.27650147e-05 1.01733464e-03 2.35944506e-04 1.86197503e-04
  9.97510672e-01 4.92229883e-05 2.35730713e-05 7.02868914e-04
  1.91467334e-04]
 [1.51119832e-06 5.27240263e-06 4.71186240e-06 1.58957357e-06
  9.99974489e-01 1.52288166e-07 1.87277621e-07 1.07511160e-05
  1.27897317e-06

Save the predictions as a CSV file

In [0]:
# One prediction vector per row, written without index column or header
my_df = pd.DataFrame(data=y_pred)
my_df.to_csv(path_or_buf='test_data_prediction.csv', header=False, index=False)