# Machinery classification task

The goal of this notebook will be to train a model to recogize the different type of machinery depending on the melspectrogram of audio sample. 

To do that we will use convolutionnal neural network CNN to extract features from the melspectrogram. 

In [2]:
#Let's first import the modules wee need
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import seaborn as sns
from tensorflow.keras.models import load_model
import os
from tensorflow.keras.layers import Input, Dense, Dropout, Conv2D, MaxPooling2D, Flatten, BatchNormalization
from tensorflow.keras.models import Model

In [150]:
#Constants
PATH_FEATURES_FOLDER = './Features/'
PATH_MELSPEC_313_128_FOLDER = PATH_FEATURES_FOLDER + 'melspec_313_128/'

## Train, validation, test dataset

The spectrogram are stored in the folder Features/melspec_313_128/. Each machinery has its folder fan/, valve/ etc... <br />
Each audio sample has its own mespectrogram flattened stored as a .npy file. So the file is one line of 313*128 = 40064 features

In [151]:
#Get file paths and labels
path_files = []
labels = []

#Walk through melspectrogram folders
for subdirectory, directory, files in os.walk(PATH_MELSPEC_313_128_FOLDER):
    
    #Get label using directory folder name
    label = subdirectory.split('/')[-1]
    
    #Loop through files
    for file in files:
        path_file = subdirectory + '/' + file
        path_files.append(path_file)
        labels.append(label)

In [152]:
#Encoding labels
label_encoder = LabelEncoder()
label_encoded = label_encoder.fit_transform(labels)

In [153]:
#Separate into three datasets
path_files_train, path_files_test, y_train, y_test = train_test_split(
    path_files, label_encoded, test_size=0.1, stratify=label_encoded)

path_files_train, path_files_valid, y_train, y_valid = train_test_split(
    path_files_train, y_train, test_size=0.1, stratify=y_train)

## Data Generator

Since the datasets are quite big, let's make a data generator.<br/>

In [154]:
def data_generator(file_list, label_list, batch_size):
    
    #Index used too go over file list 
    index = 0
    
    #Infinite loop
    while True:
        
        #Case we looped over all the files
        if((index + 1) * batch_size >= len(file_list)):
            #Reinit variables for a next round
            index = 0
            
            #Shuffle list to have different batches
            randomize = np.arange(len(file_list))
            np.random.shuffle(randomize)
            file_list = file_list[randomize]
            label_list = label_list[randomize]
            
        #Loop over files from index * batch size to (index + 1) * batch size
        else:
            #Get files paths
            file_chunk = file_list[index*batch_size:(index+1)*batch_size]
            label_chunk = label_list[index*batch_size:(index+1)*batch_size]
            
            #Init data and labels list
            data = []
            labels = []
            
            #Loop over batch files
            for file, label in zip(file_chunk, label_chunk):
                data.append(np.load(file).reshape(128, 313, 1))
                labels.append(tf.keras.utils.to_categorical(label, num_classes=7))
                
            data = np.asarray(data)
            labels = np.asarray(labels)
            yield data, labels
            index = index + 1

Let's use the Dataset tensorflow class based on those generators to create train, valid and test datasets 

In [155]:
batch_size = 32
train_dataset = tf.data.Dataset.from_generator(
    data_generator,
    args= [path_files_train, y_train, batch_size],
    output_types = (tf.float32, tf.float32),
    output_shapes = ((batch_size, 128, 313, 1),(batch_size, 7))
)

validation_dataset = tf.data.Dataset.from_generator(
    data_generator,
    args= [path_files_valid, y_valid, batch_size],
    output_types = (tf.float32, tf.float32),
    output_shapes = ((batch_size, 128, 313, 1),(batch_size, 7))
)

test_dataset = tf.data.Dataset.from_generator(
    data_generator,
    args= [path_files_test, y_test, batch_size],
    output_types = (tf.float32, tf.float32),
    output_shapes = ((batch_size, 128, 313, 1),(batch_size, 7))
)

## CNN Model Architecture

In [156]:
#Create layers of CNN
inputs = Input(shape = (128, 313, 1), name = "Input")
first_layer = Conv2D(filters = 32,
                     kernel_size = (5, 5),
                     padding = 'valid',
                     activation = 'relu')
second_layer = MaxPooling2D(pool_size = (2, 2))
third_layer = Dropout(rate=0.2)
fourth_layer = Flatten()
fifth_layer = Dense(128, activation='relu')
output_layer = Dense(7, activation='softmax')

#Organize them
x=first_layer(inputs)
x=second_layer(x)
x = third_layer(x)
x = fourth_layer(x)
x = fifth_layer(x)
outputs = output_layer(x)

#create model and compile
model = Model(inputs = inputs, outputs = outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

## Train CNN Model

In [157]:
#Get steps per epoch
steps_per_epoch = np.int32(np.ceil(len(path_files_train)/batch_size))
validation_steps = np.int32(np.ceil(len(path_files_valid)/batch_size))
print("steps_per_epoch = ", steps_per_epoch)
print("validation_steps = ", validation_steps)

steps_per_epoch =  638
validation_steps =  71


In [158]:
training_history = model.fit(train_dataset, validation_data = validation_dataset, steps_per_epoch = steps_per_epoch,
         validation_steps = validation_steps, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

## Evaluate model

In [None]:
#Plot training
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.plot(np.arange(1 , len(trainin_history.history['accuracy']) + 1, 1),
         trainin_history.history['accuracy'],
         label = 'Training Accuracy',
         color = 'blue')
plt.plot(np.arange(1 , len(trainin_history.history['val_accuracy']) + 1, 1),
         trainin_history.history['val_accuracy'], 
         label = 'Validation Accuracy',
         color = 'red')
plt.legend()
plt.show()

In [None]:
#Get prediction
test_pred = model.predict(test_dataset)

#Get class prediction
test_pred_class = np.argmax(test_pred, axis=1)
y_test_class = np.argmax(y_test, axis=1)

In [None]:
#Evaluate model
from sklearn.metrics import classification_report
print(classification_report(y_test_class, test_pred_class))

from sklearn.metrics import confusion_matrix
cnf_matrix = confusion_matrix(y_test_class, test_pred_class)
print(cnf_matrix)

# More complex model

Inspired by VGG-16.

Input: 128x313 <br/>
Conv2D (3*3) pad=1, filters 64, ((128-3+2*1)+1) * ((313-3+2*1)+1) * 64 => 128 * 313 * 64 <br/>
Conv2D (3*3) pad=1, filters 64, ((128-3+2*1)+1) * ((313-3+2*1)+1) * 64 => 128 * 313 * 64 <br/>
MaxPool2D (3*3), stride = 2, ((128-3)/2 + 1) * ((313-3)/2 + 1) * 64 => 64 * 156 * 64

Conv2D (3*3) pad=1, filters 128 ((64-3+2*1)+1) * ((156-3+2*1)+1) * 128 => 64 * 156 * 128 <br/>
Conv2D (3*3) pad=1, filters 128 ((64-3+2*1)+1) * ((156-3+2*1)+1) * 128 => 64 * 156 * 128 <br/>
MaxPool2D (3*3), stride = 2, ((64-3)/2 + 1) * ((156-3)/2 + 1) * 128 => 32 * 78 * 128

Conv2D (3*3) pad=1, filters 256 ((32-3+2*1)+1) * ((78-3+2*1)+1) * 256 => 32 * 78 * 256 <br/>
Conv2D (3*3) pad=1, filters 256 ((32-3+2*1)+1) * ((78-3+2*1)+1) * 256 => 32 * 78 * 256 <br/>
Conv2D (3*3) pad=1, filters 256 ((32-3+2*1)+1) * ((78-3+2*1)+1) * 256 => 32 * 78 * 256 <br/>
MaxPool2D (3*3), stride = 2, ((32-3)/2 + 1) * ((78-3)/2 + 1) * 256 => 16 * 39 * 256

Conv2D (3*3) pad=1, filters 512 ((16-3+2*1)+1) * ((39-3+2*1)+1) * 512 => 16 * 39 * 512 <br/>
Conv2D (3*3) pad=1, filters 512 ((16-3+2*1)+1) * ((39-3+2*1)+1) * 512 => 16 * 39 * 512 <br/>
Conv2D (3*3) pad=1, filters 512 ((16-3+2*1)+1) * ((39-3+2*1)+1) * 512 => 16 * 39 * 512 <br/>
MaxPool2D (3*3), stride = 2, ((16-3)/2 + 1) * ((39-3)/2 + 1) * 512 => 8 * 19 * 512

Flatten => 77824 <br/>
Dense (4096), activation=relu => 4096 <br/>
Dropout (0.2)

Dense (4096), activation=relu => 4096 <br/>
Dropout (0.2)

Dense (7), activation=softmax


In [159]:
#Create layers of CNN
inputs = Input(shape = (128, 313, 1), name = "Input")
normalize = BatchNormalization(axis=2)
conv_1_1 = Conv2D(filters = 64,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
conv_1_2 = Conv2D(filters = 64,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
pool_1_3 = MaxPooling2D(pool_size = (3, 3), strides=(2,2))

x = normalize(inputs)
x = conv_1_1(x)
x = conv_1_2(x)
x = pool_1_3(x)

In [160]:
conv_2_1 = Conv2D(filters = 128,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
conv_2_2 = Conv2D(filters = 128,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
pool_2_3 = MaxPooling2D(pool_size = (3, 3), strides=(2,2))

x = conv_2_1(x)
x = conv_2_2(x)
x = pool_2_3(x)

In [161]:
conv_3_1 = Conv2D(filters = 256,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
conv_3_2 = Conv2D(filters = 256,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
conv_3_3 = Conv2D(filters = 256,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
pool_3_4 = MaxPooling2D(pool_size = (3, 3), strides=(2,2))

x = conv_3_1(x)
x = conv_3_2(x)
x = conv_3_3(x)
x = pool_3_4(x)

In [162]:
conv_4_1 = Conv2D(filters = 512,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
conv_4_2 = Conv2D(filters = 512,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
conv_4_3 = Conv2D(filters = 512,
                     kernel_size = (3, 3),
                     padding = 'same',
                     activation = 'relu')
pool_4_4 = MaxPooling2D(pool_size = (3, 3), strides=(2,2))

x = conv_4_1(x)
x = conv_4_2(x)
x = conv_4_3(x)
x = pool_4_4(x)

In [163]:
flat_5_1 = Flatten()
dense_5_2 = Dense(4096, activation='relu')
drop_5_3 = Dropout(rate=0.2)
dense_5_4 = Dense(4096, activation='relu')
drop_5_5 = Dropout(rate=0.2)
out_5_6 = Dense(7, activation='softmax')

x = flat_5_1(x)
x = dense_5_2(x)
x = drop_5_3(x)
x = dense_5_4(x)
x = drop_5_5(x)
outputs = out_5_6(x)

## Compile the model

In [164]:
#create model and compile
model = Model(inputs = inputs, outputs = outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [165]:
model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input (InputLayer)          [(None, 128, 313, 1)]     0         
                                                                 
 batch_normalization_1 (Batc  (None, 128, 313, 1)      1252      
 hNormalization)                                                 
                                                                 
 conv2d_14 (Conv2D)          (None, 128, 313, 64)      640       
                                                                 
 conv2d_15 (Conv2D)          (None, 128, 313, 64)      36928     
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 63, 156, 64)      0         
 2D)                                                             
                                                                 
 conv2d_16 (Conv2D)          (None, 63, 156, 128)      7385

## Train the model

In [166]:
#Get steps per epoch
steps_per_epoch = np.int32(np.ceil(len(path_files_train)/batch_size))
validation_steps = np.int32(np.ceil(len(path_files_valid)/batch_size))
print("steps_per_epoch = ", steps_per_epoch)
print("validation_steps = ", validation_steps)

steps_per_epoch =  638
validation_steps =  71


In [167]:
training_history = model.fit(train_dataset, validation_data = validation_dataset, steps_per_epoch = steps_per_epoch,
         validation_steps = validation_steps, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100

KeyboardInterrupt: 

ça ne fonctionne pas du tout. Pourquoi?
