In [34]:
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from tensorflow import keras
import river
from river.stream import iter_pandas
from river.metrics import Accuracy,BalancedAccuracy,CohenKappa,GeometricMean,ROCAUC,F1
from river.evaluate import progressive_val_score
from river.naive_bayes import GaussianNB
from river.neighbors import KNNClassifier
from river.forest import ARFClassifier
from river.tree import HoeffdingTreeClassifier
from river.tree import HoeffdingAdaptiveTreeClassifier

In [35]:
from river_torch import classification
from torch import nn
from torch import optim
from torch import manual_seed

In [36]:
### SET YOUR PARAMETERS

dataset = 'fan' #change the dataset to use (fan/pump/valve)

shuffle = 0 #set to 1 to shuffle data
group_shuffle = 0 #set to 1 to shuffle data in group of 9
delay = 0 #change to set a delay in label arrival

# The two shuffle modes are mutually exclusive. The cells below test
# `shuffle+group_shuffle` against 0 and 1, so enabling both would skip the
# training pass entirely and evaluate an untrained model on the test stream.
if shuffle == 1 and group_shuffle == 1:
    raise ValueError("Enable at most one of `shuffle` and `group_shuffle`.")

# One seed for both the model and the `random` module used by the shuffle
# cells, so that shuffled runs are reproducible after Restart & Run All.
seed = 42
random.seed(seed)

# Alternatives: GaussianNB() / KNNClassifier(n_neighbors=2) /
# KNNClassifier(n_neighbors=5, window_size=1000) / HoeffdingTreeClassifier() /
# HoeffdingAdaptiveTreeClassifier(seed=42)
model = ARFClassifier(seed=seed)
metric = BalancedAccuracy() #change the metric to display (Accuracy()/BalancedAccuracy()/ROCAUC()/F1())

In [37]:
# Fan dataset: read the four spectrogram .npy arrays (features and labels for
# the training and testing splits) and the saved Keras model from data/mimii/fan.
if dataset == 'fan':
    test_data, train_data, train_labels, test_labels = (
        np.load(npy_path)
        for npy_path in (
            'data/mimii/fan/ei-industry-4.0---predictivemaintenance---fan-spectrogram-X_testing.npy',
            'data/mimii/fan/ei-industry-4.0---predictivemaintenance---fan-spectrogram-X_training.npy',
            'data/mimii/fan/ei-industry-4.0---predictivemaintenance---fan-spectrogram-y_training.npy',
            'data/mimii/fan/ei-industry-4.0---predictivemaintenance---fan-spectrogram-y_testing.npy',
        )
    )

    model1 = keras.models.load_model('data/mimii/fan/model.h5')

In [38]:
# Pump dataset: read the four MFE .npy arrays (features and labels for the
# training and testing splits) and the saved Keras model from data/mimii/pump.
if dataset == 'pump':
    test_data, train_data, train_labels, test_labels = (
        np.load(npy_path)
        for npy_path in (
            'data/mimii/pump/ei-industry-4.0---predictive-maintenance---pump-mfe-X_testing.npy',
            'data/mimii/pump/ei-industry-4.0---predictive-maintenance---pump-mfe-X_training.npy',
            'data/mimii/pump/ei-industry-4.0---predictive-maintenance---pump-mfe-y_training.npy',
            'data/mimii/pump/ei-industry-4.0---predictive-maintenance---pump-mfe-y_testing.npy',
        )
    )

    model1 = keras.models.load_model('data/mimii/pump/model.h5')

In [39]:
# Valve dataset: read the four spectrogram .npy arrays (features and labels for
# the training and testing splits) and the saved Keras model from data/mimii/valve.
if dataset == 'valve':
    test_data, train_data, train_labels, test_labels = (
        np.load(npy_path)
        for npy_path in (
            'data/mimii/valve/ei-industry-4.0---predictive-maintenance---valve-spectrogram-X_testing.npy',
            'data/mimii/valve/ei-industry-4.0---predictive-maintenance---valve-spectrogram-X_training.npy',
            'data/mimii/valve/ei-industry-4.0---predictive-maintenance---valve-spectrogram-y_training.npy',
            'data/mimii/valve/ei-industry-4.0---predictive-maintenance---valve-spectrogram-y_testing.npy',
        )
    )

    model1 = keras.models.load_model('data/mimii/valve/model.h5')

In [40]:
# Rewrite every 2 in the first label column to 0, in place, for both splits.
# NOTE(review): presumably this merges class 2 into class 0 to get a binary
# target — confirm against the dataset's label definition.
for label_array in (train_labels, test_labels):
    is_class_two = label_array[:, 0] == 2
    label_array[is_class_two, 0] = 0

In [41]:
# Print the layer-by-layer summary of the loaded Keras model; it shows the
# layer names and output shapes available for feature extraction.
model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape (Reshape)           (None, 199, 129)          0         
                                                                 
 conv1d (Conv1D)             (None, 199, 8)            3104      
                                                                 
 max_pooling1d (MaxPooling1  (None, 100, 8)            0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 100, 8)            0         
                                                                 
 conv1d_1 (Conv1D)           (None, 100, 16)           400       
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 50, 16)            0         
 g1D)                                                   

In [42]:
#model.pop()
#model.summary()

In [43]:
# Build a sub-model that shares model1's inputs but stops at the layer named
# "flatten", so its predictions are that layer's activations.
# (To tap a different layer, e.g. one named "dense" if model1 has it, change
# the layer name below.)
flatten_output = model1.get_layer(name="flatten").output
feature_extractor = keras.Model(inputs=model1.inputs, outputs=flatten_output)

# Run the training inputs through the extractor; these feature vectors are
# what the streaming classifier is fed.
features = feature_extractor.predict(train_data)

print(features.shape)

(3087, 800)


In [44]:
# Wrap the extracted training features and the first label column in pandas
# containers and expose them as a river stream.
features_df = pd.DataFrame(features)
labels_series = pd.Series(train_labels[:, 0])
stream = iter_pandas(X=features_df, y=labels_series)

# Use the metric object chosen in the parameter cell for the training pass.
metrics = metric

In [45]:
# Shuffle the order of the data in groups of 9 consecutive rows: whole groups
# are moved to random positions, rows inside a group keep their order.

if group_shuffle == 1:

    group_size = 9  # rows per group

    # The number of distinct values in label column 1 is used as the group count.
    # NOTE(review): assumes each distinct value marks one block of exactly
    # `group_size` consecutive rows — confirm against the label layout.
    distinct = len(set(train_labels[:,1]))

    features_shuffle = np.zeros(features.shape)
    train_labels_shuffle = np.zeros(train_labels[:,0].shape)

    # Random permutation of the group indices.
    positions = random.sample(range(distinct),distinct)

    for i in range(distinct):
        pos = positions[i]
        # Slice end is exclusive, so it must be start + group_size to copy all
        # 9 rows of the group (the previous `+8` left every 9th row at zero).
        dst = slice(i*group_size, (i+1)*group_size)
        src = slice(pos*group_size, (pos+1)*group_size)
        features_shuffle[dst,:] = features[src,:]
        train_labels_shuffle[dst] = train_labels[src,0]

In [46]:
# Shuffle the order of the data: apply one random row permutation to both the
# training features and the first label column.

if shuffle == 1:

    n_rows = features.shape[0]

    features_shuffle = np.zeros(features.shape)
    train_labels_shuffle = np.zeros(train_labels[:,0].shape)

    # permutation[dst] is the source row that lands at position dst.
    permutation = random.sample(range(n_rows), n_rows)

    for dst, src in enumerate(permutation):
        features_shuffle[dst,:] = features[src,:]
        train_labels_shuffle[dst] = train_labels[src,0]

In [47]:
# When the two shuffle flags sum to 1, wrap the shuffled arrays in pandas
# containers and expose them as a river stream.
if (shuffle + group_shuffle) == 1:

    features_df_shuffle, labels_series_shuffle = (
        pd.DataFrame(features_shuffle),
        pd.Series(train_labels_shuffle),
    )
    stream_shuffle = iter_pandas(
        X=features_df_shuffle,
        y=labels_series_shuffle,
    )

In [48]:
# Training pass on the shuffled stream: run river's progressive validation,
# printing the running metric every 100 samples.
if (shuffle + group_shuffle) == 1:

    shuffled_eval_args = dict(
        dataset=stream_shuffle,
        model=model,
        metric=metrics,
        print_every=100,
    )
    progressive_val_score(**shuffled_eval_args)

In [49]:
# Training pass on the original (unshuffled) stream: run river's progressive
# validation, printing the running metric every 100 samples.
if (shuffle + group_shuffle) == 0:

    ordered_eval_args = dict(
        dataset=stream,
        model=model,
        metric=metrics,
        print_every=100,
    )
    progressive_val_score(**ordered_eval_args)

[100] BalancedAccuracy: 100.00%
[200] BalancedAccuracy: 88.16%
[300] BalancedAccuracy: 96.74%
[400] BalancedAccuracy: 95.23%
[500] BalancedAccuracy: 95.76%
[600] BalancedAccuracy: 96.05%
[700] BalancedAccuracy: 96.23%
[800] BalancedAccuracy: 96.36%
[900] BalancedAccuracy: 96.46%
[1,000] BalancedAccuracy: 96.53%
[1,100] BalancedAccuracy: 96.58%
[1,200] BalancedAccuracy: 95.32%
[1,300] BalancedAccuracy: 95.79%
[1,400] BalancedAccuracy: 96.55%
[1,500] BalancedAccuracy: 96.98%
[1,600] BalancedAccuracy: 97.44%
[1,700] BalancedAccuracy: 97.76%
[1,800] BalancedAccuracy: 98.00%
[1,900] BalancedAccuracy: 98.18%
[2,000] BalancedAccuracy: 98.32%
[2,100] BalancedAccuracy: 98.44%
[2,200] BalancedAccuracy: 98.53%
[2,300] BalancedAccuracy: 98.62%
[2,400] BalancedAccuracy: 98.69%
[2,500] BalancedAccuracy: 98.75%
[2,600] BalancedAccuracy: 98.80%
[2,700] BalancedAccuracy: 98.84%
[2,800] BalancedAccuracy: 98.88%
[2,900] BalancedAccuracy: 98.92%
[3,000] BalancedAccuracy: 98.95%
[3,087] BalancedAccuracy: 9

In [50]:
# Extract features for the test split with the same feature extractor and
# expose them, with the first label column, as a river stream.
features2 = feature_extractor.predict(test_data)
features_df2 = pd.DataFrame(features2)
labels_series2 = pd.Series(test_labels[:,0])
stream2 = iter_pandas(X=features_df2, y=labels_series2)

# Score the test stream with a fresh copy of the metric chosen in the
# parameter cell, not a hard-coded BalancedAccuracy(). clone() keeps the
# metric's parameters and drops the state accumulated during training.
metrics = metric.clone()



In [51]:
# Progressive validation on the test stream with the configured label delay,
# printing the running metric every 100 samples. The call stays the last
# expression of the cell so the returned metric is displayed.
test_eval_args = dict(
    dataset=stream2,
    model=model,
    metric=metrics,
    delay=delay,
    print_every=100,
)
progressive_val_score(**test_eval_args)

[100] BalancedAccuracy: 85.76%
[200] BalancedAccuracy: 93.37%
[300] BalancedAccuracy: 95.83%
[400] BalancedAccuracy: 90.95%
[500] BalancedAccuracy: 94.69%
[600] BalancedAccuracy: 95.54%
[700] BalancedAccuracy: 96.07%
[756] BalancedAccuracy: 96.26%


BalancedAccuracy: 96.26%