In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import os
os.chdir(r"..")
from ctuFaultDetector.utils import *
from ctuFaultDetector.visual import *
from ctuFaultDetector.models.lstmClassifier import *

## Data load

First, we need to prepare the data. We have a CSV file with all the signals. The columns of the CSV file are:

idx, label, meas_id, Force_x, Force_y, Force_z, Torque_x, Torque_y, Torque_z

- idx is the identifier of the signal
- label is the boolean or None label of the signal
- meas_id is the identifier of the measurement group (e.g. the day of the measurement), given as an integer from 1 to n
- The other columns are signal feature columns


We create variables:

data, signals, labels, correct_signals, anom_signals

- data: list of tuples in the form of (signal : pd.DataFrame, label : bool)
- signals: list of signals in the form of pd.DataFrame
- labels: list of boolean/None labels
- correct_signals: list of signals with label "False" (i.e. not anomalous)
- anom_signals: list of signals with label "True" (i.e. anomalous)
- unlabeled_signals: list of signals with label "None"



### Loading the data.
We will begin our presentation by loading the data from the dataset. Execute the following cell to do that.

In [2]:
# Show the working directory so the relative dataset path below can be verified.
print(os.getcwd())

# Each entry of `data` is a (signal, label) pair.
# NOTE(review): `id` presumably selects measurement groups (meas_id) - confirm against load_data.
data = load_data("./ctuFaultDetector/data/dataset.csv", id=[1, 2, 5, 6])

signals = [signal for signal, _ in data]
labels = [label for _, label in data]

# Explicit comparisons (rather than truthiness) keep signals labelled None out of both lists.
correct_signals = [transform_pd_to_npy(signal) for signal, label in data if label == False]
anom_signals = [transform_pd_to_npy(signal) for signal, label in data if label == True]

print("Number of correct: ", len(correct_signals), ", Number of anomalous: ", len(anom_signals))
print()
print("Number of signals:", len(data))
print()
print("Format of a signal:\n\n", data[0])

c:\Users\altrn\Documents\fifth_semester\PROJ\Anomaly-detection-in-timeseries
Number of correct:  285 , Number of anomalous:  107

Number of signals: 392

Format of a signal:

 (        Force_x  Force_y    Force_z  Torque_x  Torque_y  Torque_z
0     22.672001    0.000  22.672001    -1.028     1.216     0.960
1     22.672001    0.000  22.672001    -1.028     1.216     0.960
2     22.464001    0.000  22.256001    -0.976     1.184     1.000
3     22.464001    0.000  22.256001    -0.976     1.184     1.000
4     22.256001   -0.208  21.216000    -0.876     1.152     0.980
...         ...      ...        ...       ...       ...       ...
1008  20.384001    0.208  18.719999    -0.852     1.112     0.852
1009  19.968000    0.000  18.304001    -0.944     1.104     0.832
1010  19.968000    0.000  18.304001    -0.944     1.104     0.832
1011  20.384001   -0.416  17.888000    -0.812     1.112     0.896
1012  21.007999   -0.624  18.096001    -0.748     1.108     0.916

[1013 rows x 6 columns], False

Now that we have loaded the data, let us see the methods we developed. Let's divide the dataset into a training and testing split!

In [3]:
np.random.seed(42)

# Shuffle a copy instead of `data` itself: shuffling in place made this cell
# non-idempotent (running it twice re-shuffled an already shuffled list and
# silently produced a different train/test split). The permutation obtained on
# a fresh run is the same as before, because it depends only on the seed and
# the length of the list.
shuffled_data = list(data)
np.random.shuffle(shuffled_data)

training_set, testing_set = get_n_th_fold(shuffled_data, 0, small_train=False)

# Explicit comparisons keep signals labelled None out of both groups.
correct_training_signals = [transform_pd_to_npy(sig) for sig, label in training_set if label == False]
anom_training_signals = [transform_pd_to_npy(sig) for sig, label in training_set if label == True]
correct_testing_signals = [transform_pd_to_npy(sig) for sig, label in testing_set if label == False]
anom_testing_signals = [transform_pd_to_npy(sig) for sig, label in testing_set if label == True]

# Sanity check: the two splits should not be empty at the same time as data exists.
assert len(training_set) + len(testing_set) > 0 or len(shuffled_data) == 0

print(f"The length of the training set: {len(training_set)}")
print(f"Number of correct/anomalous processes: {len(correct_training_signals)}/{len(anom_training_signals)}")
print("\n")
print(f"The length of the testing set: {len(testing_set)}")
print(f"Number of correct/anomalous processes: {len(correct_testing_signals)}/{len(anom_testing_signals)}")

The length of the training set: 336
Number of correct/anomalous processes: 244/92


The length of the testing set: 56
Number of correct/anomalous processes: 41/15


OK, now we are all set to start testing the methods. We start by training the neural network. If we do not want to spend time on training (10-15 minutes), we can load a pre-saved model instead. Set the TRAIN_MODEL variable to True to train the model, or to False to load the already trained model.


## LSTM detector


In [4]:
# Set to True to train from scratch (slow); False loads the pre-saved model.
TRAIN_MODEL = False

if TRAIN_MODEL:
    # NOTE(review): 6 matches the number of signal feature columns shown in the
    # data; the meaning of the second argument (2) is not visible here - confirm
    # against lstmClassifier.
    lk = lstmClassifier(6, 2)
    # Train on the training split only. Fitting on the full `data` would include
    # the signals of `testing_set` and leak them into the evaluation below
    # (unless train_classifier holds out data internally, which is not visible
    # from this notebook).
    lk.train_classifier(training_set, N_EPOCHS = 60, weighted_loss = True, weight = 1)
else:
    # The checkpoint stores the whole pickled model object, so it needs
    # weights_only=False (the default became True in PyTorch 2.6, which rejects
    # such files). Unpickling executes arbitrary code: only load checkpoints
    # from a trusted source.
    lk = torch.load("./ctuFaultDetector/model_params/lstm_WL.pth", weights_only=False)

In [5]:
def evaluate_offline_lstm(classifier, testing_set):
    """Print TNR, TPR and accuracy of ``classifier`` on ``testing_set``.

    Parameters
    ----------
    classifier
        Object with a ``predict(signal)`` method whose result is truthy when
        the signal is predicted to be anomalous.
    testing_set
        Iterable of ``(signal, label)`` pairs. Signals labelled ``False`` are
        treated as correct, ``True`` as anomalous; any other label (e.g.
        ``None``) is ignored.

    Returns
    -------
    None
        The metrics are only printed. A ratio whose denominator is zero (no
        signal of that class in the set) is reported as ``nan`` instead of
        raising ``ZeroDivisionError``.
    """
    def _ratio(hits, total):
        # Avoid ZeroDivisionError when a class is missing from the testing set.
        return hits / total if total else float("nan")

    # Explicit comparisons keep unlabeled (None) signals out of both groups.
    correct_testing_signals = [transform_pd_to_npy(i[0]) for i in testing_set if i[1] == False]
    anom_testing_signals = [transform_pd_to_npy(i[0]) for i in testing_set if i[1] == True]

    # bool() keeps the counters plain Python ints whatever type predict() returns.
    correct_perf = sum(not bool(classifier.predict(sig)) for sig in correct_testing_signals)
    anomaly_perf = sum(bool(classifier.predict(sig)) for sig in anom_testing_signals)

    # Accuracy is computed over labeled signals only; dividing by
    # len(testing_set) would count unlabeled signals as misclassified.
    n_labeled = len(correct_testing_signals) + len(anom_testing_signals)

    print(f"Correctly predicted successfull signals: {correct_perf}/{len(correct_testing_signals)} --> TNR = {_ratio(correct_perf, len(correct_testing_signals))}")
    print(f"Correctly predicted anomalous signals: {anomaly_perf}/{len(anom_testing_signals)} --> TPR =  {_ratio(anomaly_perf, len(anom_testing_signals))}")
    print(f"Accuracy: {_ratio(correct_perf + anomaly_perf, n_labeled)}")

In [6]:
# Report TNR / TPR / accuracy of the LSTM classifier on the held-out testing split.
evaluate_offline_lstm(classifier=lk, testing_set=testing_set)

Correctly predicted successfull signals: 39/41 --> TNR = 0.9512195121951219
Correctly predicted anomalous signals: 11/15 --> TPR =  0.7333333333333333
Accuracy: 0.8928571428571429


In [11]:
# Persist the model only after an actual training run. When TRAIN_MODEL is
# False, `lk` was just loaded from this very file, and re-saving it would
# needlessly overwrite the shipped checkpoint on every "Run All".
if TRAIN_MODEL:
    torch.save(lk, "./ctuFaultDetector/model_params/lstm_WL.pth")