In [8]:
!nvidia-smi

Sat May  7 02:14:28 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.68.01    Driver Version: 512.59       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |
|100%   31C    P8    28W / 350W |   4345MiB / 12288MiB |      7%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [9]:
import torch
import torch.utils.data
import importlib
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from datetime import datetime


import dataset
import autoencoder
import util

importlib.reload(dataset)
importlib.reload(autoencoder)
importlib.reload(util)


from dataset import NormalDataset, LabeledDataset
from autoencoder import Autoencoder
from util import accuracy

In [10]:
time_steps = 100
embedding_dim = 128
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_val_ratio = 0.9

dataset_names = ["A","B","C","D","E"]

In [11]:
normal_datasets = []
train_loaders = []
val_loaders = []

for idx, dataset_name in enumerate(dataset_names):
    normal_dataset = NormalDataset(f"../data/sensor_{dataset_name}_normal.csv", "telemetry", time_steps=time_steps)

    train_dataset_size = int(len(normal_dataset) * train_val_ratio)
    val_dataset_size = len(normal_dataset) - train_dataset_size
    train_dataset, val_dataset = torch.utils.data.random_split(normal_dataset, [train_dataset_size, val_dataset_size])

    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=True)

    normal_datasets.append(normal_dataset)
    train_loaders.append(train_loader)
    val_loaders.append(train_loader)

In [12]:
autoencoders = []

for idx, dataset_name in enumerate(dataset_names):
    autoencoders.append(Autoencoder(time_steps=time_steps, embedding_dim=embedding_dim, device=device))


### Training

In [34]:
epochs = 200

time = datetime.now()


In [35]:
autoencoders[0].train(train_loaders[0], val_loaders[0], epochs=epochs, tensorboard_postfix=f"{time.strftime('%b%d_%H-%M-%S')}_{dataset_names[0]}")

Epoch 1: train loss 58.80229348402757, val loss 41.24128987238957
Epoch 2: train loss 40.302113166222206, val loss 39.7824030656081
Epoch 3: train loss 38.600096482496994, val loss 38.10395534221943
Epoch 4: train loss 38.17163804861215, val loss 38.181205162635216
Epoch 5: train loss 38.055823399470405, val loss 37.60826477637658
Epoch 6: train loss 37.73074091397799, val loss 37.4500010563777
Epoch 7: train loss 37.44109476529635, val loss 37.77532430795523
Epoch 8: train loss 37.51993208665114, val loss 37.201161017784706
Epoch 9: train loss 37.22493098332332, val loss 37.03129944434533
Epoch 10: train loss 37.14572202242338, val loss 37.42372468801645
Epoch 11: train loss 37.03401653583233, val loss 37.780884962815506
Epoch 12: train loss 36.928018423227165, val loss 36.684979512141304
Epoch 13: train loss 37.122325310340294, val loss 36.54857679513785
Epoch 14: train loss 36.68055666410006, val loss 37.45556567265437
Epoch 15: train loss 36.56090208200308, val loss 36.103725286630

In [36]:
autoencoders[1].train(train_loaders[1], val_loaders[1], epochs=epochs, tensorboard_postfix=f"{time.strftime('%b%d_%H-%M-%S')}_{dataset_names[1]}")

Epoch 1: train loss 37.33048519721398, val loss 30.905793703519382
Epoch 2: train loss 31.027043709388145, val loss 30.839955696692833
Epoch 3: train loss 31.07467585343581, val loss 31.111127046438362
Epoch 4: train loss 30.917333602905273, val loss 30.698418617248535
Epoch 5: train loss 30.926151348994328, val loss 30.711188096266525
Epoch 6: train loss 30.927995314964882, val loss 30.740506172180176
Epoch 7: train loss 30.678437379690315, val loss 30.714090714087853
Epoch 8: train loss 30.690753276531513, val loss 30.637316630436825
Epoch 9: train loss 30.648111490102913, val loss 30.58591519869291
Epoch 10: train loss 30.757363099318283, val loss 30.609631831829365
Epoch 11: train loss 30.8355371768658, val loss 30.624183948223408
Epoch 12: train loss 30.60204139122596, val loss 30.542743316063515
Epoch 13: train loss 30.79549686725323, val loss 30.69326166006235
Epoch 14: train loss 30.602706542381874, val loss 30.504379052382248
Epoch 15: train loss 30.562176924485428, val loss 3

In [37]:
autoencoders[2].train(train_loaders[2], val_loaders[2], epochs=epochs, tensorboard_postfix=f"{time.strftime('%b%d_%H-%M-%S')}_{dataset_names[2]}")

Epoch 1: train loss 76.11902280954214, val loss 54.338405682490425
Epoch 2: train loss 50.12148350935716, val loss 46.69163883649386
Epoch 3: train loss 45.1333015698653, val loss 44.35655736923218
Epoch 4: train loss 45.092832822066086, val loss 45.08064550619859
Epoch 5: train loss 45.40079010449923, val loss 44.971229369823746
Epoch 6: train loss 44.862683479602524, val loss 44.775349011788
Epoch 7: train loss 44.647159411357, val loss 43.86864181665274
Epoch 8: train loss 44.3873955653264, val loss 44.05119132078611
Epoch 9: train loss 44.075777732408966, val loss 44.9853868667896
Epoch 10: train loss 45.15579260312594, val loss 46.25376492280226
Epoch 11: train loss 44.31189823150635, val loss 44.396745681762695
Epoch 12: train loss 44.14521905092093, val loss 45.23432948039128
Epoch 13: train loss 45.64427506006681, val loss 47.13548315488375
Epoch 14: train loss 44.83638205895057, val loss 43.92213902106652
Epoch 15: train loss 43.72579814837529, val loss 43.30580933277424
Epoch

In [38]:
autoencoders[3].train(train_loaders[3], val_loaders[3], epochs=epochs, tensorboard_postfix=f"{time.strftime('%b%d_%H-%M-%S')}_{dataset_names[3]}")

Epoch 1: train loss 51.98519976322468, val loss 12.951955978686993
Epoch 2: train loss 8.942617067923912, val loss 4.969048976898193
Epoch 3: train loss 5.103706020575303, val loss 5.79046473136315
Epoch 4: train loss 4.758810740250808, val loss 4.078471862352812
Epoch 5: train loss 3.9993776999987087, val loss 3.989209165939918
Epoch 6: train loss 3.830094942679772, val loss 3.6801276757166934
Epoch 7: train loss 3.540389033464285, val loss 3.390052474462069
Epoch 8: train loss 3.6061081060996423, val loss 3.855124464401832
Epoch 9: train loss 3.3405658923662624, val loss 3.1461252524302554
Epoch 10: train loss 3.084144904063298, val loss 2.9928906697493334
Epoch 11: train loss 3.2657216237141538, val loss 2.9728532204261193
Epoch 12: train loss 2.765282731789809, val loss 2.549775783832257
Epoch 13: train loss 2.959572049287649, val loss 2.634499815794138
Epoch 14: train loss 2.614330695225642, val loss 2.367538800606361
Epoch 15: train loss 2.487341046333313, val loss 2.263659284665

In [39]:
autoencoders[4].train(train_loaders[4], val_loaders[4], epochs=epochs, tensorboard_postfix=f"{time.strftime('%b%d_%H-%M-%S')}_{dataset_names[4]}")

Epoch 1: train loss 59.053302331404254, val loss 19.34903517636386
Epoch 2: train loss 10.231680068102749, val loss 6.0434746092016045
Epoch 3: train loss 7.0414322506297715, val loss 10.413350148634477
Epoch 4: train loss 8.015675046227194, val loss 6.679999698292125
Epoch 5: train loss 7.139820218086243, val loss 8.960851192474365
Epoch 6: train loss 7.982181960886175, val loss 6.904082341627642
Epoch 7: train loss 6.092067415064031, val loss 4.858585661107844
Epoch 8: train loss 5.6761967160485005, val loss 8.021629550240256
Epoch 9: train loss 6.853499412536621, val loss 5.774530584161932
Epoch 10: train loss 7.003544384782964, val loss 9.92201380296187
Epoch 11: train loss 6.140507112849843, val loss 7.395807331258601
Epoch 12: train loss 6.205797260457819, val loss 5.5368431047959765
Epoch 13: train loss 5.889434955336831, val loss 5.455821362408725
Epoch 14: train loss 5.8531235673210835, val loss 4.318837946111506
Epoch 15: train loss 4.532486774704673, val loss 4.9467691074718

### Load Model

In [13]:
model_postfix = "May07_01-34-18"
epochs_prefix = 200
for dataset_name, autoencoder_item in zip(dataset_names, autoencoders):
    path = f"../model/checkpoint_{model_postfix}_{dataset_name}_{epochs_prefix}.pth.tar"
    autoencoder_item.load_model(path)

### Testing

In [17]:
x_name_list = ["telemetry", "telemary_value", "telemetry", "telemetry", "telemetry"]

public_datasets = []
private_datasets = []


for idx, dataset_name in enumerate(dataset_names):
    public_dataset = LabeledDataset(f"../data/sensor_{dataset_name}_public.csv", x_name_list[idx], "label", time_steps=time_steps)
    private_dataset = NormalDataset(f"../data/sensor_{dataset_name}_private.csv", x_name_list[idx], time_steps=time_steps)

    public_datasets.append(public_dataset)
    private_datasets.append(private_dataset)


In [18]:
for dataset_name, autoencoder_item, public_dataset, private_dataset in zip(dataset_names, autoencoders, public_datasets, private_datasets):
    pred, loss = autoencoder_item.get_pred_and_loss(public_dataset)
    real_len = public_dataset.get_real_len()

    seq_true = public_dataset.get_1D_seq()
    seq_predict = pred.flatten()[:real_len]
    seq_loss = loss.flatten()[:real_len]
    seq_labels = public_dataset.get_labels()

    threshold = 1.5

    seq_anomaly = (seq_loss > threshold).astype('int32')

    # print(seq_true.shape)
    # print(seq_predict.shape)
    # print(seq_loss)
    # print(seq_labels)
    # print(seq_anomaly)

    print("accuracy:", accuracy(seq_labels, seq_anomaly))
    print(classification_report(seq_labels, seq_anomaly),"\n\n")


accuracy: 0.6575
              precision    recall  f1-score   support

           0       0.65      0.97      0.78      2501
           1       0.73      0.14      0.23      1499

    accuracy                           0.66      4000
   macro avg       0.69      0.55      0.51      4000
weighted avg       0.68      0.66      0.57      4000
 


accuracy: 0.9205
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      3692
           1       0.00      0.00      0.00       308

    accuracy                           0.92      4000
   macro avg       0.46      0.50      0.48      4000
weighted avg       0.85      0.92      0.88      4000
 


accuracy: 0.80875
              precision    recall  f1-score   support

           0       0.99      0.80      0.89      3707
           1       0.26      0.89      0.41       293

    accuracy                           0.81      4000
   macro avg       0.63      0.85      0.65      4000
weighted avg    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Output

In [20]:
output_np = np.array([], dtype="float32")

for dataset_name, autoencoder_item, public_dataset in zip(dataset_names, autoencoders, public_datasets):
    pred, loss = autoencoder_item.get_pred_and_loss(public_dataset)
    real_len = public_dataset.get_real_len()

    seq_true = public_dataset.get_1D_seq()
    seq_predict = pred.flatten()[:real_len]
    seq_loss = loss.flatten()[:real_len]

    output_np = np.concatenate((output_np, seq_loss))

for dataset_name, autoencoder_item, private_dataset in zip(dataset_names, autoencoders, private_datasets):
    pred, loss = autoencoder_item.get_pred_and_loss(private_dataset)
    real_len = private_dataset.get_real_len()

    seq_true = private_dataset.get_1D_seq()
    seq_predict = pred.flatten()[:real_len]
    seq_loss = loss.flatten()[:real_len]

    output_np = np.concatenate((output_np, seq_loss))

print(output_np.shape)

output_df = pd.DataFrame(output_np, columns=["pred"])
output_df.index.name = "id"

print(output_df)
output_df.to_csv(f'../outputs/output_{model_postfix}_{epochs_prefix}.csv')

(40000,)
           pred
id             
0      0.173600
1      0.010230
2      0.007169
3      0.017496
4      0.026557
...         ...
39995  0.000870
39996  0.000890
39997  0.000901
39998  0.000891
39999  0.000898

[40000 rows x 1 columns]
