In [47]:
import abc
import logging
import random

import numpy as np
import torch

from torch.autograd import Variable


class Algorithm(metaclass=abc.ABCMeta):
    def __init__(self, module_name, name, seed, details=False):
        self.logger = logging.getLogger(module_name)
        self.name = name
        self.seed = seed
        self.details = details
        self.prediction_details = {}
        self.history = {}

        if self.seed is not None:
            random.seed(seed)
            np.random.seed(seed)

    def __str__(self):
        return self.name

    @abc.abstractmethod
    def fit(self, X):
        """
        Train the algorithm on the given dataset
        """

    @abc.abstractmethod
    def predict(self, X):
        """
        :return anomaly score
        """


class PyTorchUtils(metaclass=abc.ABCMeta):
    def __init__(self, seed, gpu):
        self.gpu = gpu
        self.seed = seed
        if self.seed is not None:
            torch.manual_seed(self.seed)
            torch.cuda.manual_seed(self.seed)
        self.framework = 0

    @property
    def device(self):
        return torch.device('cuda:{self.gpu}' if torch.cuda.is_available() and self.gpu is not None else 'cpu')

    def to_var(self, t, **kwargs):
        # ToDo: check whether cuda Variable.
        t = t.to(self.device)
        return Variable(t, **kwargs)

    def to_device(self, model):
        model.to(self.device)

In [48]:
import time
start_time = time.time()

import numpy as np
import pandas as pd
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim

from scipy.stats import multivariate_normal
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from tqdm import trange
import matplotlib.pyplot as plt
%matplotlib inline

!git clone https://github.com/priyojitk/mit-resources.git yahoo_dataset
file_path = '/content/yahoo_dataset/'
import sys
sys.path.append(file_path)

from sklearn.metrics import confusion_matrix

fatal: destination path 'yahoo_dataset' already exists and is not an empty directory.


In [49]:
from sklearn.metrics import accuracy_score, fbeta_score
from sklearn.metrics import precision_recall_fscore_support as prf
from sklearn.metrics import roc_curve, auc, roc_auc_score

def get_accuracy_precision_recall_fscore(y_true: list, y_pred: list):
        accuracy = accuracy_score(y_true, y_pred)
        # warn_for=() avoids log warnings for any result being zero
        precision, recall, f_score, _ = prf(y_true, y_pred, average='binary', warn_for=())
        if precision == 0 and recall == 0:
            f01_score = 0
        else:
            f01_score = fbeta_score(y_true, y_pred, average='binary', beta=0.1)
        return accuracy, precision, recall, f_score, f01_score

# accuracy, precision, recall, f_score, f01_score = get_accuracy_precision_recall_fscore(_test['is_anomaly'], pred_y)

In [50]:
def threshold(score):
    return np.nanmean(score) + 2 * np.nanstd(score)


In [51]:
class Autoencoder(nn.Module):
    def __init__(self, kernel_size, sequence_length, input_dim, latent_dim, num_layers):
        super(Autoencoder, self).__init__()
        self.sequence_length = sequence_length
        self.conv_padding=int((kernel_size - 1)/2) #in this way the output has the same size
        self.maxpool_kernel_size = 3
        self.maxpool_padding=int((self.maxpool_kernel_size - 1)/2) #in this way the output has the same size
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 5, kernel_size=kernel_size, padding = self.conv_padding),
            nn.MaxPool1d(self.maxpool_kernel_size, stride=1, padding = self.maxpool_padding),
            nn.ReLU(True),
            nn.Conv1d(5, 25, kernel_size=kernel_size, padding = self.conv_padding),
            nn.MaxPool1d(self.maxpool_kernel_size, stride=1, padding = self.maxpool_padding),
            nn.ReLU(True)
        )
            
        self.decoder = nn.Sequential(             
            nn.ConvTranspose1d(25, 5, kernel_size=kernel_size, padding = self.conv_padding),
            nn.MaxPool1d(self.maxpool_kernel_size, stride=1, padding = self.maxpool_padding),
            nn.ReLU(True),
            nn.ConvTranspose1d(5, 1, kernel_size=kernel_size, padding = self.conv_padding),
            nn.MaxPool1d(self.maxpool_kernel_size, stride=1, padding = self.maxpool_padding),
            nn.ReLU(True))
        
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.num_layers = num_layers

        self.encoder1 = nn.LSTM(self.input_dim, self.latent_dim, num_layers=self.num_layers[0])

        self.decoder1 = nn.LSTM(self.latent_dim, self.input_dim, num_layers=self.num_layers[1])

    def forward(self, x):
        batch_size = x.shape[0]
        x = x.view(batch_size, 1, self.sequence_length) #for convolution
        x = self.encoder(x.float())
        x = self.decoder(x)
        x = x.view(batch_size, self.sequence_length, 1) #for lstm
        # print(x.shape)
        input = x
        sequence_length = input.shape[1]

        encoded, (last_hidden, _) = self.encoder1(input.float())
        # print(encoded.shape)
        decoder_input = encoded[:, -1:].repeat(1, sequence_length, 1)
        decoded, _ = self.decoder1(decoder_input)  
        return decoded

In [52]:
#  model =  Autoencoder(kernel_size = 5, sequence_length = 10, input_dim=1, latent_dim=15, num_layers=(1, 1))
# model
# input = torch.randn(20, 10, 1)
# batch_size = input.shape[0]
# sequence_length = input.shape[1]
# model(input).shape

In [53]:
class CLSTMED(Algorithm, PyTorchUtils):
    def __init__(self, name: str = 'LSTM-ED',
                 num_epochs: int = 10, 
                 batch_size: int = 20, 
                 lr: float = 1e-3,
                 hidden_size: int = 5,
                 sequence_length: int = 48,
                 train_gaussian_percentage: float = 0.2,
                 n_layers: tuple = (1, 1), 
                 use_bias: tuple = (True, True),
                 dropout: tuple = (0, 0),
                 seed: int = 1,
                 gpu: int = None,
                 details=True):
        
        Algorithm.__init__(self, __name__, name, seed, details=details)
        if self.seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        
        PyTorchUtils.__init__(self, seed, gpu)
        
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.lr = lr
        self.hidden_size = hidden_size
        self.sequence_length = sequence_length
        self.train_gaussian_percentage = train_gaussian_percentage

        self.n_layers = n_layers
        self.use_bias = use_bias
        self.dropout = dropout 
        self.mean, self.cov = None, None


    def fit(self, X: pd.DataFrame):
        
        X.interpolate(inplace=True) #
        X.bfill(inplace=True) #backward fill values
        data = X.values
        sequences = [data[i:i + self.sequence_length] for i in range(data.shape[0] - self.sequence_length + 1)]
        indices = np.random.permutation(len(sequences)) 
        
        split_point = int(self.train_gaussian_percentage * len(sequences))
#         self.logger.info(f'len of split_point : {(split_point)}')
        train_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                  sampler=SubsetRandomSampler(indices[:-split_point]), pin_memory=True)
        train_gaussian_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                           sampler=SubsetRandomSampler(indices[-split_point:]), pin_memory=True)

        self.clstmed = Autoencoder(kernel_size = 5, sequence_length = self.sequence_length, 
                                   input_dim=1, latent_dim=self.hidden_size, num_layers=self.n_layers)
        self.to_device(self.clstmed)

#         self.logger.info(f'X.shape : {X.shape}')
        self.to_device(self.clstmed)
        optimizer = torch.optim.Adam(self.clstmed.parameters(), lr=self.lr)

        self.clstmed.train() #set to training mode=True
        self.history['train_loss'] = []
        self.history['val_loss'] = []
        
        for epoch in trange(self.num_epochs): #self.num_epochs
            self.clstmed.train() #set to training mode=True
            train_batch_loss = []
            for ts_batch in train_loader:
                # print("ts_batch size", ts_batch.shape) #[20, 24, 1]
                output = self.clstmed(self.to_var(ts_batch.float()))
                loss = nn.MSELoss(reduction='mean')(output, self.to_var(ts_batch.float())) 
                self.clstmed.zero_grad()
                loss.backward()
                optimizer.step()
                train_batch_loss.append(loss.item())
            train_loss = sum(train_batch_loss)/len(train_batch_loss)
            self.history['train_loss'].append(train_loss)
        

            self.clstmed.eval()
            val_batch_loss = []
            for ts_batch in train_gaussian_loader:
                output = self.clstmed(self.to_var(ts_batch))
                val_loss = nn.MSELoss(reduction='mean')(output, self.to_var(ts_batch.float())) 
                val_batch_loss.append(val_loss.item())
            self.history['val_loss'].append(sum(val_batch_loss)/len(val_batch_loss))

        self.clstmed.eval()
        error_vectors = []
        count = 0
        for ts_batch in train_gaussian_loader:
            output = self.clstmed(self.to_var(ts_batch))
            error = nn.L1Loss(reduction='none')(output, self.to_var(ts_batch.float())) #MAE
            error_vectors += list(error.view(-1, X.shape[1]).data.cpu().numpy())

        self.mean = np.mean(error_vectors, axis=0)
        self.cov = np.cov(error_vectors, rowvar=False)
        # logging.info(f'Mean error in evaluation : {self.mean}')
        # logging.info(f'Mean cov in evaluation : {self.cov}')

    def predict(self, X: pd.DataFrame):
        X.interpolate(inplace=True)
        X.bfill(inplace=True)
        data = X.values
        sequences = [data[i:i + self.sequence_length] for i in range(data.shape[0] - self.sequence_length + 1)]
        data_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, shuffle=False, drop_last=False)

        self.clstmed.eval()
        mvnormal = multivariate_normal(self.mean, self.cov, allow_singular=True)
        scores = []
        outputs = []
        errors = []
        for idx, ts in enumerate(data_loader):
            output = self.clstmed(self.to_var(ts))
            error = nn.L1Loss(reduction='none')(output, self.to_var(ts.float()))
            score = -mvnormal.logpdf(error.view(-1, X.shape[1]).data.cpu().numpy())
            # score = mvnormal.pdf(error.view(-1, X.shape[1]).data.cpu().numpy())
            scores.append(score.reshape(ts.size(0), self.sequence_length)) #ts.size() == batch size
            if self.details:
                outputs.append(output.data.numpy())
                errors.append(error.data.numpy())

        # stores seq_len-many scores per timestamp and averages them
        scores = np.concatenate(scores)
        self.logger.debug(f'score shape 2 , {len(scores)}')
        lattice = np.full((self.sequence_length, data.shape[0]), np.nan)
        self.logger.debug(f'lattice shape 1 , {lattice.shape}')
        for i, score in enumerate(scores):
            lattice[i % self.sequence_length, i:i + self.sequence_length] = score
        scores = np.nanmean(lattice, axis=0)
        self.logger.debug(f'score shape 3 : {scores.shape}')

        return scores



In [54]:
def get_specificity(actual_y, pred_y):
    tn, fp, fn, tp = confusion_matrix(actual_y, pred_y).ravel()
    specificity = tn / (tn + fp)
    return specificity

In [55]:
avg_accuracy, avg_precision, avg_recall, avg_f_score, avg_f01_score = 0, 0, 0, 0, 0
avg_auc = 0
avg_specificity = 0
A1 = 67
A4 = 100
no_of_dataset = A1
zero_anomaly_test = 0

In [56]:
from sklearn.preprocessing import StandardScaler

for ind in range(1, no_of_dataset + 1):

    print('\nDataset ', ind)
    logging.info(f'\n\n=============\n Dataset {ind} : \n=============')

    df = pd.read_csv(f'{file_path}dataset/ydata-labeled-time-series-anomalies-v1_0/A1Benchmark/real_{ind}.csv', index_col='timestamp')
    from sklearn.preprocessing import StandardScaler
    df['value'] = df['value'].astype(np.float64)

    # Normalize the  data (center around 0 and scale to remove the variance).
    scaler = StandardScaler()
    df['value'] = scaler.fit_transform(df['value'].values.reshape(-1, 1))

    len_of_data = df.shape[0]
    # _train = df[0:500] 
    # _test = df[500:]

    _train = df[0:int(len_of_data * 0.4)] # 40 percent when index is not range(1, N)
    _test = df[int(len_of_data * 0.4):] # 60 percent
    # _train = df.head(int(len_of_data * 0.5))
    # _test = df.tail(int(len_of_data * 0.5)) # 60 percent
    print('length of training set :', len(_train))
    print('length of test set :', len(_test))

    #If there is no anomaly in 'test' data or whole data ignore the dataset
    anomaly_count_test = len(_test[_test['is_anomaly'] == 1])
    # anomaly_count_test = len(_test[_test['anomaly'] == 1])
    print('#anomaly in test set :', anomaly_count_test)
    if anomaly_count_test == 0:
        zero_anomaly_test += 1
        continue

    model = CLSTMED(num_epochs=30, sequence_length=10, hidden_size=10, n_layers=(1, 1))#56
    model.fit(_train[['value']].copy())

    scores = model.predict(_test[['value']].copy())
    th = threshold(scores)
    pred_y = np.where(np.array(scores) > th, 1, 0) 

    accuracy, precision, recall, f_score, f01_score = get_accuracy_precision_recall_fscore(_test['is_anomaly'], pred_y)
    auc = roc_auc_score(_test['is_anomaly'], pred_y)
    specificity = get_specificity(_test['is_anomaly'], pred_y)

    print(f'Accuracy : {accuracy}')
    print(f'Precision : {precision}')
    print(f'recall : {recall}')
    print(f'f_score : {f_score}')
    print(f'f01_score : {f01_score}')
    print(f'auc : {auc}')
    print(f'specificity : {specificity}')
    

    avg_accuracy += accuracy
    avg_precision += precision
    avg_recall += recall
    avg_f_score += f_score
    avg_f01_score += f01_score
    avg_auc += auc
    avg_specificity += specificity

denominator = (no_of_dataset - zero_anomaly_test)
print(f'Avg Accuracy : {avg_accuracy / denominator }')
print(f'Avg Precision : {avg_precision / denominator }')
print(f'Avg recall : {avg_recall / denominator }')
print(f'Avg f_score : {avg_f_score / denominator }')
print(f'Avg f01_score : {avg_f01_score / denominator }')
print(f'Avg auc : {avg_auc / denominator }')
print(f'Avg specificity : {avg_specificity / denominator }')
print(f"--- {(time.time() - start_time)/60} minutes ---" )


  0%|          | 0/30 [00:00<?, ?it/s]


Dataset  1
length of training set : 568
length of test set : 852
#anomaly in test set : 2


100%|██████████| 30/30 [00:09<00:00,  3.19it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9870892018779343
Precision : 0.15384615384615385
recall : 1.0
f_score : 0.2666666666666667
f01_score : 0.15514592933947774
auc : 0.9935294117647059
specificity : 0.9870588235294118

Dataset  2
length of training set : 575
length of test set : 864
#anomaly in test set : 16


100%|██████████| 30/30 [00:09<00:00,  3.25it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9872685185185185
Precision : 1.0
recall : 0.3125
f_score : 0.47619047619047616
f01_score : 0.9786821705426356
auc : 0.65625
specificity : 1.0

Dataset  3
length of training set : 584
length of test set : 877
#anomaly in test set : 14


100%|██████████| 30/30 [00:09<00:00,  3.10it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9942987457240593
Precision : 1.0
recall : 0.6428571428571429
f_score : 0.782608695652174
f01_score : 0.9945295404814004
auc : 0.8214285714285714
specificity : 1.0

Dataset  4
length of training set : 569
length of test set : 854
#anomaly in test set : 5


100%|██████████| 30/30 [00:09<00:00,  3.21it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  5
length of training set : 575
length of test set : 864
#anomaly in test set : 0

Dataset  6
length of training set : 575
length of test set : 864
#anomaly in test set : 8


100%|██████████| 30/30 [00:09<00:00,  3.25it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988425925925926
Precision : 1.0
recall : 0.875
f_score : 0.9333333333333333
f01_score : 0.998587570621469
auc : 0.9375
specificity : 1.0

Dataset  7
length of training set : 569
length of test set : 854
#anomaly in test set : 38


100%|██████████| 30/30 [00:09<00:00,  3.25it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.955503512880562
Precision : 0.5
recall : 0.07894736842105263
f_score : 0.13636363636363635
f01_score : 0.4749216300940438
auc : 0.5376354489164087
specificity : 0.9963235294117647

Dataset  8
length of training set : 568
length of test set : 852
#anomaly in test set : 10


100%|██████████| 30/30 [00:09<00:00,  3.26it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988262910798122
Precision : 0.9090909090909091
recall : 1.0
f_score : 0.9523809523809523
f01_score : 0.9099099099099099
auc : 0.9994061757719714
specificity : 0.998812351543943

Dataset  9
length of training set : 584
length of test set : 877
#anomaly in test set : 8


100%|██████████| 30/30 [00:09<00:00,  3.12it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9931584948688712
Precision : 1.0
recall : 0.25
f_score : 0.4
f01_score : 0.9711538461538461
auc : 0.625
specificity : 1.0

Dataset  10
length of training set : 575
length of test set : 864
#anomaly in test set : 13


100%|██████████| 30/30 [00:09<00:00,  3.30it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  11
length of training set : 575
length of test set : 864
#anomaly in test set : 19


100%|██████████| 30/30 [00:09<00:00,  3.29it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9849537037037037
Precision : 1.0
recall : 0.3157894736842105
f_score : 0.4799999999999999
f01_score : 0.9789983844911146
auc : 0.6578947368421053
specificity : 1.0

Dataset  12
length of training set : 575
length of test set : 864
#anomaly in test set : 2


100%|██████████| 30/30 [00:09<00:00,  3.24it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988425925925926
Precision : 1.0
recall : 0.5
f_score : 0.6666666666666666
f01_score : 0.9901960784313726
auc : 0.75
specificity : 1.0

Dataset  13
length of training set : 575
length of test set : 864
#anomaly in test set : 9


100%|██████████| 30/30 [00:09<00:00,  3.31it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9965277777777778
Precision : 1.0
recall : 0.6666666666666666
f_score : 0.8
f01_score : 0.9950738916256158
auc : 0.8333333333333333
specificity : 1.0

Dataset  14
length of training set : 575
length of test set : 864
#anomaly in test set : 0

Dataset  15
length of training set : 575
length of test set : 864
#anomaly in test set : 8


100%|██████████| 30/30 [00:09<00:00,  3.29it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9965277777777778
Precision : 1.0
recall : 0.625
f_score : 0.7692307692307693
f01_score : 0.9940944881889763
auc : 0.8125
specificity : 1.0

Dataset  16
length of training set : 584
length of test set : 877
#anomaly in test set : 3


100%|██████████| 30/30 [00:09<00:00,  3.12it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9977194982896237
Precision : 1.0
recall : 0.3333333333333333
f_score : 0.5
f01_score : 0.9805825242718447
auc : 0.6666666666666666
specificity : 1.0

Dataset  17
length of training set : 569
length of test set : 855
#anomaly in test set : 227


100%|██████████| 30/30 [00:09<00:00,  3.27it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.7941520467836257
Precision : 1.0
recall : 0.22466960352422907
f_score : 0.3669064748201439
f01_score : 0.9669607659095175
auc : 0.6123348017621145
specificity : 1.0

Dataset  18
length of training set : 584
length of test set : 877
#anomaly in test set : 0

Dataset  19
length of training set : 569
length of test set : 855
#anomaly in test set : 227


100%|██████████| 30/30 [00:08<00:00,  3.34it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.7672514619883041
Precision : 1.0
recall : 0.12334801762114538
f_score : 0.2196078431372549
f01_score : 0.9342583415923357
auc : 0.5616740088105727
specificity : 1.0

Dataset  20
length of training set : 568
length of test set : 854
#anomaly in test set : 9


100%|██████████| 30/30 [00:09<00:00,  3.28it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.990632318501171
Precision : 1.0
recall : 0.1111111111111111
f_score : 0.19999999999999998
f01_score : 0.926605504587156
auc : 0.5555555555555556
specificity : 1.0

Dataset  21
length of training set : 568
length of test set : 852
#anomaly in test set : 6


100%|██████████| 30/30 [00:09<00:00,  3.28it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988262910798122
Precision : 1.0
recall : 0.8333333333333334
f_score : 0.9090909090909091
f01_score : 0.9980237154150197
auc : 0.9166666666666667
specificity : 1.0

Dataset  22
length of training set : 568
length of test set : 852
#anomaly in test set : 63


100%|██████████| 30/30 [00:09<00:00,  3.22it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9565727699530516
Precision : 1.0
recall : 0.4126984126984127
f_score : 0.5842696629213483
f01_score : 0.9861058956064589
auc : 0.7063492063492063
specificity : 1.0

Dataset  23
length of training set : 568
length of test set : 852
#anomaly in test set : 9


100%|██████████| 30/30 [00:09<00:00,  3.32it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.994131455399061
Precision : 0.6428571428571429
recall : 1.0
f_score : 0.782608695652174
f01_score : 0.6451383960255501
auc : 0.9970344009489918
specificity : 0.9940688018979834

Dataset  24
length of training set : 584
length of test set : 877
#anomaly in test set : 15


100%|██████████| 30/30 [00:09<00:00,  3.21it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  25
length of training set : 574
length of test set : 861
#anomaly in test set : 43


100%|██████████| 30/30 [00:09<00:00,  3.30it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988385598141696
Precision : 1.0
recall : 0.9767441860465116
f_score : 0.988235294117647
f01_score : 0.9997643176997407
auc : 0.9883720930232558
specificity : 1.0

Dataset  26
length of training set : 574
length of test set : 861
#anomaly in test set : 75


100%|██████████| 30/30 [00:09<00:00,  3.22it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.8896631823461092
Precision : 0.0
recall : 0.0
f_score : 0.0
f01_score : 0
auc : 0.4872773536895674
specificity : 0.9745547073791349

Dataset  27
length of training set : 570
length of test set : 857
#anomaly in test set : 2


100%|██████████| 30/30 [00:09<00:00,  3.26it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  28
length of training set : 576
length of test set : 865
#anomaly in test set : 81


100%|██████████| 30/30 [00:09<00:00,  3.20it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.8658959537572254
Precision : 0.02702702702702703
recall : 0.012345679012345678
f_score : 0.01694915254237288
f01_score : 0.026712509918011107
auc : 0.4832136558327035
specificity : 0.9540816326530612

Dataset  29
length of training set : 576
length of test set : 865
#anomaly in test set : 6


100%|██████████| 30/30 [00:09<00:00,  3.24it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9942196531791907
Precision : 0.5714285714285714
recall : 0.6666666666666666
f_score : 0.6153846153846153
f01_score : 0.5722379603399433
auc : 0.8315871168024834
specificity : 0.9965075669383003

Dataset  30
length of training set : 584
length of test set : 877
#anomaly in test set : 8


100%|██████████| 30/30 [00:09<00:00,  3.06it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988597491448119
Precision : 1.0
recall : 0.875
f_score : 0.9333333333333333
f01_score : 0.998587570621469
auc : 0.9375
specificity : 1.0

Dataset  31
length of training set : 570
length of test set : 857
#anomaly in test set : 24


100%|██████████| 30/30 [00:09<00:00,  3.21it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9428238039673279
Precision : 0.29508196721311475
recall : 0.75
f_score : 0.42352941176470593
f01_score : 0.2968647942521228
auc : 0.8491896758703482
specificity : 0.9483793517406963

Dataset  32
length of training set : 570
length of test set : 857
#anomaly in test set : 47


100%|██████████| 30/30 [00:09<00:00,  3.20it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9148191365227538
Precision : 0.30303030303030304
recall : 0.425531914893617
f_score : 0.35398230088495575
f01_score : 0.30389649465924473
auc : 0.6843708957184134
specificity : 0.9432098765432099

Dataset  33
length of training set : 575
length of test set : 864
#anomaly in test set : 2


100%|██████████| 30/30 [00:09<00:00,  3.24it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988425925925926
Precision : 1.0
recall : 0.5
f_score : 0.6666666666666666
f01_score : 0.9901960784313726
auc : 0.75
specificity : 1.0

Dataset  34
length of training set : 570
length of test set : 857
#anomaly in test set : 7


100%|██████████| 30/30 [00:09<00:00,  3.28it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9964994165694282
Precision : 0.7
recall : 1.0
f_score : 0.8235294117647058
f01_score : 0.702085402184707
auc : 0.998235294117647
specificity : 0.9964705882352941

Dataset  35
length of training set : 570
length of test set : 857
#anomaly in test set : 0

Dataset  36
length of training set : 584
length of test set : 877
#anomaly in test set : 1


100%|██████████| 30/30 [00:09<00:00,  3.08it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9965792474344356
Precision : 0.25
recall : 1.0
f_score : 0.4
f01_score : 0.2518703241895262
auc : 0.9982876712328766
specificity : 0.9965753424657534

Dataset  37
length of training set : 573
length of test set : 861
#anomaly in test set : 34


100%|██████████| 30/30 [00:09<00:00,  3.26it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9465737514518002
Precision : 0.125
recall : 0.058823529411764705
f_score : 0.07999999999999999
f01_score : 0.12362301101591187
auc : 0.5209474358062451
specificity : 0.9830713422007256

Dataset  38
length of training set : 570
length of test set : 857
#anomaly in test set : 9


100%|██████████| 30/30 [00:09<00:00,  3.29it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9918319719953326
Precision : 0.6666666666666666
recall : 0.4444444444444444
f_score : 0.5333333333333333
f01_score : 0.6633825944170771
auc : 0.7210429769392033
specificity : 0.9976415094339622

Dataset  39
length of training set : 570
length of test set : 857
#anomaly in test set : 8


100%|██████████| 30/30 [00:09<00:00,  3.32it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9964994165694282
Precision : 1.0
recall : 0.625
f_score : 0.7692307692307693
f01_score : 0.9940944881889763
auc : 0.8125
specificity : 1.0

Dataset  40
length of training set : 570
length of test set : 857
#anomaly in test set : 80


100%|██████████| 30/30 [00:09<00:00,  3.30it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.8763127187864644
Precision : 0.03571428571428571
recall : 0.0125
f_score : 0.01851851851851852
f01_score : 0.03506944444444444
auc : 0.4888754826254826
specificity : 0.9652509652509652

Dataset  41
length of training set : 574
length of test set : 861
#anomaly in test set : 2


100%|██████████| 30/30 [00:09<00:00,  3.23it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988385598141696
Precision : 0.6666666666666666
recall : 1.0
f_score : 0.8
f01_score : 0.6688741721854305
auc : 0.9994179278230501
specificity : 0.9988358556461001

Dataset  42
length of training set : 576
length of test set : 864
#anomaly in test set : 44


100%|██████████| 30/30 [00:09<00:00,  3.26it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9745370370370371
Precision : 1.0
recall : 0.5
f_score : 0.6666666666666666
f01_score : 0.9901960784313726
auc : 0.75
specificity : 1.0

Dataset  43
length of training set : 576
length of test set : 864
#anomaly in test set : 27


100%|██████████| 30/30 [00:09<00:00,  3.28it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9722222222222222
Precision : 1.0
recall : 0.1111111111111111
f_score : 0.19999999999999998
f01_score : 0.926605504587156
auc : 0.5555555555555556
specificity : 1.0

Dataset  44
length of training set : 584
length of test set : 877
#anomaly in test set : 2


100%|██████████| 30/30 [00:09<00:00,  3.23it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9486887115165337
Precision : 0.022222222222222223
recall : 0.5
f_score : 0.0425531914893617
f01_score : 0.02243447356730342
auc : 0.7248571428571429
specificity : 0.9497142857142857

Dataset  45
length of training set : 576
length of test set : 864
#anomaly in test set : 1


100%|██████████| 30/30 [00:09<00:00,  3.26it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  46
length of training set : 576
length of test set : 865
#anomaly in test set : 109


100%|██████████| 30/30 [00:09<00:00,  3.25it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9895953757225433
Precision : 1.0
recall : 0.9174311926605505
f_score : 0.9569377990430622
f01_score : 0.999109704223959
auc : 0.9587155963302753
specificity : 1.0

Dataset  47
length of training set : 570
length of test set : 857
#anomaly in test set : 10


100%|██████████| 30/30 [00:09<00:00,  3.26it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9474912485414235
Precision : 0.02702702702702703
recall : 0.1
f_score : 0.0425531914893617
f01_score : 0.027223719676549865
auc : 0.5287485242030697
specificity : 0.9574970484061394

Dataset  48
length of training set : 575
length of test set : 864
#anomaly in test set : 0

Dataset  49
length of training set : 584
length of test set : 877
#anomaly in test set : 0

Dataset  50
length of training set : 575
length of test set : 864
#anomaly in test set : 7


100%|██████████| 30/30 [00:09<00:00,  3.29it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9965277777777778
Precision : 0.7
recall : 1.0
f_score : 0.8235294117647058
f01_score : 0.702085402184707
auc : 0.998249708284714
specificity : 0.9964994165694282

Dataset  51
length of training set : 570
length of test set : 857
#anomaly in test set : 4


100%|██████████| 30/30 [00:09<00:00,  3.29it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9918319719953326
Precision : 0.3333333333333333
recall : 0.75
f_score : 0.46153846153846156
f01_score : 0.33517699115044247
auc : 0.871483001172333
specificity : 0.9929660023446659

Dataset  52
length of training set : 572
length of test set : 860
#anomaly in test set : 9


100%|██████████| 30/30 [00:09<00:00,  3.24it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9941860465116279
Precision : 1.0
recall : 0.4444444444444444
f_score : 0.6153846153846153
f01_score : 0.9877750611246944
auc : 0.7222222222222222
specificity : 1.0

Dataset  53
length of training set : 584
length of test set : 877
#anomaly in test set : 15


100%|██████████| 30/30 [00:09<00:00,  3.18it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  54
length of training set : 296
length of test set : 445
#anomaly in test set : 0

Dataset  55
length of training set : 570
length of test set : 857
#anomaly in test set : 5


100%|██████████| 30/30 [00:09<00:00,  3.32it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9358226371061844
Precision : 0.06896551724137931
recall : 0.8
f_score : 0.12698412698412698
f01_score : 0.06959517657192074
auc : 0.8683098591549295
specificity : 0.9366197183098591

Dataset  56
length of training set : 570
length of test set : 857
#anomaly in test set : 5


100%|██████████| 30/30 [00:08<00:00,  3.34it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9649941656942824
Precision : 0.12121212121212122
recall : 0.8
f_score : 0.2105263157894737
f01_score : 0.12223903177004539
auc : 0.882981220657277
specificity : 0.965962441314554

Dataset  57
length of training set : 576
length of test set : 865
#anomaly in test set : 3


100%|██████████| 30/30 [00:09<00:00,  3.29it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9433526011560693
Precision : 0.0
recall : 0.0
f_score : 0.0
f01_score : 0
auc : 0.4733178654292343
specificity : 0.9466357308584686

Dataset  58
length of training set : 574
length of test set : 861
#anomaly in test set : 43


100%|██████████| 30/30 [00:09<00:00,  3.24it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988385598141696
Precision : 1.0
recall : 0.9767441860465116
f_score : 0.988235294117647
f01_score : 0.9997643176997407
auc : 0.9883720930232558
specificity : 1.0

Dataset  59
length of training set : 569
length of test set : 854
#anomaly in test set : 0

Dataset  60
length of training set : 584
length of test set : 877
#anomaly in test set : 11


100%|██████████| 30/30 [00:09<00:00,  3.16it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.992018244013683
Precision : 1.0
recall : 0.36363636363636365
f_score : 0.5333333333333333
f01_score : 0.9829683698296837
auc : 0.6818181818181819
specificity : 1.0

Dataset  61
length of training set : 576
length of test set : 865
#anomaly in test set : 24


100%|██████████| 30/30 [00:08<00:00,  3.33it/s]
  3%|▎         | 1/30 [00:00<00:04,  6.51it/s]

Accuracy : 0.9341040462427745
Precision : 0.02857142857142857
recall : 0.041666666666666664
f_score : 0.03389830508474576
f01_score : 0.028660612939841085
auc : 0.5006193024177567
specificity : 0.9595719381688466

Dataset  62
length of training set : 296
length of test set : 445
#anomaly in test set : 4


100%|██████████| 30/30 [00:04<00:00,  6.67it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9955056179775281
Precision : 0.6666666666666666
recall : 1.0
f_score : 0.8
f01_score : 0.6688741721854305
auc : 0.997732426303855
specificity : 0.9954648526077098

Dataset  63
length of training set : 575
length of test set : 864
#anomaly in test set : 8


100%|██████████| 30/30 [00:09<00:00,  3.28it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9837962962962963
Precision : 0.35
recall : 0.875
f_score : 0.4999999999999999
f01_score : 0.3520916334661355
auc : 0.9299065420560748
specificity : 0.9848130841121495

Dataset  64
length of training set : 576
length of test set : 865
#anomaly in test set : 0

Dataset  65
length of training set : 569
length of test set : 855
#anomaly in test set : 17


100%|██████████| 30/30 [00:09<00:00,  3.25it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9777777777777777
Precision : 0.4166666666666667
recall : 0.29411764705882354
f_score : 0.3448275862068966
f01_score : 0.4149548069022186
auc : 0.6428822125508915
specificity : 0.9916467780429594

Dataset  66
length of training set : 569
length of test set : 855
#anomaly in test set : 21


100%|██████████| 30/30 [00:08<00:00,  3.37it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9976608187134502
Precision : 1.0
recall : 0.9047619047619048
f_score : 0.9500000000000001
f01_score : 0.9989588755856326
auc : 0.9523809523809523
specificity : 1.0

Dataset  67
length of training set : 569
length of test set : 854
#anomaly in test set : 23


100%|██████████| 30/30 [00:09<00:00,  3.27it/s]


Accuracy : 0.9929742388758782
Precision : 1.0
recall : 0.7391304347826086
f_score : 0.85
f01_score : 0.9965177016831108
auc : 0.8695652173913043
specificity : 1.0
Avg Accuracy : 0.9712094510401154
Avg Precision : 0.6996737013186499
Avg recall : 0.5994888764637065
Avg f_score : 0.5654411360093204
Avg f01_score : 0.6919217134382012
Avg auc : 0.7946430032432271
Avg specificity : 0.9897971300227478
--- 9.155613319079082 minutes ---


In [57]:
# vg Accuracy : 0.9715250877309947
# Avg Precision : 0.6842294223192591
# Avg recall : 0.6168679144660715
# Avg f_score : 0.5638775878595538
# Avg f01_score : 0.6769198146771511
# Avg auc : 0.8034431133745205
# Avg specificity : 0.99001831228297
# --- 9.138245284557343 minutes ---

In [58]:
# model = CLSTMED(num_epochs=30, sequence_length=10, hidden_size=25, n_layers=(2, 2), dropout=(0, 0)) #57 708 recall 618
# pytorch_total_params = sum(p.numel() for p in model.clstmed.parameters() if p.requires_grad)
# print(pytorch_total_params)

# pytorch_total_params = sum(p.numel() for p in model.clstmed.parameters())
# print(pytorch_total_params)