In [19]:
import abc
import logging
import random
import numpy as np
import torch
from torch.autograd import Variable

class Algorithm(metaclass=abc.ABCMeta):
    """Abstract base for the detectors in this notebook.

    Holds the bookkeeping every detector shares (logger, display name, seed,
    the ``details`` flag and two result dictionaries) and seeds the ``random``
    and NumPy global generators when a seed is supplied.
    """

    def __init__(self, module_name, name, seed, details=False):
        """
        :param module_name: name handed to ``logging.getLogger``
        :param name: label returned by ``str(self)``
        :param seed: seed for ``random`` and ``numpy.random``; ``None`` skips seeding
        :param details: flag stored on the instance for subclasses to consult
        """
        self.name, self.seed, self.details = name, seed, details
        self.logger = logging.getLogger(module_name)
        self.prediction_details, self.history = {}, {}

        if self.seed is None:
            return
        # Seed both global generators with the same value.
        for seeder in (random.seed, np.random.seed):
            seeder(seed)

    def __str__(self):
        """Return the display name given at construction."""
        return self.name

    @abc.abstractmethod
    def fit(self, X):
        """
        Train the algorithm on the given dataset
        """

    @abc.abstractmethod
    def predict(self, X):
        """
        :return anomaly score
        """


class PyTorchUtils(metaclass=abc.ABCMeta):
    """Mixin with small helpers for seeding torch and placing data on a device."""

    def __init__(self, seed, gpu):
        """
        :param seed: seed passed to ``torch.manual_seed`` and
            ``torch.cuda.manual_seed``; ``None`` skips seeding
        :param gpu: CUDA device index to use, or ``None`` to stay on the CPU
        """
        self.gpu = gpu
        self.seed = seed
        if self.seed is not None:
            torch.manual_seed(self.seed)
            torch.cuda.manual_seed(self.seed)
        self.framework = 0

    @property
    def device(self):
        """The ``torch.device`` to run on.

        ``cuda:<gpu>`` when CUDA is available and a GPU index was given,
        otherwise ``cpu``.
        """
        if torch.cuda.is_available() and self.gpu is not None:
            # Must be an f-string: the literal text 'cuda:{self.gpu}' is not a
            # valid device string and makes torch.device raise.
            return torch.device(f'cuda:{self.gpu}')
        return torch.device('cpu')

    def to_var(self, t, **kwargs):
        """Move tensor ``t`` to ``self.device`` and wrap it in a ``Variable``.

        Extra keyword arguments are forwarded to ``Variable``.
        """
        # ToDo: check whether cuda Variable.
        t = t.to(self.device)
        return Variable(t, **kwargs)

    def to_device(self, model):
        """Move ``model`` to ``self.device`` by calling ``model.to``; returns ``None``."""
        model.to(self.device)

In [20]:
import time
start_time = time.time()

import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from scipy.stats import multivariate_normal
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from tqdm import trange
import matplotlib.pyplot as plt
%matplotlib inline

!git clone https://github.com/priyojitk/mit-resources.git yahoo_dataset
# Root of the repository checkout created by the `git clone` line above; the
# benchmark CSVs are later read from paths built on this prefix.
# NOTE(review): '/content/' looks like a Colab working directory — confirm
# before running elsewhere.
file_path = '/content/yahoo_dataset/'

import sys
# Put the checkout on the module search path.
sys.path.append(file_path)
from sklearn.metrics import confusion_matrix

fatal: destination path 'yahoo_dataset' already exists and is not an empty directory.


In [21]:

class LSTMEDModule(nn.Module):
    """Sequence autoencoder made of two stacked LSTMs.

    The encoder maps ``n_features`` inputs to ``hidden_size`` outputs; the
    decoder maps ``hidden_size`` inputs back to ``n_features`` outputs. Both
    are batch-first.
    """

    def __init__(self, n_features: int, hidden_size: int, n_layers: tuple, dropout: tuple):
        """
        :param n_features: size of each input time step
        :param hidden_size: size of the encoder output
        :param n_layers: (encoder layers, decoder layers)
        :param dropout: (encoder dropout, decoder dropout)
        """
        super().__init__()
        self.n_features = n_features
        self.hidden_size = hidden_size
        self.num_lyers = n_layers
        self.dropout = dropout

        enc_layers, dec_layers = self.num_lyers[0], self.num_lyers[1]
        enc_dropout, dec_dropout = self.dropout[0], self.dropout[1]
        # Encoder is created first, then the decoder.
        self.encoder = nn.LSTM(
            self.n_features, self.hidden_size,
            batch_first=True, num_layers=enc_layers, dropout=enc_dropout, bias=True,
        )
        self.decoder = nn.LSTM(
            self.hidden_size, self.n_features,
            batch_first=True, num_layers=dec_layers, dropout=dec_dropout, bias=True,
        )

    def forward(self, input):
        """Reconstruct ``input``.

        The encoder output at the last time step is repeated once per time
        step and fed to the decoder; the decoder output is returned.
        """
        steps = input.shape[1]

        encoder_out, _ = self.encoder(input.float())
        # Keep the time axis (slice, not index) so the summary can be tiled.
        summary = encoder_out[:, -1:]
        reconstruction, _ = self.decoder(summary.repeat(1, steps, 1))

        return reconstruction

In [22]:
class LSTMED(Algorithm, PyTorchUtils):
    """LSTM encoder-decoder anomaly detector scored by reconstruction error.

    ``fit`` trains an ``LSTMEDModule`` on sliding windows of the input, then
    fits a Gaussian (mean and covariance) to the absolute reconstruction
    errors of a held-out share of those windows. ``predict`` scores each time
    step with the negative log-density of its reconstruction error under that
    Gaussian, averaged over every window that covers the time step.
    """

    def __init__(self, name: str = 'LSTM-ED',
                 num_epochs: int = 10,
                 batch_size: int = 20,
                 lr: float = 1e-3,
                 hidden_size: int = 5,
                 sequence_length: int = 48,
                 train_gaussian_percentage: float = 0.2,
                 n_layers: tuple = (1, 1),
                 use_bias: tuple = (True, True),
                 dropout: tuple = (0, 0),
                 seed: int = 1,
                 gpu: int = None,
                 details=True):
        """
        :param name: display name of the detector
        :param num_epochs: number of training epochs
        :param batch_size: windows per batch (training and prediction)
        :param lr: Adam learning rate
        :param hidden_size: encoder output size
        :param sequence_length: length of each sliding window
        :param train_gaussian_percentage: share of windows held out for the
            validation loss and for fitting the error Gaussian
        :param n_layers: (encoder layers, decoder layers)
        :param use_bias: stored on the instance only; not passed to the module
        :param dropout: (encoder dropout, decoder dropout)
        :param seed: seed for random, numpy and torch; ``None`` skips seeding
        :param gpu: CUDA device index, or ``None`` for CPU
        :param details: when true, ``predict`` keeps per-window
            reconstructions and errors in ``prediction_details``
        """
        # Algorithm.__init__ seeds `random` and numpy, PyTorchUtils.__init__
        # seeds torch, so no further seeding is needed here.
        Algorithm.__init__(self, __name__, name, seed, details=details)
        PyTorchUtils.__init__(self, seed, gpu)

        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.lr = lr

        self.hidden_size = hidden_size
        self.sequence_length = sequence_length
        self.train_gaussian_percentage = train_gaussian_percentage

        self.n_layers = n_layers
        self.use_bias = use_bias
        self.dropout = dropout
        self.mean, self.cov = None, None

        # Built in fit(), once the number of input features is known.
        self.lstmed = None

    def _build_sequences(self, X):
        """Fill gaps in ``X`` in place and cut it into overlapping windows.

        :return: (raw value array, list of ``sequence_length``-long windows)
        """
        X.interpolate(inplace=True)
        X.bfill(inplace=True)  # backward fill whatever interpolation left
        data = X.values
        sequences = [data[i:i + self.sequence_length]
                     for i in range(data.shape[0] - self.sequence_length + 1)]
        return data, sequences

    def fit(self, X: pd.DataFrame):
        """Train the autoencoder and fit the error Gaussian.

        ``X`` is modified in place (missing values are interpolated and
        back-filled). Per-epoch losses are appended to
        ``history['train_loss']`` and ``history['val_loss']``.

        :raises ValueError: if either split holds fewer windows than one batch
        """
        _, sequences = self._build_sequences(X)
        n_features = X.shape[1]
        indices = np.random.permutation(len(sequences))

        split_point = int(self.train_gaussian_percentage * len(sequences))
        # Split by an explicit count: indices[:-0] would be empty, not "all".
        n_train = len(sequences) - split_point
        if min(n_train, split_point) < self.batch_size:
            # Both loaders drop incomplete batches, so a smaller split would
            # yield no batches at all.
            raise ValueError(
                f'Not enough data: {n_train} training and {split_point} held-out '
                f'sequences, but batch_size is {self.batch_size}.')

        train_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                  sampler=SubsetRandomSampler(indices[:n_train]), pin_memory=True)
        train_gaussian_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                           sampler=SubsetRandomSampler(indices[n_train:]), pin_memory=True)

        self.lstmed = LSTMEDModule(n_features, self.hidden_size, self.n_layers, self.dropout)
        self.to_device(self.lstmed)
        optimizer = torch.optim.Adam(self.lstmed.parameters(), lr=self.lr)
        mse = nn.MSELoss(reduction='mean')

        self.history['train_loss'] = []
        self.history['val_loss'] = []

        for epoch in trange(self.num_epochs):
            self.logger.debug(f'Epoch {epoch+1}/{self.num_epochs}.')

            self.lstmed.train()
            train_batch_loss = []
            for ts_batch in train_loader:
                batch = self.to_var(ts_batch.float())
                loss = mse(self.lstmed(batch), batch)
                self.lstmed.zero_grad()
                loss.backward()
                optimizer.step()
                train_batch_loss.append(loss.item())
            self.history['train_loss'].append(sum(train_batch_loss) / len(train_batch_loss))

            self.lstmed.eval()
            val_batch_loss = []
            # No gradients are needed for the validation pass.
            with torch.no_grad():
                for ts_batch in train_gaussian_loader:
                    batch = self.to_var(ts_batch.float())
                    val_batch_loss.append(mse(self.lstmed(batch), batch).item())
            self.history['val_loss'].append(sum(val_batch_loss) / len(val_batch_loss))

        # Fit the Gaussian on absolute reconstruction errors of the held-out windows.
        self.lstmed.eval()
        mae = nn.L1Loss(reduction='none')
        error_vectors = []
        with torch.no_grad():
            for ts_batch in train_gaussian_loader:
                batch = self.to_var(ts_batch.float())
                error = mae(self.lstmed(batch), batch)
                error_vectors.append(error.reshape(-1, n_features).cpu().numpy())
        error_vectors = np.concatenate(error_vectors)

        self.mean = np.mean(error_vectors, axis=0)
        self.cov = np.cov(error_vectors, rowvar=False)

    def predict(self, X: pd.DataFrame):
        """Return one anomaly score per row of ``X``.

        ``X`` is modified in place (missing values are interpolated and
        back-filled). When ``self.details`` is true, the per-window
        reconstructions and absolute errors are stored in
        ``prediction_details`` under ``'reconstructions'`` and ``'errors'``.

        :raises RuntimeError: if called before ``fit``
        :raises ValueError: if ``X`` has fewer rows than ``sequence_length``
        :return: 1-D array of scores, higher meaning more anomalous
        """
        if self.lstmed is None or self.mean is None:
            raise RuntimeError('fit() must be called before predict().')

        data, sequences = self._build_sequences(X)
        if not sequences:
            raise ValueError(
                f'Need at least {self.sequence_length} rows to predict, got {data.shape[0]}.')
        n_features = X.shape[1]
        data_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, shuffle=False, drop_last=False)

        self.lstmed.eval()
        mvnormal = multivariate_normal(self.mean, self.cov, allow_singular=True)
        mae = nn.L1Loss(reduction='none')
        scores = []
        outputs = []
        errors = []
        with torch.no_grad():
            for ts in data_loader:
                batch = self.to_var(ts.float())
                output = self.lstmed(batch)
                error = mae(output, batch)
                score = -mvnormal.logpdf(error.reshape(-1, n_features).cpu().numpy())
                # ts.size(0) is the actual batch size (the last batch may be short).
                scores.append(score.reshape(ts.size(0), self.sequence_length))
                if self.details:
                    # Move to CPU first: .numpy() fails on CUDA tensors.
                    outputs.append(output.cpu().numpy())
                    errors.append(error.cpu().numpy())

        if self.details:
            self.prediction_details.update({
                'reconstructions': np.concatenate(outputs),
                'errors': np.concatenate(errors),
            })

        # stores seq_len-many scores per timestamp and averages them
        scores = np.concatenate(scores)
        lattice = np.full((self.sequence_length, data.shape[0]), np.nan)
        for i, score in enumerate(scores):
            # Window i covers time steps i .. i+seq_len-1; rotating the row
            # keeps overlapping windows from overwriting each other.
            lattice[i % self.sequence_length, i:i + self.sequence_length] = score
        scores = np.nanmean(lattice, axis=0)

        return scores

In [23]:
from sklearn.metrics import accuracy_score, fbeta_score
from sklearn.metrics import precision_recall_fscore_support as prf
from sklearn.metrics import roc_curve, auc, roc_auc_score

def get_accuracy_precision_recall_fscore(y_true: list, y_pred: list):
    """Compute binary classification metrics for one prediction vector.

    :param y_true: ground-truth binary labels
    :param y_pred: predicted binary labels
    :return: ``(accuracy, precision, recall, f_score, f01_score)`` where
        ``f01_score`` is the F-beta score with ``beta=0.1``, or ``0`` when
        precision and recall are both zero
    """
    accuracy = accuracy_score(y_true, y_pred)
    # warn_for=() avoids log warnings for any result being zero
    precision, recall, f_score, _ = prf(y_true, y_pred, average='binary', warn_for=())
    both_zero = precision == 0 and recall == 0
    f01_score = 0 if both_zero else fbeta_score(y_true, y_pred, average='binary', beta=0.1)
    return accuracy, precision, recall, f_score, f01_score

# accuracy, precision, recall, f_score, f01_score = get_accuracy_precision_recall_fscore(_test['is_anomaly'], pred_y)

In [24]:
def threshold(score):
    """Return the NaN-ignoring mean of ``score`` plus two standard deviations."""
    centre = np.nanmean(score)
    spread = np.nanstd(score)
    return centre + 2 * spread
def get_specificity(actual_y, pred_y):
    """Return the true-negative rate ``tn / (tn + fp)`` for binary 0/1 labels.

    :param actual_y: ground-truth labels (0 = normal, 1 = anomaly)
    :param pred_y: predicted labels (0 = normal, 1 = anomaly)
    :return: specificity, or ``nan`` when ``actual_y`` contains no negatives
    """
    # Fixing the label set keeps the matrix 2x2 even when only one class
    # occurs; otherwise the four-way unpack below fails.
    tn, fp, fn, tp = confusion_matrix(actual_y, pred_y, labels=[0, 1]).ravel()
    negatives = tn + fp
    if negatives == 0:
        return float('nan')
    return tn / negatives

In [25]:
# Running sums of the per-dataset metrics; the evaluation loop adds to them
# and divides by the number of evaluated datasets at the end.
avg_accuracy = avg_precision = avg_recall = avg_f_score = avg_f01_score = 0
avg_auc = avg_specificity = 0

# Number of real_<n>.csv files the evaluation loop iterates over.
A1 = 67
no_of_dataset = A1

# Count of datasets skipped because their test split has no anomaly.
zero_anomaly_test = 0

In [26]:
# model = LSTMED(num_epochs=30, sequence_length=10, hidden_size=25, n_layers=(2, 2), dropout=(0, 0)) #57 708 recall 618
# pytorch_total_params = sum(p.numel() for p in model.lstmed.parameters() if p.requires_grad)
# print(pytorch_total_params)

# pytorch_total_params = sum(p.numel() for p in model.lstmed.parameters())
# print(pytorch_total_params)

In [27]:
# Imported once here instead of on every loop iteration.
from sklearn.preprocessing import StandardScaler

# Reset the running sums so that re-running this cell on its own does not add
# a second pass on top of the first.
avg_accuracy = avg_precision = avg_recall = avg_f_score = avg_f01_score = 0
avg_auc = avg_specificity = 0
zero_anomaly_test = 0

for ind in range(1, no_of_dataset + 1):

    print('\nDataset ', ind)
    logging.info(f'\n\n=============\n Dataset {ind} : \n=============')

    df = pd.read_csv(f'{file_path}dataset/ydata-labeled-time-series-anomalies-v1_0/A1Benchmark/real_{ind}.csv', index_col='timestamp')

    df['value'] = df['value'].astype(np.float64)

    # Normalize the data (zero mean, unit variance).
    # NOTE(review): the scaler is fitted on the whole series, so test-set
    # statistics influence the training data — confirm this is intended.
    scaler = StandardScaler()
    df['value'] = scaler.fit_transform(df['value'].values.reshape(-1, 1))

    # Chronological split: first 40 percent for training, the rest for testing.
    len_of_data = df.shape[0]
    split_idx = int(len_of_data * 0.4)
    _train = df[0:split_idx]
    _test = df[split_idx:]
    print('length of training set :', len(_train))
    print('length of test set :', len(_test))

    # Skip datasets whose test split contains no anomaly: precision, recall
    # and AUC are not meaningful without a positive sample.
    anomaly_count_test = len(_test[_test['is_anomaly'] == 1])
    print('#anomaly in test set :', anomaly_count_test)
    if anomaly_count_test == 0:
        zero_anomaly_test += 1
        continue

    logging.info(f'length of training set : {len(_train)}')
    logging.info(f'length of test set : {len(_test)}')

    model = LSTMED(num_epochs=30, sequence_length=10, hidden_size=25, n_layers=(2, 2), dropout=(0, 0))

    # fit/predict fill missing values in place, so hand them copies.
    model.fit(_train[['value']].copy())
    # model.history['train_loss'] / ['val_loss'] can be plotted here to
    # inspect convergence.

    scores = model.predict(_test[['value']].copy())
    th = threshold(scores)
    pred_y = np.where(np.array(scores) > th, 1, 0)

    accuracy, precision, recall, f_score, f01_score = get_accuracy_precision_recall_fscore(_test['is_anomaly'], pred_y)
    # Named auc_value so the `auc` function imported from sklearn.metrics is
    # not shadowed.
    # NOTE(review): this AUC is computed from the thresholded predictions, not
    # from the continuous scores — confirm this is intended.
    auc_value = roc_auc_score(_test['is_anomaly'], pred_y)
    specificity = get_specificity(_test['is_anomaly'], pred_y)

    print(f'Accuracy : {accuracy}')
    print(f'Precision : {precision}')
    print(f'recall : {recall}')
    print(f'f_score : {f_score}')
    print(f'f01_score : {f01_score}')
    print(f'auc : {auc_value}')
    print(f'specificity : {specificity}')

    avg_accuracy += accuracy
    avg_precision += precision
    avg_recall += recall
    avg_f_score += f_score
    avg_f01_score += f01_score
    avg_auc += auc_value
    avg_specificity += specificity


# Average only over the datasets that were actually evaluated.
denominator = (no_of_dataset - zero_anomaly_test)
if denominator == 0:
    print('\nNo dataset had an anomaly in its test split; nothing to average.')
else:
    print(f'\nAvg Accuracy : {avg_accuracy / denominator }')
    print(f'Avg Precision : {avg_precision / denominator }')
    print(f'Avg recall : {avg_recall / denominator }')
    print(f'Avg f_score : {avg_f_score / denominator }')
    print(f'Avg f01_score : {avg_f01_score / denominator }')
    print(f'Avg auc : {avg_auc / denominator }')
    print(f'Avg specificity : {avg_specificity / denominator }')

print(f"--- {(time.time() - start_time)/60} minutes ---" )


  0%|          | 0/30 [00:00<?, ?it/s]


Dataset  1
length of training set : 568
length of test set : 852
#anomaly in test set : 2


100%|██████████| 30/30 [00:07<00:00,  3.88it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9870892018779343
Precision : 0.15384615384615385
recall : 1.0
f_score : 0.2666666666666667
f01_score : 0.15514592933947774
auc : 0.9935294117647059
specificity : 0.9870588235294118

Dataset  2
length of training set : 575
length of test set : 864
#anomaly in test set : 16


100%|██████████| 30/30 [00:07<00:00,  3.90it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9872685185185185
Precision : 1.0
recall : 0.3125
f_score : 0.47619047619047616
f01_score : 0.9786821705426356
auc : 0.65625
specificity : 1.0

Dataset  3
length of training set : 584
length of test set : 877
#anomaly in test set : 14


100%|██████████| 30/30 [00:07<00:00,  3.80it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9942987457240593
Precision : 1.0
recall : 0.6428571428571429
f_score : 0.782608695652174
f01_score : 0.9945295404814004
auc : 0.8214285714285714
specificity : 1.0

Dataset  4
length of training set : 569
length of test set : 854
#anomaly in test set : 5


100%|██████████| 30/30 [00:07<00:00,  3.96it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  5
length of training set : 575
length of test set : 864
#anomaly in test set : 0

Dataset  6
length of training set : 575
length of test set : 864
#anomaly in test set : 8


100%|██████████| 30/30 [00:07<00:00,  3.95it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988425925925926
Precision : 1.0
recall : 0.875
f_score : 0.9333333333333333
f01_score : 0.998587570621469
auc : 0.9375
specificity : 1.0

Dataset  7
length of training set : 569
length of test set : 854
#anomaly in test set : 38


100%|██████████| 30/30 [00:07<00:00,  3.93it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9672131147540983
Precision : 0.8571428571428571
recall : 0.3157894736842105
f_score : 0.46153846153846156
f01_score : 0.8428372739916551
auc : 0.6566692466460269
specificity : 0.9975490196078431

Dataset  8
length of training set : 568
length of test set : 852
#anomaly in test set : 10


100%|██████████| 30/30 [00:07<00:00,  3.93it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988262910798122
Precision : 0.9090909090909091
recall : 1.0
f_score : 0.9523809523809523
f01_score : 0.9099099099099099
auc : 0.9994061757719714
specificity : 0.998812351543943

Dataset  9
length of training set : 584
length of test set : 877
#anomaly in test set : 8


100%|██████████| 30/30 [00:07<00:00,  3.81it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9931584948688712
Precision : 1.0
recall : 0.25
f_score : 0.4
f01_score : 0.9711538461538461
auc : 0.625
specificity : 1.0

Dataset  10
length of training set : 575
length of test set : 864
#anomaly in test set : 13


100%|██████████| 30/30 [00:07<00:00,  3.96it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  11
length of training set : 575
length of test set : 864
#anomaly in test set : 19


100%|██████████| 30/30 [00:07<00:00,  3.96it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9849537037037037
Precision : 1.0
recall : 0.3157894736842105
f_score : 0.4799999999999999
f01_score : 0.9789983844911146
auc : 0.6578947368421053
specificity : 1.0

Dataset  12
length of training set : 575
length of test set : 864
#anomaly in test set : 2


100%|██████████| 30/30 [00:07<00:00,  3.96it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988425925925926
Precision : 1.0
recall : 0.5
f_score : 0.6666666666666666
f01_score : 0.9901960784313726
auc : 0.75
specificity : 1.0

Dataset  13
length of training set : 575
length of test set : 864
#anomaly in test set : 9


100%|██████████| 30/30 [00:07<00:00,  3.96it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9965277777777778
Precision : 1.0
recall : 0.6666666666666666
f_score : 0.8
f01_score : 0.9950738916256158
auc : 0.8333333333333333
specificity : 1.0

Dataset  14
length of training set : 575
length of test set : 864
#anomaly in test set : 0

Dataset  15
length of training set : 575
length of test set : 864
#anomaly in test set : 8


100%|██████████| 30/30 [00:07<00:00,  3.95it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9976851851851852
Precision : 1.0
recall : 0.75
f_score : 0.8571428571428571
f01_score : 0.9967105263157895
auc : 0.875
specificity : 1.0

Dataset  16
length of training set : 584
length of test set : 877
#anomaly in test set : 3


100%|██████████| 30/30 [00:07<00:00,  3.77it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9977194982896237
Precision : 1.0
recall : 0.3333333333333333
f_score : 0.5
f01_score : 0.9805825242718447
auc : 0.6666666666666666
specificity : 1.0

Dataset  17
length of training set : 569
length of test set : 855
#anomaly in test set : 227


100%|██████████| 30/30 [00:07<00:00,  3.96it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.7941520467836257
Precision : 1.0
recall : 0.22466960352422907
f_score : 0.3669064748201439
f01_score : 0.9669607659095175
auc : 0.6123348017621145
specificity : 1.0

Dataset  18
length of training set : 584
length of test set : 877
#anomaly in test set : 0

Dataset  19
length of training set : 569
length of test set : 855
#anomaly in test set : 227


100%|██████████| 30/30 [00:07<00:00,  3.93it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.7672514619883041
Precision : 1.0
recall : 0.12334801762114538
f_score : 0.2196078431372549
f01_score : 0.9342583415923357
auc : 0.5616740088105727
specificity : 1.0

Dataset  20
length of training set : 568
length of test set : 854
#anomaly in test set : 9


100%|██████████| 30/30 [00:07<00:00,  3.97it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.990632318501171
Precision : 1.0
recall : 0.1111111111111111
f_score : 0.19999999999999998
f01_score : 0.926605504587156
auc : 0.5555555555555556
specificity : 1.0

Dataset  21
length of training set : 568
length of test set : 852
#anomaly in test set : 6


100%|██████████| 30/30 [00:07<00:00,  3.95it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988262910798122
Precision : 1.0
recall : 0.8333333333333334
f_score : 0.9090909090909091
f01_score : 0.9980237154150197
auc : 0.9166666666666667
specificity : 1.0

Dataset  22
length of training set : 568
length of test set : 852
#anomaly in test set : 63


100%|██████████| 30/30 [00:07<00:00,  3.96it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9553990610328639
Precision : 1.0
recall : 0.3968253968253968
f_score : 0.5681818181818182
f01_score : 0.9851736246586031
auc : 0.6984126984126984
specificity : 1.0

Dataset  23
length of training set : 568
length of test set : 852
#anomaly in test set : 9


100%|██████████| 30/30 [00:07<00:00,  3.98it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9953051643192489
Precision : 0.6923076923076923
recall : 1.0
f_score : 0.8181818181818181
f01_score : 0.6944232238349884
auc : 0.9976275207591934
specificity : 0.9952550415183867

Dataset  24
length of training set : 584
length of test set : 877
#anomaly in test set : 15


100%|██████████| 30/30 [00:07<00:00,  3.80it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  25
length of training set : 574
length of test set : 861
#anomaly in test set : 43


100%|██████████| 30/30 [00:07<00:00,  3.92it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988385598141696
Precision : 1.0
recall : 0.9767441860465116
f_score : 0.988235294117647
f01_score : 0.9997643176997407
auc : 0.9883720930232558
specificity : 1.0

Dataset  26
length of training set : 574
length of test set : 861
#anomaly in test set : 75


100%|██████████| 30/30 [00:07<00:00,  3.83it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.8908246225319396
Precision : 0.0
recall : 0.0
f_score : 0.0
f01_score : 0
auc : 0.48791348600508905
specificity : 0.9758269720101781

Dataset  27
length of training set : 570
length of test set : 857
#anomaly in test set : 2


100%|██████████| 30/30 [00:07<00:00,  3.94it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  28
length of training set : 576
length of test set : 865
#anomaly in test set : 81


100%|██████████| 30/30 [00:07<00:00,  3.91it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.8682080924855491
Precision : 0.02857142857142857
recall : 0.012345679012345678
f_score : 0.017241379310344827
f01_score : 0.02820441217537001
auc : 0.4844891660367851
specificity : 0.9566326530612245

Dataset  29
length of training set : 576
length of test set : 865
#anomaly in test set : 6


100%|██████████| 30/30 [00:07<00:00,  3.92it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9942196531791907
Precision : 0.5714285714285714
recall : 0.6666666666666666
f_score : 0.6153846153846153
f01_score : 0.5722379603399433
auc : 0.8315871168024834
specificity : 0.9965075669383003

Dataset  30
length of training set : 584
length of test set : 877
#anomaly in test set : 8


100%|██████████| 30/30 [00:08<00:00,  3.75it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988597491448119
Precision : 1.0
recall : 0.875
f_score : 0.9333333333333333
f01_score : 0.998587570621469
auc : 0.9375
specificity : 1.0

Dataset  31
length of training set : 570
length of test set : 857
#anomaly in test set : 24


100%|██████████| 30/30 [00:07<00:00,  3.97it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9428238039673279
Precision : 0.29508196721311475
recall : 0.75
f_score : 0.42352941176470593
f01_score : 0.2968647942521228
auc : 0.8491896758703482
specificity : 0.9483793517406963

Dataset  32
length of training set : 570
length of test set : 857
#anomaly in test set : 47


100%|██████████| 30/30 [00:07<00:00,  3.91it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9183197199533255
Precision : 0.3333333333333333
recall : 0.48936170212765956
f_score : 0.396551724137931
f01_score : 0.33438894486828846
auc : 0.7162857893354347
specificity : 0.9432098765432099

Dataset  33
length of training set : 575
length of test set : 864
#anomaly in test set : 2


100%|██████████| 30/30 [00:07<00:00,  3.90it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988425925925926
Precision : 1.0
recall : 0.5
f_score : 0.6666666666666666
f01_score : 0.9901960784313726
auc : 0.75
specificity : 1.0

Dataset  34
length of training set : 570
length of test set : 857
#anomaly in test set : 7


100%|██████████| 30/30 [00:07<00:00,  3.87it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9976662777129521
Precision : 0.7777777777777778
recall : 1.0
f_score : 0.8750000000000001
f01_score : 0.7794928335170892
auc : 0.9988235294117648
specificity : 0.9976470588235294

Dataset  35
length of training set : 570
length of test set : 857
#anomaly in test set : 0

Dataset  36
length of training set : 584
length of test set : 877
#anomaly in test set : 1


100%|██████████| 30/30 [00:08<00:00,  3.75it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9954389965792474
Precision : 0.2
recall : 1.0
f_score : 0.33333333333333337
f01_score : 0.20159680638722557
auc : 0.997716894977169
specificity : 0.9954337899543378

Dataset  37
length of training set : 573
length of test set : 861
#anomaly in test set : 34


100%|██████████| 30/30 [00:07<00:00,  3.93it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.943089430894309
Precision : 0.17391304347826086
recall : 0.11764705882352941
f_score : 0.14035087719298242
f01_score : 0.17309340188517566
auc : 0.5473362259051141
specificity : 0.9770253929866989

Dataset  38
length of training set : 570
length of test set : 857
#anomaly in test set : 9


100%|██████████| 30/30 [00:07<00:00,  3.92it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9929988331388565
Precision : 0.7142857142857143
recall : 0.5555555555555556
f_score : 0.6250000000000001
f01_score : 0.7122708039492243
auc : 0.7765985324947589
specificity : 0.9976415094339622

Dataset  39
length of training set : 570
length of test set : 857
#anomaly in test set : 8


100%|██████████| 30/30 [00:07<00:00,  3.93it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9964994165694282
Precision : 1.0
recall : 0.625
f_score : 0.7692307692307693
f01_score : 0.9940944881889763
auc : 0.8125
specificity : 1.0

Dataset  40
length of training set : 570
length of test set : 857
#anomaly in test set : 80


100%|██████████| 30/30 [00:07<00:00,  3.91it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.8786464410735122
Precision : 0.038461538461538464
recall : 0.0125
f_score : 0.01886792452830189
f01_score : 0.03768656716417911
auc : 0.4901624839124839
specificity : 0.9678249678249679

Dataset  41
length of training set : 574
length of test set : 861
#anomaly in test set : 2


100%|██████████| 30/30 [00:07<00:00,  3.91it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988385598141696
Precision : 0.6666666666666666
recall : 1.0
f_score : 0.8
f01_score : 0.6688741721854305
auc : 0.9994179278230501
specificity : 0.9988358556461001

Dataset  42
length of training set : 576
length of test set : 864
#anomaly in test set : 44


100%|██████████| 30/30 [00:07<00:00,  3.91it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9733796296296297
Precision : 1.0
recall : 0.4772727272727273
f_score : 0.6461538461538462
f01_score : 0.9892723880597014
auc : 0.7386363636363636
specificity : 1.0

Dataset  43
length of training set : 576
length of test set : 864
#anomaly in test set : 27


100%|██████████| 30/30 [00:07<00:00,  3.93it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9722222222222222
Precision : 1.0
recall : 0.1111111111111111
f_score : 0.19999999999999998
f01_score : 0.926605504587156
auc : 0.5555555555555556
specificity : 1.0

Dataset  44
length of training set : 584
length of test set : 877
#anomaly in test set : 2


100%|██████████| 30/30 [00:08<00:00,  3.74it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9532497149372862
Precision : 0.046511627906976744
recall : 1.0
f_score : 0.08888888888888888
f01_score : 0.04695490469549046
auc : 0.9765714285714286
specificity : 0.9531428571428572

Dataset  45
length of training set : 576
length of test set : 864
#anomaly in test set : 1


100%|██████████| 30/30 [00:07<00:00,  3.95it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  46
length of training set : 576
length of test set : 865
#anomaly in test set : 109


100%|██████████| 30/30 [00:07<00:00,  3.91it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9861271676300578
Precision : 1.0
recall : 0.8899082568807339
f_score : 0.941747572815534
f01_score : 0.9987766337037415
auc : 0.944954128440367
specificity : 1.0

Dataset  47
length of training set : 570
length of test set : 857
#anomaly in test set : 10


100%|██████████| 30/30 [00:07<00:00,  3.94it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.969661610268378
Precision : 0.1
recall : 0.2
f_score : 0.13333333333333333
f01_score : 0.10049751243781095
auc : 0.5893742621015349
specificity : 0.9787485242030697

Dataset  48
length of training set : 575
length of test set : 864
#anomaly in test set : 0

Dataset  49
length of training set : 584
length of test set : 877
#anomaly in test set : 0

Dataset  50
length of training set : 575
length of test set : 864
#anomaly in test set : 7


100%|██████████| 30/30 [00:07<00:00,  3.96it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9965277777777778
Precision : 0.7
recall : 1.0
f_score : 0.8235294117647058
f01_score : 0.702085402184707
auc : 0.998249708284714
specificity : 0.9964994165694282

Dataset  51
length of training set : 570
length of test set : 857
#anomaly in test set : 4


100%|██████████| 30/30 [00:07<00:00,  3.93it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9824970828471412
Precision : 0.17647058823529413
recall : 0.75
f_score : 0.2857142857142857
f01_score : 0.17781690140845074
auc : 0.8667936694021102
specificity : 0.9835873388042204

Dataset  52
length of training set : 572
length of test set : 860
#anomaly in test set : 9


100%|██████████| 30/30 [00:07<00:00,  3.94it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9941860465116279
Precision : 1.0
recall : 0.4444444444444444
f_score : 0.6153846153846153
f01_score : 0.9877750611246944
auc : 0.7222222222222222
specificity : 1.0

Dataset  53
length of training set : 584
length of test set : 877
#anomaly in test set : 15


100%|██████████| 30/30 [00:07<00:00,  3.77it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 1.0
Precision : 1.0
recall : 1.0
f_score : 1.0
f01_score : 1.0
auc : 1.0
specificity : 1.0

Dataset  54
length of training set : 296
length of test set : 445
#anomaly in test set : 0

Dataset  55
length of training set : 570
length of test set : 857
#anomaly in test set : 5


100%|██████████| 30/30 [00:07<00:00,  3.93it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9451575262543758
Precision : 0.08
recall : 0.8
f_score : 0.14545454545454545
f01_score : 0.08071928071928072
auc : 0.8730046948356808
specificity : 0.9460093896713615

Dataset  56
length of training set : 570
length of test set : 857
#anomaly in test set : 5


100%|██████████| 30/30 [00:07<00:00,  3.93it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9626604434072346
Precision : 0.11428571428571428
recall : 0.8
f_score : 0.19999999999999998
f01_score : 0.11526390870185449
auc : 0.8818075117370892
specificity : 0.9636150234741784

Dataset  57
length of training set : 576
length of test set : 865
#anomaly in test set : 3


100%|██████████| 30/30 [00:07<00:00,  3.92it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9445086705202312
Precision : 0.0
recall : 0.0
f_score : 0.0
f01_score : 0
auc : 0.47389791183294666
specificity : 0.9477958236658933

Dataset  58
length of training set : 574
length of test set : 861
#anomaly in test set : 43


100%|██████████| 30/30 [00:07<00:00,  3.90it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9988385598141696
Precision : 1.0
recall : 0.9767441860465116
f_score : 0.988235294117647
f01_score : 0.9997643176997407
auc : 0.9883720930232558
specificity : 1.0

Dataset  59
length of training set : 569
length of test set : 854
#anomaly in test set : 0

Dataset  60
length of training set : 584
length of test set : 877
#anomaly in test set : 11


100%|██████████| 30/30 [00:07<00:00,  3.79it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.992018244013683
Precision : 1.0
recall : 0.36363636363636365
f_score : 0.5333333333333333
f01_score : 0.9829683698296837
auc : 0.6818181818181819
specificity : 1.0

Dataset  61
length of training set : 576
length of test set : 865
#anomaly in test set : 24


100%|██████████| 30/30 [00:07<00:00,  3.96it/s]
  3%|▎         | 1/30 [00:00<00:03,  8.29it/s]

Accuracy : 0.9352601156069364
Precision : 0.029411764705882353
recall : 0.041666666666666664
f_score : 0.034482758620689655
f01_score : 0.029497663551401865
auc : 0.5012138327388029
specificity : 0.9607609988109393

Dataset  62
length of training set : 296
length of test set : 445
#anomaly in test set : 4


100%|██████████| 30/30 [00:03<00:00,  7.80it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9955056179775281
Precision : 0.6666666666666666
recall : 1.0
f_score : 0.8
f01_score : 0.6688741721854305
auc : 0.997732426303855
specificity : 0.9954648526077098

Dataset  63
length of training set : 575
length of test set : 864
#anomaly in test set : 8


100%|██████████| 30/30 [00:07<00:00,  3.91it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9837962962962963
Precision : 0.35
recall : 0.875
f_score : 0.4999999999999999
f01_score : 0.3520916334661355
auc : 0.9299065420560748
specificity : 0.9848130841121495

Dataset  64
length of training set : 576
length of test set : 865
#anomaly in test set : 0

Dataset  65
length of training set : 569
length of test set : 855
#anomaly in test set : 17


100%|██████████| 30/30 [00:07<00:00,  3.90it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9777777777777777
Precision : 0.4166666666666667
recall : 0.29411764705882354
f_score : 0.3448275862068966
f01_score : 0.4149548069022186
auc : 0.6428822125508915
specificity : 0.9916467780429594

Dataset  66
length of training set : 569
length of test set : 855
#anomaly in test set : 21


100%|██████████| 30/30 [00:07<00:00,  3.92it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Accuracy : 0.9976608187134502
Precision : 1.0
recall : 0.9047619047619048
f_score : 0.9500000000000001
f01_score : 0.9989588755856326
auc : 0.9523809523809523
specificity : 1.0

Dataset  67
length of training set : 569
length of test set : 854
#anomaly in test set : 23


100%|██████████| 30/30 [00:07<00:00,  3.94it/s]


Accuracy : 0.9918032786885246
Precision : 1.0
recall : 0.6956521739130435
f_score : 0.8205128205128205
f01_score : 0.995686999383857
auc : 0.8478260869565217
specificity : 1.0

Avg Accuracy : 0.9719128519140602
Avg Precision : 0.7084813910701936
Avg recall : 0.6182131014252652
Avg f_score : 0.5743589757635398
Avg f01_score : 0.7009270743114889
Avg auc : 0.8042421051804566
Avg specificity : 0.9902711089356475
--- 7.833815745512644 minutes ---
