In [None]:
import pickle
import socket

import arrow
import numpy as np
import pandas as pd
from pm4py.objects.log.importer.xes import importer as xes_importer
from sqlalchemy.orm import Session
from tqdm import tqdm

from april import Evaluator
from april.anomalydetection import *
from april.database import EventLog
from april.database import Model
from april.database import get_engine
from april.dataset import Dataset
from april.fs import DATE_FORMAT
from april.fs import get_event_log_files

In [None]:
def common_member(a, b):
    """Return the set of elements that occur in both ``a`` and ``b``.

    Args:
        a: Iterable of hashable items.
        b: Iterable of hashable items.

    Returns:
        A ``set`` with the shared elements. The set is empty when nothing
        overlaps, so ``len()`` of the result is always the number of common
        elements (a message string here would be counted character by
        character by callers that take its length).
    """
    return set(a) & set(b)

In [None]:
def fit_and_save(dataset_name, ad, ad_kwargs=None, fit_kwargs=None):
    """Train an anomaly detector on one event log and save the fitted model.

    Args:
        dataset_name: Name handed to ``Dataset`` to load the event log.
        ad: Anomaly detector class (any callable) invoked as ``ad(**ad_kwargs)``.
        ad_kwargs: Optional keyword arguments used to construct the detector.
        fit_kwargs: Optional keyword arguments forwarded to ``fit``.

    Returns:
        The fitted detector instance.
    """
    ad_kwargs = {} if ad_kwargs is None else ad_kwargs
    fit_kwargs = {} if fit_kwargs is None else fit_kwargs

    # Taken before training so the file name carries the start of the run.
    start_time = arrow.now()

    dataset = Dataset(dataset_name)

    # Keep the instance under its own name instead of rebinding the `ad` parameter.
    detector = ad(**ad_kwargs)
    detector.fit(dataset, **fit_kwargs)

    file_name = f'{dataset_name}_{detector.abbreviation}_{start_time.format(DATE_FORMAT)}'
    detector.save(file_name)
    return detector

In [None]:
def threshold_att(event_data, final_scores, alpha):
    """Compute one threshold per entry of the last axis of ``final_scores``.

    The scores are summed over their first two axes, divided by the total
    number of events in ``event_data`` and scaled by ``alpha``.

    Args:
        event_data: Indexable collection of cases; ``len(case)`` is used as
            the number of events of that case.
        final_scores: Array-like of scores
            (presumably cases x events x attributes - TODO confirm).
        alpha: Multiplier applied to the averaged scores.

    Returns:
        ``alpha * summed_scores / total_events``.
    """
    total_events = sum(len(event_data[idx]) for idx in range(len(event_data)))
    summed_scores = np.sum(np.sum(final_scores, axis=0), axis=0)
    return alpha * summed_scores / total_events

In [None]:
# Train BINetv2 on every event log whose `p` attribute equals 0.3.
datasets = sorted(log_file.name for log_file in get_event_log_files() if log_file.p == 0.3)
ads = [{'ad': BINetv2, 'fit_kwargs': {'epochs': 50, 'batch_size': 100}}]
for ad in ads:
    # `model` is rebound on every pass, so it ends up holding the fitted
    # detectors of the last configuration in `ads`.
    model = [
        fit_and_save(dataset_name, **ad)
        for dataset_name in tqdm(datasets, desc=ad['ad'].name)
    ]

In [None]:
# Train BINetv2 on every available event log (no filter on `p`).
datasets = sorted(log_file.name for log_file in get_event_log_files())
ads = [{'ad': BINetv2, 'fit_kwargs': {'epochs': 50, 'batch_size': 100}}]
for ad in ads:
    # `model` is rebound on every pass, so it ends up holding the fitted
    # detectors of the last configuration in `ads`.
    model = [
        fit_and_save(dataset_name, **ad)
        for dataset_name in tqdm(datasets, desc=ad['ad'].name)
    ]

In [None]:
# Show the detectors trained above, then load the evaluation result of one saved model.
print(model)

# NOTE(review): machine-specific absolute path; point it at the model file to evaluate.
model_file = r'C:\Users\ASUS\.out\models\wide-0.1-1_binetv2_20220711-231436.646890.model'
p1 = Evaluator(model_file).result
print(p1)

# Scores of the evaluated model; the cells below index this as a 3-D array.
final_scores = p1.scores
print(final_scores.shape)
print(type(final_scores))

In [None]:
# Import the XES event log from a machine-specific absolute path.
# NOTE(review): presumably the same log the evaluated model was trained on
# (both file names contain 'wide-0.1-1') - confirm.
event_log_path = r'D:\Final master thesis evaluation\datasets\5\wide-0.1-1.xes'
event_data = xes_importer.apply(event_log_path)

In [None]:
# Load the per-case labels and keep the cases used for evaluation.
label_csv = pd.read_csv(r'D:\Final master thesis evaluation\wide-five\wide-0.1-1.csv')
# Target file for the per-case loss table written further down.
loss_df_path  = r'D:\Final master thesis evaluation\wide-five\binet_loss.pt'

# Reassign instead of mutating in place so the cell has no hidden side effects.
label_csv = label_csv.drop(columns="Unnamed: 0")
# `astype` returns a new Series, so the result has to be assigned to take effect.
label_csv["case_id"] = pd.to_numeric(label_csv["case_id"]).astype("int64")
label_csv["label"] = label_csv["label"].astype(str)

# Cases with an id above 12000 are kept as the evaluation split
# (presumably the first 12000 cases were used for training - TODO confirm).
test_csv = label_csv.loc[label_csv['case_id'] > 12000]
# Every label other than 'normal' counts as an anomaly.
label_csv1 = test_csv.loc[test_csv['label'] != 'normal']
M = list(label_csv1['case_id'])
print(len(test_csv))
print(len(label_csv1))

In [None]:
# Score every evaluation case by its largest score-to-threshold ratio and store the table.
threshold_array = threshold_att(event_data, final_scores, alpha = 1)
print(threshold_array)

# Rows from index 12000 onwards are evaluated; row i belongs to case_id i + 1.
test_start = 12000

# Broadcasting divides each entry of the last axis by its own threshold.
ratios = final_scores[test_start:] / threshold_array
# `fmax` skips NaN and the 0 floor mirrors a running maximum that starts at zero;
# `initial=0` keeps the reduction defined even when an axis is empty.
max_ratio_list = np.fmax(ratios, 0).max(axis=(1, 2), initial=0).tolist()
case_id_list = list(range(test_start + 1, test_start + 1 + len(max_ratio_list)))

BINet_dictionary = {'case_id': case_id_list, 'max_ratio': max_ratio_list}
# Attach the labels; the default inner join keeps only case ids present in both tables.
loss_df = pd.DataFrame(BINet_dictionary).merge(test_csv, on=["case_id"])
loss_df.to_pickle(loss_df_path)

In [None]:
# Keep the cases whose maximum ratio lies strictly above the 3.85 cut-off.
suspicious_csv = loss_df[loss_df['max_ratio'].gt(3.85)]
print(suspicious_csv.shape[0])

In [None]:
# Compare the flagged case ids with the ids in M (cases whose label is not 'normal').
suspicious_ids = suspicious_csv['case_id'].tolist()
suspicious_overlap = common_member(suspicious_ids, M)
print(len(list(suspicious_overlap)))

In [None]:
# Flag an evaluation case as soon as any of its scores exceeds the per-attribute threshold.
threshold_array = threshold_att(event_data, final_scores, alpha = 5.51)

# Rows from index 12000 onwards are evaluated; row i belongs to case_id i + 1.
test_start = 12000

# Broadcasting compares each entry of the last axis with its own threshold;
# comparisons against NaN are False, exactly as in an element-wise `if`.
case_flagged = (final_scores[test_start:] > threshold_array).any(axis=(1, 2))

log_prediction = case_flagged.astype(int).tolist()
log_prediction_index = (np.flatnonzero(case_flagged) + test_start + 1).tolist()
log_prediction_array = np.array(log_prediction)
print(threshold_array)
print(np.sum(log_prediction_array))

In [None]:
# Compare the flagged case ids with the ids in M (cases whose label is not 'normal').
prediction_overlap = common_member(log_prediction_index, M)
print(len(list(prediction_overlap)))