In [None]:
import json
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from libraries.utils import *
from libraries.hybrid import hybrid

## Load Data

In [None]:
############ configuration ################
############################################

### Selectors that identify which recorded experiment is loaded from disk.
CODE = 'theft_protection'              ### application (code)
THREAD = 'single'                      ### single, multi
VER = 3                                ### format of data collection
BEHAVIOUR_NORMAL = 'normal'            ### normal, faulty_data
BEHAVIOUR_FAULTY = 'faulty_data'       ### normal, faulty_data

### Root of the recorded data; can be replaced with 'csv', 'exe_plot', 'histogram'
base_dir = '../trace_data'

### Both behaviours share the same <code>/<thread>_thread/version_<ver> layout
### and differ only in the final path component.
normalbase_path = base_dir+f'/{CODE}/{THREAD}_thread/version_{VER}/{BEHAVIOUR_NORMAL}'
faultybase_path = base_dir+f'/{CODE}/{THREAD}_thread/version_{VER}/{BEHAVIOUR_FAULTY}'

### Echo the resolved locations, one per line.
print(normalbase_path, faultybase_path, sep='\n')

In [None]:

######### training data (normal behaviour) #######################
train_base_path = os.path.join(normalbase_path, 'train_data')

### os.listdir returns entries in arbitrary order and also lists macOS
### '.DS_Store' files, so apply the same filter + sort used for the test
### lists below; otherwise the training input differs between machines/runs.
train_data_path = sorted(
    os.path.join(train_base_path, x)
    for x in os.listdir(train_base_path)
    if '.DS_Store' not in x
)

######### get paths #######################
paths_log, paths_traces, varlist_path, paths_label = get_paths(faultybase_path)

### remove .DS_Store from all lists
paths_log = [x for x in paths_log if '.DS_Store' not in x]
paths_traces = [x for x in paths_traces if '.DS_Store' not in x]
varlist_path = [x for x in varlist_path if '.DS_Store' not in x]
paths_label = [x for x in paths_label if '.DS_Store' not in x]

### Sort every list; the validation cell pairs test_data_path and test_label_path
### positionally with zip().
### NOTE(review): this assumes trace and label file names sort into the same order - confirm.
paths_log.sort()
paths_traces.sort()
varlist_path.sort()
paths_label.sort()

# print(paths_log)
# print(paths_traces)
# print(varlist_path)
# print(paths_label)

test_data_path = paths_traces
test_label_path = paths_label

print(train_data_path)
print(test_data_path)
print(test_label_path)


In [None]:
############# check varlist is consistent ############
############# only for version 3 ######################

if VER == 3:
    to_number = is_consistent(varlist_path)

    ### The comparison is kept as `!= False` (not a truthiness test) so that any
    ### other return value, including an empty one, is still passed to mapint2var.
    ### presumably is_consistent returns False on mismatch - confirm in libraries.utils
    if to_number != False:
        from_number = mapint2var(to_number)
    else:
        ### Fail fast: the following cells index from_number, and silently skipping
        ### the assignment would leave it undefined (or stale from an earlier run
        ### of this kernel).
        raise ValueError(f'varlist files are not consistent, cannot build from_number: {varlist_path}')


In [None]:
############ Get variable list ######################
### Order the keys of from_number, then look each one up to get the
### variable list in key order.
sorted_keys = sorted(from_number.keys())
var_list = list(map(from_number.__getitem__, sorted_keys))   ### get the variable list
# print(var_list)

## Train

In [None]:
### initialize the hybrid model
### The instance is bound to the name `hybrid` because later cells use it
### (hybrid.train, hybrid.transitions, ...). That name shadows the imported class,
### so the class is imported here under an alias: without it, running this cell a
### second time would call the instance instead of the class.
from libraries.hybrid import hybrid as HybridModel
hybrid = HybridModel()

In [None]:
### train the model on the list of file paths found under <normalbase_path>/train_data
hybrid.train(train_data_path)

In [None]:
### read the model's `transitions` attribute (this cell runs after hybrid.train) and dump it raw;
### the next cell iterates it as a mapping: key -> iterable of values, all of them keys of from_number
transitions = hybrid.transitions
print(transitions)

In [None]:
### viz transitions
### Prints one line per entry, "<key name> : <value name>, <value name>, ",
### followed by an empty line; ids are translated through from_number.

for src_id, dst_ids in transitions.items():
    print(from_number[src_id], ':', end=' ')
    for dst_id in dst_ids:
        print(from_number[dst_id], end=', ')
    print('\n')

In [None]:
### Keep a module-level handle on the model's thresholds; the validation cell
### passes it to hybrid.test_single.
thresholds = hybrid.thresholds
### visualize the thresholds for varlist
### One line per entry, "<name> : <threshold value>, ", followed by an empty line.
for var_id, limits in thresholds.items():
    print(from_number[var_id], ':', limits, end=', ')
    print('\n')

### Visualising Thresholds

In [None]:
#### plot exe_list to visualize the distribution of execution intervals
hybrid.viz_thresholds()


### Validation

In [None]:
#### Detect anomalies in faulty traces
DIFF_VAL = 1
TEST_FILE_INDEX = 1   ### position in the sorted test lists of the single trace inspected here
all_tp = []
all_fp = []
all_detections = [] ### format [file1_detection, file2_detection] -> file1_detection: [(state1, 0), (ts1, ts2), filename]  
y_pred_all = []
y_true_all = []
### Walk the (trace, label) pairs up to the selected index, echoing each one, and
### run detection only on the selected trace.
for ti, (test_data, test_label) in enumerate(zip(test_data_path, test_label_path)):
    print(ti, test_data, test_label)
    if ti == TEST_FILE_INDEX:

        st_detection, ei_detection = hybrid.test_single(test_data, thresholds)   ### detection in format: [var, (ts1,ts2), file_name]
        break
else:
    ### The loop finished without reaching TEST_FILE_INDEX: raise instead of leaving
    ### st_detection / ei_detection undefined (or stale from an earlier run).
    raise IndexError(
        f'no test trace at index {TEST_FILE_INDEX}: only '
        f'{min(len(test_data_path), len(test_label_path))} trace/label pairs available'
    )

In [None]:
### first value returned by hybrid.test_single for the selected trace
### (presumably the state-transition detections - TODO confirm)
st_detection

In [None]:
### second value returned by hybrid.test_single for the selected trace
### (presumably the execution-interval detections - TODO confirm)
ei_detection