# Execution Interval Method

In [None]:
import json
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from collections import defaultdict
from libraries.utils import *
from libraries.exeint import exeInt


def get_uniquevar(raw_trace):
    '''
    Collect the distinct variables of a v2.2 trace, in order of first appearance.

    raw_trace -> data from read_traces, list( (var, ts), (var, ts), (var, ts), ... )
                 every entry must unpack into exactly two items (ValueError otherwise)
    return:
        unique_var = list(var1, var2, ...) ## each variable once, ordered by first occurrence

    Variables must be hashable (the trace format documents them as strings).
    '''
    unique_var = []
    seen = set()   ### O(1) membership test instead of rescanning unique_var for every event
    for var, _timestamp in raw_trace:
        if var not in seen:
            seen.add(var)
            unique_var.append(var)
    return unique_var


def generate_map(unique_events):
    '''
    Build the two lookup tables between variable names and 1-based event numbers.

    unique_events -> list of all the variables in the code (unique, and in order of logging)
    return:
        event_map -> variable name to event number (first variable gets 1)
        event_remap -> event number to variable name
    '''
    numbered = list(enumerate(unique_events, start=1))
    event_map = {name: number for number, name in numbered}
    event_remap = {number: name for number, name in numbered}
    return event_map, event_remap


def _matches_ground_truth(pd_ts1, pd_ts2, gt_ts1, gt_ts2, tolerance):
    ''' decide whether the detection interval (pd_ts1, pd_ts2) counts as a hit for the ground truth interval (gt_ts1, gt_ts2) '''
    if pd_ts1 >= gt_ts1 and pd_ts2 <= gt_ts2:
        ### case 1: detection lies within the ground truth (shared boundaries included)
        return True
    if pd_ts1 <= gt_ts1 and pd_ts2 >= gt_ts2:
        ### case 2: ground truth lies within the detection (shared boundaries included)
        return True
    if gt_ts1 < pd_ts1 < gt_ts2 < pd_ts2:
        ### case 3: detection overlaps the right edge; accept only a small overshoot
        return pd_ts2 - gt_ts2 <= tolerance
    if pd_ts1 < gt_ts1 < pd_ts2 < gt_ts2:
        ### case 4: detection overlaps the left edge; accept only a small overshoot
        return gt_ts1 - pd_ts1 <= tolerance
    return False


def get_ypred_ytrue(detection, ground_truth, tolerance=5):
    '''
    Match detections against ground truth intervals and build label lists for metric computation.

    detection -> list: detections -> [[(var1, 0), (ts1, ts2), file_name], [], [], ...., []]
                 only the (ts1, ts2) entry at index 1 is used for matching
    ground_truth -> list: ground truth labels -> [[ind1, ind2, ts1, ts2, class], [], [], ...., []]
                    only ts1 (index 2) and ts2 (index 3) are used for matching
    tolerance -> number: for partial overlaps, the largest distance the detection may extend
                 beyond the ground truth; same unit as the timestamps (default 5)

    A detection is a hit for a ground truth when it lies inside it, contains it, or partially
    overlaps it while overshooting by at most `tolerance`. A detection may hit several ground
    truths, and several detections hitting the same ground truth count as a single TP.

    return:
    correct_pred -> list: detections that hit at least one ground truth
    rest_pred -> list: detections that hit no ground truth (false positives)
    y_pred -> list: [1, 1, 0, 1, 0, 0, ...., 1]
    y_true -> list: [1, 1, 1, 1, 0, 0, ...., 0]
              first one (y_pred=1, y_true=0) pair per false positive in detection order,
              then one pair per ground truth: (1, 1) if it was hit (TP), (0, 1) otherwise (FN)
    '''
    matched_gt = set()   ### indices of the ground truths hit by at least one detection
    rest_pred = []       ### detections that are not associated with any gt instance
    correct_pred = []    ### detections associated with at least one gt instance
    y_true = []
    y_pred = []

    for pred in detection:
        pd_ts1, pd_ts2 = pred[1]

        ### no early exit: one detection is allowed to cover several ground truths
        hit = False
        for gt_ind, gt in enumerate(ground_truth):
            if _matches_ground_truth(pd_ts1, pd_ts2, gt[2], gt[3], tolerance):
                hit = True
                matched_gt.add(gt_ind)

        if hit:
            correct_pred.append(pred)
        else:
            ### FP
            rest_pred.append(pred)
            y_pred.append(1)
            y_true.append(0)

    for gt_ind in range(len(ground_truth)):
        ### TP when the ground truth was hit, FN otherwise
        y_pred.append(1 if gt_ind in matched_gt else 0)
        y_true.append(1)

    return correct_pred, rest_pred, y_pred, y_true


    

In [None]:
### scratch cell: displays a 5x1 array of zeros; the result is not assigned to anything
np.zeros((5,1))

## Load Data

In [None]:
############ configuration ################
############################################

### the constants below are combined into the two dataset directories:
### <base_dir>/<CODE>/<THREAD>_thread/version_<VER>/<BEHAVIOUR_*>

CODE = 'theft_protection'       ### application (code)
BEHAVIOUR_FAULTY = 'faulty_data'            ### last path component of the faulty traces directory
BEHAVIOUR_NORMAL = 'normal'            ### last path component of the normal traces directory
THREAD = 'single'           ### single, multi
VER = 3                     ### format of data collection

base_dir = '../../trace_data' ### can be replaced with 'csv', 'exe_plot', 'histogram'
normalbase_path = base_dir+f'/{CODE}/{THREAD}_thread/version_{VER}/{BEHAVIOUR_NORMAL}'
faultybase_path = base_dir+f'/{CODE}/{THREAD}_thread/version_{VER}/{BEHAVIOUR_FAULTY}'

print(normalbase_path)
print(faultybase_path)

In [None]:

### training traces: every entry of <normalbase_path>/train_data
train_base_path = os.path.join(normalbase_path, 'train_data')
### os.listdir returns entries in arbitrary order; sort them so every run sees the training files in the same order
train_data_path = [os.path.join(train_base_path, x) for x in sorted(os.listdir(train_base_path))]


######### get paths #######################
paths_log, paths_traces, varlist_path, paths_label = get_paths(faultybase_path)

### the validation cell zips test_data_path with test_label_path, so both lists are sorted here
### NOTE(review): this assumes trace and label file names sort into the same order - confirm against get_paths
paths_log.sort()
paths_traces.sort()
varlist_path.sort()
paths_label.sort()

test_data_path = paths_traces
test_label_path = paths_label

print(train_data_path)
print(test_data_path)
print(test_label_path)


In [None]:
############# check varlist is consistent ############
############# only for version 3 ######################

### NOTE(review): from_number is only assigned when VER == 3 and is_consistent() does not return False.
### In every other case the next cell, which reads from_number, fails with NameError
### (or silently uses a value left over from an earlier run of this cell).
if VER == 3:
    to_number = is_consistent(varlist_path)

    if to_number != False:
        from_number = mapint2var(to_number)


In [None]:
############ Get variable list ######################
### variables ordered by ascending key of from_number
sorted_keys = sorted(from_number.keys())
var_list = [from_number[k] for k in sorted_keys]

## Confidence Interval

__Confidence Interval:__

A confidence interval is a range around the sample mean that is likely to contain the true population mean. It is computed as mean ± margin of error, where the margin of error depends on the desired confidence level and the standard error.

_Example:_

1. Choose a confidence level (e.g., 95%).
2. Calculate the standard error: standard deviation / sqrt(number of observations)
3. Calculate the margin of error: critical value × standard error
4. Determine the confidence interval: mean ± margin of error


In [None]:
### initialize exeInt (imported from libraries.exeint); all interval/threshold/detection calls below go through this object
ei = exeInt()

### Data Processing

In [None]:
### get execution intervals for all variables from the training traces
### NOTE(review): presumably exe_list pools all training files and filewise_exe_list keeps them per file - confirm in libraries.exeint

exe_list, filewise_exe_list = ei.get_exeint(train_data_path)

In [None]:
### get the confidence intervals for all variables
### (not used for detection in this notebook; only referenced by the commented-out visualisation call)
confidence_intervals = ei.get_confinterval(exe_list)

############ calculate dynamic thresholds ############
### these thresholds are what ei.test_single receives in the validation cell
thresholds = ei.get_dynamicthresh(exe_list)

In [None]:
### display the computed thresholds
thresholds

In [None]:
### display the entry stored under key/index 9 of exe_list (spot check of a single entry)
exe_list[9]

### Visualising Thresholds

In [None]:
#### plot exe_list to visualize the distribution of execution intervals
# ei.viz_thresholds(exe_list, confidence_intervals, thresholds)


### Validation

In [None]:
#### Detect anomalies in faulty traces
#### For every (trace, label) pair: run detection, de-duplicate, match against the ground truth
#### and accumulate the per-file results in the all_* / *_all lists below.
DIFF_VAL = 2   ### second argument of ei.remove_duplicates (NOTE(review): meaning/unit defined in libraries.exeint - confirm)
all_tp = []   ### one (test_data, correct_pred, test_label) tuple per processed file
all_fp = []   ### one (test_data, rest_pred, test_label) tuple per processed file
all_detections = [] ### one (test_data, detection, test_label) tuple per processed file; each detection: [(state1, 0), (ts1, ts2), filename]
y_pred_all = []
y_true_all = []
for test_data, test_label in zip(test_data_path, test_label_path):
    print(test_data, test_label)
    detection = ei.test_single(test_data, thresholds)   ### detection in format: [var, (ts1,ts2), file_name]
    # print(detection)
    dedup_detection, grouped_det = ei.remove_duplicates(detection, DIFF_VAL)  ### remove multiple detections for single ground truth
    detection = dedup_detection
    all_detections += [(test_data, detection, test_label)]  ### used to plot detections

    ### load ground truths: take the label list of the first trace name found under 'labels'
    ground_truth_raw = read_traces(test_label)
    ground_truth = ground_truth_raw['labels']
    label_trace_name = list(ground_truth.keys())[0]
    ground_truth = ground_truth[label_trace_name]
    print('ground truths:', ground_truth)
    print(len(ground_truth))

    correct_pred, rest_pred, y_pred, y_true = get_ypred_ytrue(detection, ground_truth)  ### matched detections, unmatched detections (FP), metric labels

    all_tp += [(test_data, correct_pred, test_label)]
    all_fp += [(test_data, rest_pred, test_label)]

    y_pred_all.extend(y_pred)
    y_true_all.extend(y_true)

    ### NOTE(review): this break stops after the first test file, so the metrics below cover a single trace only;
    ### remove it to evaluate every file in test_data_path
    break

In [None]:
### Evaluation metrics
### NOTE: get_ypred_ytrue only emits FP (y_true=0, y_pred=1), TP (1, 1) and FN (1, 0) pairs,
### so there are no true negatives and accuracy reduces to TP / (TP + FP + FN).

from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, average_precision_score

# Calculate accuracy
accuracy = accuracy_score(y_true_all, y_pred_all)
print(f"Accuracy: {accuracy:.4f}")

# Calculate confusion matrix
# labels=[0, 1] keeps the matrix 2x2 (rows: true 0/1, columns: predicted 0/1) even when
# only one class value occurs, e.g. when every sample is a TP
conf_matrix = confusion_matrix(y_true_all, y_pred_all, labels=[0, 1])
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision
precision = precision_score(y_true_all, y_pred_all)
print(f'Precision: {precision:.4f}')

# Calculate recall
recall = recall_score(y_true_all, y_pred_all)
print(f'Recall: {recall:.4f}')

# Calculate average precision
# (computed from the hard 0/1 predictions, since no detection scores are available here)
average_precision = average_precision_score(y_true_all, y_pred_all)
print(f'Average Precision: {average_precision:.4f}')

# Calculate F1 score
f1 = f1_score(y_true_all, y_pred_all)
print(f"F1 Score: {f1:.4f}")

In [None]:
'''
TODO:
- plot the detections (done)
- group the detections for single anomaly that are detected in different variables (done)
- since the detection in these methods are between two events, the gt will usually lie in between the detected timestamps (done)
- once these issues are solved, evaluate the performance
'''

## Plot Detections

In [None]:
### plot gt and detections
### For each processed test file: rebuild the trace dataframe, map every detection interval onto
### sample indices of the trace, load the ground truth and draw everything with plot_single_trace.
for test_data, detections, test_label in all_detections:
    # print('test_data:', test_data)
    # print('detections:', detections)
    # print(test_label)

    ### prepare trace to plot
    col_data = preprocess_traces([test_data])
    all_df = get_dataframe(col_data) 
    # print(all_df[0])

    ### prepare detections to plot
    timestamps = col_data[0][1]   ### timestamps of the (single) preprocessed trace
    print('timestamps:', timestamps)
    plot_val = []       ### (start index, end index) of each detection within timestamps
    plot_x_ticks = []   ### trace timestamps found at those indices
    plot_class = []     ### one class id per detection; always 0 here
    for det in detections:
        # print(det)
        det_ts1, det_ts2 = det[1]
        # print(det_ts1, det_ts2)

        ### index of the trace timestamp closest to the start of the detection (first one on ties)
        det_ind1_pre = [ abs(t-det_ts1) for t in timestamps]
        det_ind1 = det_ind1_pre.index(min(det_ind1_pre))

        ### index of the trace timestamp closest to the end of the detection (first one on ties)
        det_ind2_pre = [ abs(t-det_ts2) for t in timestamps]
        det_ind2 = det_ind2_pre.index(min(det_ind2_pre))
        # print(det_ind1, det_ind2)
        # print(timestamps[det_ind1], timestamps[det_ind2])

        plot_val += [(det_ind1, det_ind2)]
        plot_x_ticks += [(timestamps[det_ind1], timestamps[det_ind2])]
        plot_class += [0]

    ### NOTE(review): presumably the same [indices, timestamps, classes] layout that prepare_gt returns - confirm in libraries.utils
    plot_detections = [plot_val, plot_x_ticks, plot_class]

    ### get ground truths
    gt_plot = prepare_gt(test_label)

    ### plot
    for df in all_df:
        # print(df.columns)
        plot_single_trace(df, 
                          var_list, 
                          with_time=False, 
                          is_xticks=True, 
                          detections=plot_detections, 
                          dt_classlist=['detection'],
                          ground_truths=gt_plot,
                          gt_classlist=['gt_communication', 'gt_sensor', 'gt_bitflip'],
                          )

    ### only the first entry of all_detections is plotted
    break

In [None]:
### display the ground truth prepared for the last plotted file (set inside the plotting loop)
gt_plot