# State Transition - only states

In [None]:
import numpy as np
import os
from libraries.utils import *
from libraries.state_transition import StateTransition as st
import pandas as pd
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle


## Load Data

In [None]:
############ configuration ################
### Selects which recorded traces this notebook works on. The normal and the
### faulty directory share everything except the last (behaviour) component.
############################################

CODE = 'theft_protection'                   ### application (code)
THREAD = 'single'                           ### single, multi
VER = 3                                     ### format of data collection
BEHAVIOUR_NORMAL = 'normal'                 ### normal, faulty_data
BEHAVIOUR_FAULTY = 'faulty_data'            ### normal, faulty_data

base_dir = '../../trace_data' ### can be replaced with 'csv', 'exe_plot', 'histogram'

### common prefix: <base_dir>/<CODE>/<THREAD>_thread/version_<VER>
_version_dir = f'{base_dir}/{CODE}/{THREAD}_thread/version_{VER}'
normalbase_path = f'{_version_dir}/{BEHAVIOUR_NORMAL}'
faultybase_path = f'{_version_dir}/{BEHAVIOUR_FAULTY}'

### one path per line, normal first
print(normalbase_path, faultybase_path, sep='\n')


In [None]:

### training traces: every file found in <normalbase_path>/train_data
train_base_path = os.path.join(normalbase_path, 'train_data')
### Skip macOS '.DS_Store' entries (as is done for the faulty-data lists below)
### and sort: os.listdir() returns names in arbitrary order, and later cells
### rely on train_data_path[0] and pass the whole list to the model.
train_data_path = sorted(
    os.path.join(train_base_path, x)
    for x in os.listdir(train_base_path)
    if '.DS_Store' not in x
)
print(train_data_path)


######### get paths #######################
paths_log, paths_traces, varlist_path, paths_label = get_paths(faultybase_path)

### Remove .DS_Store entries from all lists and sort them; the validation
### loop pairs traces and labels by position, so both need the same order.
paths_log = sorted(x for x in paths_log if '.DS_Store' not in x)
paths_traces = sorted(x for x in paths_traces if '.DS_Store' not in x)
varlist_path = sorted(x for x in varlist_path if '.DS_Store' not in x)
paths_label = sorted(x for x in paths_label if '.DS_Store' not in x)

print(paths_log)
print(paths_traces)
print(varlist_path)
print(paths_label)

### the faulty traces and their label files form the test set
test_data_path = paths_traces
test_label_path = paths_label


In [None]:
############# check varlist is consistent ############
############# only for version 3 ######################

if VER == 3:
    to_number = is_consistent(varlist_path)

    ### The original check is kept as-is: anything that does not compare equal
    ### to False is treated as the variable -> number mapping (presumably what
    ### is_consistent returns on success -- confirm in libraries.utils).
    if to_number != False:
        from_number = mapint2var(to_number)
    else:
        ### The following cells use from_number unconditionally. Stop here with
        ### a clear message instead of hitting a NameError later (or silently
        ### reusing a stale from_number left over from a previous run).
        raise ValueError(
            f'varlist files are not consistent ({len(varlist_path)} files checked); '
            'cannot build the number -> variable mapping'
        )


In [None]:
############ Get variable list ######################
### values of from_number, ordered by ascending key
sorted_keys = sorted(from_number.keys())
var_list = [from_number[number] for number in sorted_keys]   ### get the variable list
# print(var_list)

In [None]:
### Load the first training file as a sample; the loader is chosen by which
### extension string occurs in its path ('.npy' is checked before '.json').
first_train_file = train_data_path[0]
print(first_train_file)
print(first_train_file.find('.npy'))
print(first_train_file.find('.json'))

if '.npy' in first_train_file:
    sample_data = load_sample(first_train_file)
    print('.npy')
elif '.json' in first_train_file:
    sample_data = read_traces(first_train_file)
    print('.json')

## Training

In [None]:
### initialize
### Create a StateTransition instance (imported as `st`) and call its train()
### method with the list of training file paths.
model = st()
model.train(train_data_path)

In [None]:
### Keep a reference to the model's `transitions` attribute; later cells iterate
### it as a mapping. The bare expression makes the notebook display it.
transitions = model.transitions
transitions

In [None]:
### viz transitions
### For every key of `transitions`, print the key and then each value listed
### for it, all translated through from_number, followed by a blank line.

for state, followers in transitions.items():
    print(from_number[state], ':', end=' ')
    for follower in followers:
        print(from_number[follower], end=', ')
    print('\n')

## Validation

In [None]:
#### Validate model
### Runs the model on every test trace, compares the detections with the
### labels of the matching label file and pools the results over all files.
all_detections = []  ### one (trace_path, detections, label_path) tuple per file; the plotting cell reads det[1] of a detection as (ts1, ts2)
y_pred_all = []      ### y_pred of every file, concatenated
y_true_all = []      ### y_true of every file, concatenated
all_tp = []          ### one (trace_path, correct_pred, label_path) tuple per file
all_fp = []          ### one (trace_path, rest_pred, label_path) tuple per file

### Traces and labels are paired purely by position. zip() would silently stop
### at the shorter list, so a missing file could pair traces with the wrong
### labels without any error -- refuse to continue in that case.
if len(test_data_path) != len(test_label_path):
    raise ValueError(
        f'{len(test_data_path)} test traces but {len(test_label_path)} label files; '
        'cannot pair them by position'
    )

for test_data, test_label in zip(test_data_path, test_label_path):
    detection = model.test_single(test_data)
    all_detections.append((test_data, detection, test_label))  ### used to plot detections
    print('Detections:', detection)
    print(len(detection))

    ### the label file holds a 'labels' mapping; only its first entry is used
    ground_truth_raw = read_traces(test_label)
    ground_truth = ground_truth_raw['labels']
    label_trace_name = list(ground_truth.keys())[0]
    ground_truth = ground_truth[label_trace_name]
    print('ground truths:', ground_truth)
    print(len(ground_truth))

    correct_pred, rest_pred, y_pred, y_true = model.get_correct_detections(detection, ground_truth)

    y_pred_all.extend(y_pred)
    y_true_all.extend(y_true)
    all_tp.append((test_data, correct_pred, test_label))
    all_fp.append((test_data, rest_pred, test_label))



# result = model.test(test_data_path)

In [None]:
### Show the predictions and labels pooled over all test files, and how many
### labelled entries there are in total.
print('y_pred', y_pred_all)
print('y_true', y_true_all)
print(len(y_true_all))

In [None]:
### Evaluation metrics
### Scores the pooled predictions (y_pred_all) against the pooled labels
### (y_true_all) and draws the confusion matrix.

from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    average_precision_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

### every metric below takes the same (true, predicted) argument pair
_true_and_pred = (y_true_all, y_pred_all)

# Calculate precision
precision = precision_score(*_true_and_pred)
print(f'Precision: {precision:.4f}')

# Calculate recall
recall = recall_score(*_true_and_pred)
print(f'Recall: {recall:.4f}')

# # Calculate average precision
# average_precision = average_precision_score(y_true_all, y_pred_all)
# print(f'Average Precision: {average_precision:.4f}')

# Calculate F1 score
f1 = f1_score(*_true_and_pred)
print(f"F1 Score: {f1:.4f}")

# Calculate confusion matrix
conf_matrix = confusion_matrix(*_true_and_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Draw it with readable class names
class_names = ['normal', 'anomaly']
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=class_names)
disp.plot()

## Plot Detections

In [None]:
### plot gt and detections
### For every test file: plot the trace, mark the detections at the samples
### whose timestamps are closest to the detection timestamps, and overlay the
### ground truth prepared from the label file.


def _nearest_index(values, target):
    """Return the index of the first element of *values* closest to *target*."""
    return min(enumerate(values), key=lambda pair: abs(pair[1] - target))[0]


### NOTE: the third tuple element is named label_path on purpose. Calling it
### test_label_path would overwrite the list of label paths built earlier and
### break re-running the validation cell.
# for test_data, detections, label_path in all_detections:
for test_data, detections, label_path in all_tp:
    print('test_data:', test_data)
    print('detections:', detections)

    ### prepare trace to plot
    col_data = preprocess_traces([test_data])
    all_df = get_dataframe(col_data)
    # print(all_df[0])

    ### prepare detections to plot
    timestamps = col_data[0][1]
    print('timestamps:', timestamps)
    plot_val = []        ### (start index, end index) into timestamps, per detection
    plot_x_ticks = []    ### the timestamps found at those two indices
    plot_class = []      ### class id per detection; always 0 here
    for det in detections:
        print(det)
        det_ts1, det_ts2 = det[1]
        print(det_ts1, det_ts2)

        ### snap both detection timestamps to the closest recorded sample
        det_ind1 = _nearest_index(timestamps, det_ts1)
        det_ind2 = _nearest_index(timestamps, det_ts2)
        # print(det_ind1, det_ind2)
        # print(timestamps[det_ind1], timestamps[det_ind2])

        plot_val.append((det_ind1, det_ind2))
        plot_x_ticks.append((timestamps[det_ind1], timestamps[det_ind2]))
        plot_class.append(0)

    plot_detections = [plot_val, plot_x_ticks, plot_class]

    ### get ground truths
    gt_plot = prepare_gt(label_path)

    ### plot
    for df in all_df:
        # print(df.columns)
        plot_single_trace(df,
                          var_list,
                          with_time=False,
                          is_xticks=True,
                          detections=plot_detections,
                          dt_classlist=['detection'],
                          ground_truths=gt_plot,
                          gt_classlist=['gt_communication', 'gt_sensor', 'gt_bitflip'],
                          )

    # break

### Generate transition table

In [None]:
### Export the transition dictionary as a two-column table on a single
### PowerPoint slide, saved as 'table_presentation.pptx'.
import pandas as pd
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.util import Inches
from pptx.util import Pt

# Create a DataFrame from the dictionary (one row per key)
df = pd.DataFrame(list(transitions.items()), columns=['Key', 'Values'])

# Combine values for each key into a single cell
# NOTE(review): every key occurs in exactly one row, so the lambda joins a
# single element and the cell ends up holding str() of that row's whole
# 'Values' object -- confirm this is the intended cell text.
df_combined = df.groupby('Key')['Values'].agg(lambda x: ', '.join(map(str, x))).reset_index()
n_rows, n_cols = df_combined.shape

# Create a PowerPoint presentation
presentation = Presentation()

# Add a slide to the presentation
# NOTE(review): in the default python-pptx template, layout 5 is documented as
# 'Title Only' and layout 6 as 'Blank' -- confirm which one is wanted.
slide_layout = presentation.slide_layouts[5]
slide = presentation.slides.add_slide(slide_layout)

# Define the position and size of the table
left = Inches(1)
top = Inches(1)
width = Inches(6)
height = Inches(4)

# Add a table shape to the slide (one extra row for the header)
table = slide.shapes.add_table(rows=n_rows + 1, cols=n_cols, left=left, top=top, width=width, height=height).table

# Add column names to the first row
for col, col_name in enumerate(df_combined.columns):
    cell = table.cell(0, col)
    # cell.text writes the cell's text frame, so one assignment is enough
    cell.text = col_name
    header_font = cell.text_frame.paragraphs[0].font
    header_font.size = Pt(10)
    header_font.bold = True
    cell.fill.solid()
    cell.fill.fore_color.rgb = RGBColor(240, 240, 240)  # Light gray background color

# Add data to the table
for row in range(n_rows):
    for col in range(n_cols):
        cell = table.cell(row + 1, col)
        cell.text = str(df_combined.iloc[row, col])
        cell.text_frame.paragraphs[0].font.size = Pt(10)

# Save the PowerPoint presentation
presentation.save('table_presentation.pptx')



In [None]:
############ Evaluation ############
### Summary metrics over ALL test files. They are computed from the pooled
### y_true_all / y_pred_all lists: `y_test` is not defined anywhere in this
### notebook, and `y_pred` only holds the predictions of the last file handled
### by the validation loop.
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

# Calculate accuracy
accuracy = accuracy_score(y_true_all, y_pred_all)
print(f"Accuracy: {accuracy:.4f}")

# Calculate F1 score
f1 = f1_score(y_true_all, y_pred_all)
print(f"F1 Score: {f1:.4f}")

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_true_all, y_pred_all)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision
precision = precision_score(y_true_all, y_pred_all)
print(f'Precision: {precision:.4f}')

# Calculate recall
recall = recall_score(y_true_all, y_pred_all)
print(f'Recall: {recall:.4f}')



