# State Transition

In [1]:
import numpy as np
import os
from libraries.utility import load_sample
from libraries.state_transition import StateTransition as st
import pandas as pd
from collections import defaultdict

In [2]:
# ---------------------------------------------------------------
# Configuration: selects which recorded data set the notebook uses
# ---------------------------------------------------------------

code = 'theft_protection'      # application (code) the traces belong to
behaviour = 'faulty_data'      # one of: normal, faulty_data
thread_typ = 'single'          # one of: single, multi
version = 2.2                  # format of data collection

# Root data folder; per the original note it can be replaced with
# 'csv', 'exe_plot' or 'histogram'.
base_dir = '../data-subtraces'

# Label folders follow <base_dir>/version_<version>/<behaviour>/<split>_label.
train_label_path = '/'.join([base_dir, f'version_{version}', behaviour, 'train_label'])
test_label_path = '/'.join([base_dir, f'version_{version}', behaviour, 'test_label'])
print(train_label_path)

# The label files inside these folders are listed in a later cell.


../data-subtraces/version_2.2/faulty_data/train_label


### Get file paths

In [9]:
### prepare train and test data
#
# train_data :
#     paths to the sample files (.npy) containing event traces of length 50
#     (can be longer or shorter); each event trace is part of a bigger trace.
#     event traces -> list( ['1_control_updatedata_cls.sensor_data' '997892'],
#                           ['1_control_readdata_0' '997896'],
#                           ['1_0_loracom_data' '997901'], ..... )
#
# test_data :
#     same as above
#
# labels :
#     paths to the .xlsx files containing the transition between two events and its label.
#     labels ->   ind | s1 | s2 | ts1 | ts2 | label |   -> the labels must use this
#     format and these column headings
#
# All paths are derived from train_label_path / test_label_path, so they are
# relative whenever those folders are relative.

# List the label sheets, skipping the '.DS_Store' entry if it is present.
train_labels = [
    os.path.join(train_label_path, name)
    for name in os.listdir(train_label_path)
    if name != '.DS_Store'
]
# Each label sheet has a matching sub-trace: same file name, '.npy' extension,
# stored under 'subtraces' instead of 'train_label'.
train_data = [p.replace('train_label', 'subtraces').replace('.xlsx', '.npy') for p in train_labels]

# Same for the test split. The filter must run on the directory listing itself,
# not on the folder path string (a str has no .remove()).
test_labels = [
    os.path.join(test_label_path, name)
    for name in os.listdir(test_label_path)
    if name != '.DS_Store'
]
test_data = [p.replace('test_label', 'subtraces').replace('.xlsx', '.npy') for p in test_labels]

## Training

In [4]:
### initialize the model and fit it on the training sub-traces
# `st` is the StateTransition class imported at the top of the notebook.
model = st()
# train() receives the list of .npy sub-trace paths built above. The cell output lists
# those paths (presumably printed by train() as it reads each file; confirm in
# libraries.state_transition).
model.train(train_data)

../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_750_800.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_350_400.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_150_200.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_0_50.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_100_150.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_800_850.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_300_350.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_850_900.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_250_300.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_200_250.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_50_100.npy
../data-subtraces/version_2.2/faulty_data/subtraces/trace1-comm_900_950.npy


In [5]:
# Keep a handle on the transition table held by the trained model for inspection.
transitions = model.transitions

In [6]:
# Display the table: a defaultdict(list) keyed by event name whose values are lists of
# event names (presumably the successors observed during training; confirm in
# libraries.state_transition).
transitions

defaultdict(list,
            {'1_0_main_temperature': ['1_control_updatedata_cls.sensor_data'],
             '1_control_updatedata_cls.sensor_data': ['1_control_readdata_0',
              '1_0_main_g_ack',
              '1_0_main_i'],
             '1_control_readdata_0': ['1_0_loracom_data'],
             '1_0_loracom_data': ['1_control_update_txmsg_0'],
             '1_control_update_txmsg_0': ['1_0_main_i'],
             '1_0_main_i': ['1_0_sense_temperature'],
             '1_0_sense_temperature': ['1_0_main_temperature'],
             '1_0_main_g_ack': ['1_0_main_events'],
             '1_0_main_events': ['1_control_update_rxmsg_drop'],
             '1_control_update_rxmsg_drop': ['1_0_main_i'],
             '1_0_main_ow': ['1_0_main_temp'],
             '1_0_main_temp': ['1_0_main_lora'],
             '1_0_main_lora': ['1_0_main_s'],
             '1_0_main_s': ['1_0_main_com_timer'],
             '1_0_main_com_timer': ['1_control_init_timer0_0'],
             '1_control_init_time

## Validation

In [10]:
#### Validate model
# Run the trained model over the test sub-traces. The evaluation cell below reads each
# element of `result` as [(event1, timestamp1), (event2, timestamp2), file_name], which
# matches the "Anomaly Detected" lines shown in this cell's output.
result = model.test(test_data)

Anomaly Detected: [('1_0_main_temperature', '709956'), ('1_0_main_i', '711305'), 'trace3-sensor_3400_3450.npy']
Anomaly Detected: [('1_0_main_temperature', '712347'), ('1_control_readdata_0', '713696'), 'trace3-sensor_3400_3450.npy']
Anomaly Detected: [('1_0_main_temperature', '714753'), ('1_0_main_g_ack', '716111'), 'trace3-sensor_3400_3450.npy']
Anomaly Detected: [('1_0_main_temperature', '717167'), ('1_control_readdata_0', '718503'), 'trace3-sensor_3400_3450.npy']
Anomaly Detected: [('1_0_main_temperature', '719562'), ('1_0_main_g_ack', '720904'), 'trace3-sensor_3400_3450.npy']
Anomaly Detected: [('1_0_main_temperature', '1327786'), ('1_0_main_i', '1329072'), 'trace3-sensor_6200_6250.npy']
Anomaly Detected: [('1_0_main_temperature', '1330115'), ('1_control_readdata_0', '1331430'), 'trace3-sensor_6200_6250.npy']
Anomaly Detected: [('1_0_main_temperature', '1332492'), ('1_0_main_g_ack', '1333782'), 'trace3-sensor_6200_6250.npy']
Anomaly Detected: [('1_0_main_temperature', '1334842'), 

In [12]:
########################
#### Extract the ground truth
########################
# Collect every transition that the test label sheets mark as anomalous (label == 1).
# Each entry is [(s1, ts1), (s2, ts2), file_name, ind], where file_name is the label
# sheet's base name without the '.xlsx' suffix.
ground_truth = []
for lab in test_labels:
    file_name = os.path.basename(lab).removesuffix('.xlsx')
    labels = pd.read_excel(lab)
    # Filter first so that only the anomalous rows are iterated.
    anomalous = labels[labels['label'] == 1]
    for _, row in anomalous.iterrows():
        ground_truth.append([(row['s1'], row['ts1']), (row['s2'], row['ts2']), file_name, row['ind']])

count = len(ground_truth)  ### number of anomalous transitions found


In [13]:
#########################
########## Evaluate Results
#########################
# Score the detector against the labelled anomalies.
#
# y_true / y_pred hold one slot per ground-truth entry:
#   y_true[i] is always 1 (every ground-truth entry is an anomaly),
#   y_pred[i] is 1 when some prediction matched ground-truth entry i, else 0.
# A prediction matches an entry when both event names, both timestamps (compared as
# strings) and the file name agree. Each ground-truth entry is matched at most once.
#
# NOTE(review): predictions that match no ground-truth entry are only printed, they are
# not added to y_true / y_pred, so y_true never contains a 0 and false positives do not
# influence the metrics computed from these arrays.
#
# NOTE: as before, matched entries are removed from `ground_truth` in place so that the
# list ends up holding only the anomalies that were NOT detected. Re-run the ground-truth
# extraction cell before re-running this one.
y_true = np.ones(len(ground_truth))
y_pred = np.zeros(len(ground_truth))

# (original position, entry) pairs that have not been matched by a prediction yet
unmatched = list(enumerate(ground_truth))

for pred in result:
    p_file = pred[2].removesuffix('.npy')
    ps1, pts1 = pred[0]
    ps2, pts2 = pred[1]
    pred_key = (ps1, str(pts1), ps2, str(pts2), p_file)

    for pos, (gt_idx, gt) in enumerate(unmatched):
        (gs1, gts1), (gs2, gts2), g_file = gt[0], gt[1], gt[2]
        if pred_key == (gs1, str(gts1), gs2, str(gts2), g_file):
            # Index by the ground-truth position, not by the prediction position:
            # y_pred has exactly one slot per ground-truth entry.
            y_pred[gt_idx] = 1
            del unmatched[pos]   # safe: we leave the loop immediately afterwards
            break
    else:
        # no ground-truth entry matched this prediction
        print(pred)

# keep only the undetected anomalies, mutating the existing list object
ground_truth[:] = [gt for _, gt in unmatched]

    

In [14]:
# Show the per-ground-truth detection flags (1 = detected, 0 = missed).
y_pred

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [18]:
# At this point the evaluation cell has removed every matched entry, so this displays
# only the labelled anomalies that the model did not detect.
ground_truth

[[('1_control_updatedata_cls.sensor_data', 634016),
  ('1_0_main_i', 634021),
  'trace1-comm_3100_3150',
  3140],
 [('1_control_updatedata_cls.sensor_data', 54886),
  ('1_0_main_g_ack', 54890),
  'trace2-bitflip_300_350',
  303],
 [('1_0_main_g_ack', 54890),
  ('1_0_main_events', 54895),
  'trace2-bitflip_300_350',
  304],
 [('1_control_updatedata_cls.sensor_data', 55946),
  ('1_0_main_g_ack', 55951),
  'trace2-bitflip_300_350',
  309],
 [('1_0_main_g_ack', 55951),
  ('1_0_main_events', 55955),
  'trace2-bitflip_300_350',
  310],
 [('1_0_main_g_ack', 57024),
  ('1_0_main_events', 57028),
  'trace2-bitflip_300_350',
  319],
 [('1_control_updatedata_cls.sensor_data', 39994),
  ('1_0_main_g_ack', 39998),
  'trace2-bitflip_200_250',
  207],
 [('1_0_main_g_ack', 39998),
  ('1_0_main_events', 40003),
  'trace2-bitflip_200_250',
  208],
 [('1_control_updatedata_cls.sensor_data', 41054),
  ('1_0_main_g_ack', 41058),
  'trace2-bitflip_200_250',
  213],
 [('1_0_main_g_ack', 41058),
  ('1_0_main_

In [19]:
### Evaluation metrics
# y_true / y_pred come from the evaluation cell: one slot per labelled anomaly,
# y_true all ones, y_pred 1 where the anomaly was detected.

from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Calculate confusion matrix.
# labels=[0, 1] pins the layout to 2x2 (rows = true class, columns = predicted class)
# even when only one class occurs, e.g. when every anomaly is detected.
conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision
precision = precision_score(y_true, y_pred)
print(f'Precision: {precision:.4f}')

# Calculate recall
recall = recall_score(y_true, y_pred)
print(f'Recall: {recall:.4f}')

# Calculate F1 score
f1 = f1_score(y_true, y_pred)
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.6144
Confusion Matrix:
[[ 0  0]
 [59 94]]
Precision: 1.0000
Recall: 0.6144
F1 Score: 0.7611
