In [2]:
################# state transition machine ##################
import json
import os
import numpy as np
import pandas as pd

def read_traces(log_path):
    '''
    read the trace files and extract variable names
    data = [ [event, timestamp], [], [],......,[] ]
    '''
    with open(log_path, 'r') as f:
        data = json.load(f)
    return data


# Subtrace Generation (faulty, 50)

In [3]:
############ configuration ################
############################################

code = 'theft_protection'       ### application (code)
behaviour = 'faulty_data'            ### normal, faulty_data
thread_typ = 'single'           ### single, multi
version = 2.2                     ### format of data collection
sub_len = 'dynamic'

base_dir = '../trace_data' ### can be replaced with 'csv', 'exe_plot', 'histogram'
log_path = base_dir+f'/{code}/{thread_typ}_thread/version_{version}/{behaviour}'

#### subtraces
subtrace_path = f"data-subtraces/version_{version}/{behaviour}/subtraces/{sub_len}/"
print(log_path)

../trace_data/theft_protection/single_thread/version_2.2/faulty_data


### Get paths to the files

In [4]:

###### get file paths #######

all_files = os.listdir(log_path)
all_files.sort()
logs = []
traces = []
unknown = []
for i in all_files:
    if i.find('log') == 0:
        logs += [i]
    elif i.find('trace') == 0 and i.find('.txt') == -1:
        traces += [i]
    else:
        unknown += [i]

######### path to files
paths_log = [os.path.join(log_path, x) for x in logs]
paths_traces = [os.path.join(log_path, x) for x in traces]
paths_log.sort()
print(paths_log)

['../trace_data/theft_protection/single_thread/version_2.2/faulty_data/log1-comm', '../trace_data/theft_protection/single_thread/version_2.2/faulty_data/log2-bitflip', '../trace_data/theft_protection/single_thread/version_2.2/faulty_data/log3-sensor']


In [4]:
paths_traces

['../trace_data/theft_protection/single_thread/version_2.2/faulty_data/trace1-comm',
 '../trace_data/theft_protection/single_thread/version_2.2/faulty_data/trace2-bitflip',
 '../trace_data/theft_protection/single_thread/version_2.2/faulty_data/trace3-sensor']

### Generate data samples (size=50)

In [None]:
########## generate raw data from traces ###########

# col_data = []
# for (p,w) in zip(paths_traces, traces):
#     trace = read_traces(p)
#     print(p,w)

#     ### path to save data samples
#     write_path = subtrace_path
#     print(write_path)

#     counter = 0
#     for i in range(0,len(trace),50):
#         if i==0:
#             ### take samples from 0 to 50
#             sample = trace[i:i+51]
#             np.save(write_path+f'{w}_{i}_{i+50}', sample, allow_pickle=False)
#             # print(len(sample))
#         elif len(trace) - i >= 50:
#             ### take samples from 50 to 99
#             sample = trace[i:i+51]
#             np.save(write_path+f'{w}_{i}_{i+50}', sample, allow_pickle=False)
#             # print(len(sample))
#         else:
#             sample = trace[i:]
#             np.save(write_path+f'{w}_{i}_{len(trace)}', sample, allow_pickle=False)
#             # print(len(sample))
#         counter += 1
#         print(counter)

#     # break

# State Transition Labels- instances

In [5]:
### get files from subtraces
all_subtraces = os.listdir(subtrace_path)
all_subtraces.remove('.DS_Store')

In [8]:
### generate label files

for sub in all_subtraces:
    sub_path = os.path.join(subtrace_path, sub)
    label_path = 'state transition/data/unlabelled/'
    # isPath = os.path.exists(os.path.dirname(label_path)) ### check if the path exists
    # ### create the folder if it does not exist
    # if not isPath:
    #     os.makedirs(os.path.dirname(label_path))
    # print(sub)
    subtrace = np.load(sub_path)
    start_count = sub.split('_')[1]
    #print(start_count)

    # print(subtrace)
    all_rows = []
    for ind, (event1, event2) in enumerate(zip(subtrace[0:-1], subtrace[1:])):
        # print(event1,event2)
        var1, var2 = event1[0], event2[0]
        ts1, ts2 = int(event1[1]), int(event2[1])
        data_row = [int(start_count)+ind, var1, var2, ts1, ts2, 0]
        # print(data_row)
        all_rows += [data_row]

    columns = ['ind', 's1', 's2', 'ts1', 'ts2', 'label']
    df_sub = pd.DataFrame(all_rows, columns=columns)
    excel_file_path = label_path + sub.replace('.npy', '.xlsx')

    ############# uncomment to save files
    # df_sub.to_excel(excel_file_path, index=False)



trace1-comm_5150_5200.npy
trace2-bitflip_5800_5850.npy
trace1-comm_3650_3700.npy
trace2-bitflip_3850_3900.npy
trace1-comm_8750_8800.npy
trace1-comm_5600_5650.npy
trace1-comm_8150_8200.npy
trace2-bitflip_10500_10550.npy
trace1-comm_5750_5800.npy
trace1-comm_7700_7750.npy
trace1-comm_8600_8650.npy
trace2-bitflip_8800_8850.npy
trace2-bitflip_7900_7950.npy
trace3-sensor_9450_9500.npy
trace2-bitflip_7050_7100.npy
trace2-bitflip_1000_1050.npy
trace3-sensor_2400_2450.npy
trace2-bitflip_3100_3150.npy
trace3-sensor_4450_4500.npy
trace3-sensor_7950_8000.npy
trace1-comm_8850_8900.npy
trace1-comm_9950_10000.npy
trace2-bitflip_3750_3800.npy
trace2-bitflip_1700_1750.npy
trace1-comm_1900_1950.npy
trace3-sensor_6250_6300.npy
trace2-bitflip_8650_8700.npy
trace2-bitflip_3150_3200.npy
trace1-comm_3800_3850.npy
trace2-bitflip_5650_5700.npy
trace1-comm_5850_5900.npy
trace3-sensor_2300_2350.npy
trace2-bitflip_3600_3650.npy
trace1-comm_5100_5150.npy
trace1-comm_10050_10100.npy
trace3-sensor_5350_5400.npy
tra

# Clustering - instances and labels

## labels for subtraces (len 50)

In [3]:
### get files from subtraces
all_subtraces = os.listdir(subtrace_path)
all_subtraces.remove('.DS_Store')

## labels for traces

In [20]:
paths_traces

['../trace_data/theft_protection/single_thread/version_2.2/faulty_data/trace1-comm',
 '../trace_data/theft_protection/single_thread/version_2.2/faulty_data/trace2-bitflip',
 '../trace_data/theft_protection/single_thread/version_2.2/faulty_data/trace3-sensor']

### substitute zero

In [5]:
### generate label files (only single exe inter per instances, subsitute zero for other variables)

### load var_list
_var_list = np.load('var_list.npy', allow_pickle=False)
_var_list=tuple(_var_list)

for tr in paths_traces:
    ### paths
    label_path = subtrace_path.replace('/subtraces', '/clustering_instances_labels') + '/trace_labels/'
    print(tr)
    
    ### load file
    trace = read_traces(tr)
    print(trace)

    exe_list = np.zeros(len(_var_list))  ### list to store the execution interval to create instances
    prev_exe = np.zeros(len(_var_list))  ### list to store the previous execution time of each variable

    instances = []  ### list to store the instances
    create_instance = False  ### flag to indicate any element in exe_list is not 0
    for ind, event in enumerate(trace):
        # print(event)
        var, ts = event[0], int(event[1])
        event_ind = _var_list.index(var)
        trace_ind = ind
        # print(trace_ind, exe_list, create_instance)
        # print(trace_ind, prev_exe)

        ### if the first instance of variable in log file then update the prev_exe list
        if prev_exe[event_ind] == 0:
            prev_exe[event_ind] = ts
        else:
            ### calculate the execution interval
            exe_inter = ts - prev_exe[event_ind]
            prev_exe[event_ind] = ts
            exe_list[event_ind] = exe_inter

        ### if atleast one exe_inter is calculated save the instance. To avoid instances with all parameters as 0
        if any(element != 0 for element in exe_list):
            create_instance = True

        if create_instance:
            # print(trace_ind, exe_list, create_instance)
            instances += [(trace_ind,tuple(exe_list), 0)]     ### format of instance (index, [exe_inter], label)
            exe_list = np.zeros(len(_var_list))  ### list to store the execution interval to create instances
            create_instance = False

    columns = ['ind', 'exe_inter', 'label']
    df_sub = pd.DataFrame(instances, columns=columns)
    excel_file_path = label_path + os.path.basename(tr) + '.xlsx'

    # ############ uncomment to save files
    df_sub.to_excel(excel_file_path, index=False)
    
        


../trace_data/theft_protection/single_thread/version_2.2/faulty_data/trace1-comm
[['1_0_main_ow', 1394], ['1_0_main_temp', 1421], ['1_0_main_lora', 1441], ['1_0_main_s', 1446], ['1_0_main_com_timer', 1954], ['1_control_init_timer0_0', 1959], ['1_0_main_i', 1962], ['1_0_sense_temperature', 1988], ['1_0_main_temperature', 3004], ['1_control_updatedata_cls.sensor_data', 3008], ['1_0_main_i', 3013], ['1_0_sense_temperature', 3038], ['1_0_main_temperature', 4054], ['1_control_updatedata_cls.sensor_data', 4058], ['1_0_main_i', 4063], ['1_0_sense_temperature', 4088], ['1_0_main_temperature', 5104], ['1_control_updatedata_cls.sensor_data', 5108], ['1_0_main_i', 5113], ['1_0_sense_temperature', 5138], ['1_0_main_temperature', 6154], ['1_control_updatedata_cls.sensor_data', 6159], ['1_0_main_i', 6163], ['1_0_sense_temperature', 6188], ['1_0_main_temperature', 7205], ['1_control_updatedata_cls.sensor_data', 7209], ['1_control_readdata_0', 7213], ['1_0_loracom_data', 7218], ['1_control_update_txms

In [20]:
os.path.basename(tr)


'trace3-sensor'

### last values

In [6]:
### generate label files (only single exe inter per instances, subsitute zero for other variables)

### load var_list
_var_list = np.load('var_list.npy', allow_pickle=False)
_var_list=tuple(_var_list)

for tr in paths_traces:
    ### paths
    label_path = subtrace_path.replace('/subtraces', '/clustering_instances_labels') + '/trace_labels/'
    print(tr)
    
    ### load file
    trace = read_traces(tr)
    print(trace)

    exe_list = np.zeros(len(_var_list))  ### list to store the execution interval to create instances
    prev_exe = np.zeros(len(_var_list))  ### list to store the previous execution time of each variable

    instances = []  ### list to store the instances
    create_instance = False  ### flag to indicate any element in exe_list is not 0
    for ind, event in enumerate(trace):
        # print(event)
        var, ts = event[0], int(event[1])
        event_ind = _var_list.index(var)
        trace_ind = ind
        # print(trace_ind, exe_list, create_instance)
        # print(trace_ind, prev_exe)

        ### if the first instance of variable in log file then update the prev_exe list
        if prev_exe[event_ind] == 0:
            prev_exe[event_ind] = ts
        else:
            ### calculate the execution interval
            exe_inter = ts - prev_exe[event_ind]
            prev_exe[event_ind] = ts
            exe_list[event_ind] = exe_inter

        ### if atleast one exe_inter is calculated save the instance. To avoid instances with all parameters as 0
        if create_instance == False:
            if any(element != 0 for element in exe_list):
                create_instance = True

        if create_instance:
            # print(trace_ind, exe_list, create_instance)
            instances += [(trace_ind,tuple(exe_list), 0)]     ### format of instance (index, [exe_inter], label)

    columns = ['ind', 'exe_inter', 'label']
    df_sub = pd.DataFrame(instances, columns=columns)
    excel_file_path = label_path + os.path.basename(tr) + '.xlsx'

    # ############ uncomment to save files
    df_sub.to_excel(excel_file_path, index=False)
    
        


../trace_data/theft_protection/single_thread/version_2.2/faulty_data/trace1-comm
[['1_0_main_ow', 1394], ['1_0_main_temp', 1421], ['1_0_main_lora', 1441], ['1_0_main_s', 1446], ['1_0_main_com_timer', 1954], ['1_control_init_timer0_0', 1959], ['1_0_main_i', 1962], ['1_0_sense_temperature', 1988], ['1_0_main_temperature', 3004], ['1_control_updatedata_cls.sensor_data', 3008], ['1_0_main_i', 3013], ['1_0_sense_temperature', 3038], ['1_0_main_temperature', 4054], ['1_control_updatedata_cls.sensor_data', 4058], ['1_0_main_i', 4063], ['1_0_sense_temperature', 4088], ['1_0_main_temperature', 5104], ['1_control_updatedata_cls.sensor_data', 5108], ['1_0_main_i', 5113], ['1_0_sense_temperature', 5138], ['1_0_main_temperature', 6154], ['1_control_updatedata_cls.sensor_data', 6159], ['1_0_main_i', 6163], ['1_0_sense_temperature', 6188], ['1_0_main_temperature', 7205], ['1_control_updatedata_cls.sensor_data', 7209], ['1_control_readdata_0', 7213], ['1_0_loracom_data', 7218], ['1_control_update_txms

# Examine Subtraces 

In [8]:
############ configuration ################
############################################

code = 'theft_protection'       ### application (code)
behaviour = 'faulty_data'            ### normal, faulty_data
thread_typ = 'single'           ### single, multi
version = 2.2                     ### format of data collection
sub_len = 50

base_dir = 'data-subtraces' ### can be replaced with 'csv', 'exe_plot', 'histogram'
normal_path = base_dir+f'/version_{version}/{behaviour}/subtraces/{sub_len}/normal'
anomalies_path = base_dir+f'/version_{version}/{behaviour}/subtraces/{sub_len}/anomalies'
print(normal_path, anomalies_path)

data-subtraces/version_2.2/faulty_data/subtraces/50/normal data-subtraces/version_2.2/faulty_data/subtraces/50/anomalies


In [9]:
normal_files = os.listdir(normal_path)
if '.DS_Store' in normal_files:
    normal_files.remove('.DS_Store')

anomalies_files = os.listdir(anomalies_path)
if '.DS_Store' in anomalies_files:
    anomalies_files.remove('.DS_Store')

normal_files = [os.path.join(normal_path, x) for x in normal_files]
anomalies_files = [os.path.join(anomalies_path, x) for x in anomalies_files]


# Dynamic Subtrace - Training

In [21]:
############ configuration ################
############################################

code = 'theft_protection'       ### application (code)
behaviour = 'normal'            ### normal, faulty_data
thread_typ = 'single'           ### single, multi
version = 2.2                     ### format of data collection
sub_len = 'dynamic'

base_dir = '../trace_data' ### can be replaced with 'csv', 'exe_plot', 'histogram'
log_path = base_dir+f'/{code}/{thread_typ}_thread/version_{version}/{behaviour}'

#### subtraces
subtrace_path = f"data-subtraces/version_{version}/{behaviour}/subtraces/{sub_len}/"
print(log_path)

../trace_data/theft_protection/single_thread/version_2.2/normal


In [22]:
### get files ####

paths_traces = log_path + '/trace1'

In [24]:
trace1_normalindices = []

'../trace_data/theft_protection/single_thread/version_2.2/normal/trace1'