In [None]:
import os
from libv3.utils import *
from collections import defaultdict
import pandas as pd
import json

In [None]:
############ configuration ################
############################################

# Experiment settings come from get_config() (config stored in libv3/exp_config.txt).
CODE, BEHAVIOUR, THREAD, VER = get_config()
VER = 4   # overrides the version returned by get_config()
print('VER:', VER)

# Root data folder; can be replaced with 'csv', 'exe_plot', 'histogram'.
base_dir = '../trace_data'
log_path = base_dir + f'/{CODE}/{THREAD}_thread/version_{VER}/{BEHAVIOUR}'
print(log_path)

# Index of the trace file that the cells below display.
trace_file = 0
print('file number:', trace_file)

In [None]:
######### get paths #######################
# get_paths() returns four path lists: logs, traces, variable lists, labels.
# Every list is stripped of macOS '.DS_Store' entries and sorted.
# The label list is sorted too, because later cells select the label file
# with the same `trace_file` index that is used for the sorted traces.
# NOTE(review): this assumes label and trace file names sort in the same
# order - confirm against the naming scheme in the data folder.
paths_log, paths_traces, varlist_path, paths_label = (
    sorted(path for path in path_group if '.DS_Store' not in path)
    for path_group in get_paths(log_path)
)

print(paths_log)
print(paths_traces)
print(varlist_path)
print(paths_label)

In [None]:
############# check varlist is consistent ############
############# only for version 3 and 4 ###############
# Defines `to_number` (content of the first varlist JSON file) and
# `from_number` (result of mapint2var(to_number)); later cells need both.
# An inconsistent varlist raises instead of only printing, so that the
# following cells can never run with undefined or stale mappings.

if VER == 3 or VER == 4:
    # The second return value of is_consistent is not used here; it is bound to
    # a named throwaway rather than `_`, which IPython uses for the last output.
    check_con, _unused = is_consistent(varlist_path)

    # Same comparison as before (`!= False`, inverted); the return type of
    # is_consistent is not visible here, so truthiness is deliberately not used.
    if check_con == False:  # noqa: E712
        raise ValueError('varlist is not consistent')

    to_number = read_json(varlist_path[0])
    from_number = mapint2var(to_number)
    print('varlist is consistent')


In [None]:
# Display the mapping read from the first varlist JSON file (varlist_path[0]).
to_number

In [None]:
############ Get variable list ######################
# Walk the keys of `from_number` in ascending order and collect their values.
sorted_keys = sorted(from_number.keys())
var_list = [from_number[k] for k in sorted_keys]   ### get the variable list
# print(var_list)

In [None]:
# Display the values of `from_number`, ordered by ascending key.
var_list

## Trace Plots

### Process Traces

In [None]:
########## process the traces ###########
# Run preprocess_traces over all trace paths together with the variable list.
# The result feeds get_dataframe() in the next step.
col_data = preprocess_traces(paths_traces, var_list)   ### in the format (trace_name, x_data, y_data, y_labels, trace_path) 

### Generate plot trace data

In [None]:
############### preprocessing data to plot using plotly ##############
'''
Restructure the data into dictionaries with (key, value) pairs:
(time, timestamps) and (trace_name, trace).
'''
# all_df is indexed by position and iterated with enumerate() in later cells.
all_df = get_dataframe(col_data)   

In [None]:
# Display the first entry of all_df (fixed index 0, independent of `trace_file`).
all_df[0]

In [None]:

### plot single trace
# Only the entry whose position equals `trace_file` is plotted; the rest are skipped.
for position, trace_df in enumerate(all_df):
    if position != trace_file:
        continue
    trace_obj = plot_single_trace(trace_df, var_list, with_time=False, is_xticks=True)
    trace_obj.show()

In [None]:
### get timestamp
# Convert one event index of the displayed trace into its timestamp.
# The lookup uses all_df[trace_file] (not a fixed all_df[0]) so that it stays
# in sync with the trace selected for plotting.
event_index = 227   # index of interest - presumably read off the trace plot; adjust as needed
timestamp = index2timestamp(all_df[trace_file], event_index)
print(timestamp)

## Execution Interval Plots

### Preprocess data

In [None]:
####### preprocessing data ########

#### extract timestamps for each variable and store them in a dictionary along with index values for each variable in event trace

var_timestamps = get_var_timestamps(paths_traces=paths_traces)    #### in format (filename, dict of timestamps and index values)

to_plot = preprocess_variable_plotting(var_timestamps, var_list, from_number, trace_number=trace_file)   ### restructure the data for plotting

# Optional threshold file. `thresholds_var` is always assigned, so the plotting
# cell never hits a NameError or picks up a stale value from an earlier run.
# NOTE(review): assumes plot_execution_interval_single accepts thresholds=None
# when no threshold file is available - confirm against libv3.
threshold_path = [f'../trace_data/{CODE}/single_thread/version_{VER}/faulty_data/thresholds.json']
thresholds_var = None
if os.path.exists(threshold_path[0]):
    thresholds_var = read_json(threshold_path[0])
    print('Loading threshold file')
else:
    print('Threshold file does not exist')

In [None]:
# Print every prepared plotting record as: name, log names, xy data.
for record in to_plot:
    name, log_names, xy_data = record
    print(name, log_names, xy_data)
    

### Generate execution interval plots

In [None]:
### plot the data
# Build one plot object per entry returned by the helper, then render each in turn.
plot_list = plot_execution_interval_single(
    to_plot,
    is_xticks=False,
    thresholds=thresholds_var,
)
for interval_plot in plot_list:
    interval_plot.show()

## Plot Labels

In [None]:
### count and prepare labels to plot
'''
Count how often each label class occurs over all label files and keep the
labels of the displayed trace (position `trace_file`) in `toplot_gt`.

labels are of format [index1, index2, timestamp1, timestamp2, class]
(original author's note); prepare_gt() output unpacks into three parts,
of which only the third (the classes) is counted here.
'''
class_count = defaultdict(int)
# Reset first so that a value left over from an earlier run is never plotted.
# NOTE(review): assumes plot_single_trace accepts ground_truths=None - confirm.
toplot_gt = None
for i, path in enumerate(paths_label):
    label_content = prepare_gt(path)
    _indices, _timestamps, classes = label_content
    for label_class in classes:
        class_count[label_class] += 1

    if i == trace_file:
        print(path)
        toplot_gt = label_content

    # running (cumulative) class counts after this file
    print(os.path.split(path)[-1], class_count)

if toplot_gt is None:
    print(f'No label file found at index {trace_file}; toplot_gt is None')

for key, val in class_count.items():
    print(key, val)

In [None]:
# Display the prepare_gt() result kept for the label file at position `trace_file`.
toplot_gt

In [None]:
### plot
# Render only the trace at position `trace_file`, passing the prepared labels along.
for position, trace_df in enumerate(all_df):
    if position != trace_file:
        continue
    plt_obj = plot_single_trace(
        trace_df,
        var_list,
        with_time=False,
        is_xticks=True,
        ground_truths=toplot_gt,
    )
    plt_obj.show()


## Generate train data

In [None]:
'''
Add to 'normal_seq_inter' the index intervals that show correct behaviour in the traces.
The format is as follows:
paths_traces = [path1, path2, ...]
normal_seq_inter = [ ( intervals for trace1, ...),
                    ( intervals for trace2 ...),
                      ...]
Each interval is a (start, end) pair used to slice the trace as trace[start:end].
'''

# Example with two traces:
# normal_seq_inter = (  ( (0,340), (500,700) ),  ### v4, normal trace0
#                     ( (0,350), (700,1500), (2000,27400) ),  ### v4, normal trace1
#             )

normal_seq_inter = (  ( (0,110), (150,900), (1250,2000), (2050,2300) ),  ### v4, normal trace0
            )

# zip() stops at the shorter sequence, so traces without an entry in
# normal_seq_inter are skipped.
for p, n_inter in zip(paths_traces, normal_seq_inter):
    trace = read_traces(p)
    # Slices are written to a 'train_data' folder next to the trace file.
    train_data_path = os.path.join(os.path.dirname(p), 'train_data')
    os.makedirs(train_data_path, exist_ok=True)

    for start, end in n_inter:
        interval_file = os.path.join(train_data_path, f'interval_{start}_{end}.json')
        # Context manager guarantees the file is flushed and closed after each dump.
        with open(interval_file, 'w') as fp:
            json.dump(trace[start:end], fp)
        print(f'interval_{start}_{end}.json saved in {train_data_path}')

In [None]:
# Display the intervals configured for the first trace.
normal_seq_inter[0]