In [None]:
import numpy as np
import plotly
import json
import os
import plotly.express as px
import numpy as np
import statistics as st
import pandas as pd

def read_logs(log_path):
    '''
    Load one json log file.

    log_path = path to the json log file
    return ->
        (var, data) = keys view over the top-level names in the log (the variables),
                      and the decoded json content itself
    '''
    with open(log_path, 'r') as log_file:
        content = json.load(log_file)
    return (content.keys(), content)

def cal_interval(time_list):
    '''
    time_list = list of timestamps, one per execution of the variable
    return ->
        [exe interval] = difference between every pair of consecutive timestamps
        (one entry fewer than time_list, empty for fewer than two timestamps)
    '''
    return [later - earlier for (earlier, later) in zip(time_list, time_list[1:])]

def cal_feat(var_name, time_list):
    '''
    Summarise the execution intervals of one variable.

    var_name = name of the variable
    time_list = list of timestamps each time the variable is executed
    return ->
        [features] = list of features for the variable
        [name, num of exe, mean exe inter, median, mode, [exe inter]]

    Without any timestamp the interval list is empty, with a single timestamp it is [0];
    in both cases mean, median and mode stay 0.
    '''
    exe_num = len(time_list)
    interval_mean = 0
    interval_median = 0
    interval_mode = 0

    if exe_num == 0:
        ### this case used to leave interval_list unassigned -> UnboundLocalError on return
        interval_list = []
    elif exe_num == 1:
        interval_list = [0]
    else:
        interval_list = cal_interval(time_list)
        interval_mean = st.mean(interval_list)
        interval_median = st.median(interval_list)
        ### NOTE: before Python 3.8 st.mode raises StatisticsError when several values tie
        interval_mode = st.mode(interval_list)

    return [var_name, exe_num, interval_mean, interval_median, interval_mode, interval_list]

def prepare_to_write(features_list):
    '''
    prepare the features to be written to a csv file using pandas

    features_list = list of feature rows, each ordered as
                    [name, num_of_exe, mean_exe_inter, median, mode, exe_inter]
    return ->
        dict mapping every column name to the list of that column's values (one per row)
    '''
    feature_fields = ['name', 'num_of_exe', 'mean_exe_inter', 'median', 'mode', 'exe_inter']
    to_write = {field: [] for field in feature_fields}
    for feat in features_list:
        ### position of a value inside the row decides which column it goes to
        for (position, field) in enumerate(feature_fields):
            to_write[field].append(feat[position])
    return to_write

def write_to_csv(data, name):
    '''
    Write data to '<name>.csv' without the index column.

    data = data in dict format, where keys form the column names
    name = path of the output file without the '.csv' extension

    The folder of the output file is created when it does not exist yet,
    since DataFrame.to_csv does not create missing folders.
    '''
    out_dir = os.path.dirname(name)
    if out_dir:   ### '' means the current folder, nothing to create
        os.makedirs(out_dir, exist_ok=True)
    df = pd.DataFrame(data)
    df.to_csv(name+'.csv', index=False)


########### trace processing ##########
def read_traces(log_path):
    '''
    Load one json trace file.

    log_path = path to the json trace file
    return ->
        the decoded json content of the file
    '''
    with open(log_path, 'r') as trace_file:
        return json.load(trace_file)

def generate_map(raw_trace):
    '''
    raw_trace -> list of event trace generated during logging
    return:
        event_map -> takes the variable name and gives corresponding event number
        event_remap -> takes event number and gives associated variable name

    Events are numbered from 1 in the order in which they first appear in raw_trace,
    so the same trace always yields the same numbers. (Numbering a set() of strings
    changes from one interpreter run to the next.)
    '''
    event_map = dict()
    event_remap = dict()
    ### dict.fromkeys drops the duplicates but keeps the first-appearance order
    for (number, event) in enumerate(dict.fromkeys(raw_trace), start=1):
        event_map[event] = number
        event_remap[number] = event

    return(event_map, event_remap)


### Input to select the scenario

In [None]:
############# configuration ################
############################################

code = 'theft_protection'       ### application (code) under analysis
behaviour = 'wrong_txinter'     ### scenario folder to read, e.g. normal, semantic_error, wrong_txinter
thread_typ = 'single'           ### single, multi
version = 2                     ### format of data collection

### root folder of the raw data; later cells swap 'trace_data' in the resulting paths
### (e.g. for 'csv', 'exe plots') to get their output folders
base_dir = '../trace_data'
log_path = f'{base_dir}/{code}/{thread_typ}_thread/version_{version}/{behaviour}'

### Get paths to the files

In [None]:
######### file names
### every entry of the scenario folder, sorted by name
all_files = sorted(os.listdir(log_path))
### the prefix of the file name decides the kind of file
logs = [entry for entry in all_files if entry.startswith('log')]
traces = [entry for entry in all_files if entry.startswith('trace')]
unknown = [entry for entry in all_files if not entry.startswith(('log', 'trace'))]

######### path to files
paths_log = sorted(os.path.join(log_path, entry) for entry in logs)
paths_traces = [os.path.join(log_path, entry) for entry in traces]

# Logs

### Calculate the features of each log file and save them to a CSV file

In [None]:
###### input ######
### index into paths_log of the single log file to process; -1 processes every log file
select_file = -1
### csv output switch, read by the feature-extraction cell below
write_flag = True
###### input ######

In [None]:
######### calculate feature for each log file and save it to csv file #######
#############################################################################
## select_file == -1: all files, otherwise the index of the single file to process

problem = []   ### names of the log files that could not be read
if select_file == -1:
    selected = list(zip(paths_log, logs))
else:
    selected = [(paths_log[select_file], logs[select_file])]

for (p,w) in selected:
    try:
        var_list, data = read_logs(p)
    except (OSError, ValueError, AttributeError):
        ### unreadable file, invalid json or json that is not an object:
        ### remember the file and skip it instead of going on with the data of the previous file
        print(w,' not processed')
        problem += [w]
        continue

    ### sorted() hands back the sorted names; list(var_list).sort() only sorted a throw-away copy
    var_list = sorted(var_list)
    to_write_name = p.replace('trace_data', 'csv')

    ######### extract features
    features_list = [cal_feat(var, data[var]) for var in var_list]

    ### write_flag is honoured for all files as well as for a single file
    if write_flag:
        ######### write data to csv
        ### create the folder if it does not exist
        os.makedirs(os.path.dirname(to_write_name), exist_ok=True)

        to_write = prepare_to_write(features_list)
        write_to_csv(to_write, to_write_name)

    

### Get the range of execution intervals of each variable and compare it across all the log files


In [None]:
##### Get range of exe time of each variable and look at data from all the logs wrt each variable ######
########################################################################################################

col_names = []
analysis_data = dict() ### distinct execution intervals per variable, one entry per log file
analysis_data['labels'] = [] ### name of the log files
analysis_min_max = dict() ### (min, max) execution interval per variable, one entry per log file
analysis_min_max['labels'] = [] ### name of the log files

### read every log file once and collect the variables seen in any of them
log_contents = []   ### (path, file name, data) of each log file
var_all = []
for (p,w) in zip(paths_log, logs):
    var_list, data = read_logs(p)
    log_contents += [(p, w, data)]
    for v in var_list:
        if v not in var_all:
            var_all += [v]

### initialize the dict
for v in var_all:
    analysis_data[v] = []
    analysis_min_max[v] = []


### fill the dict with data
for (p, w, data) in log_contents:
    analysis_data['labels'] += [w]
    analysis_min_max['labels'] += [w]

    for v in var_all:
        time_list = data.get(v)
        if not time_list:
            ### every column needs exactly one entry per log file, otherwise the columns
            ### differ in length and pd.DataFrame cannot build the table
            print(f'Variable {v} not found in file {w}')
            analysis_data[v] += [None]
            analysis_min_max[v] += [None]
            continue

        features = cal_feat(v, time_list)
        exe_time = sorted(set(features[5])) ### distinct execution intervals, ascending
        analysis_data[v] += [exe_time]
        analysis_min_max[v] += [(min(exe_time), max(exe_time))]


### DataFrame.to_csv does not create missing folders
os.makedirs('./exe_time', exist_ok=True)
os.makedirs('./min_max', exist_ok=True)
write_to_csv(analysis_data, f'./exe_time/{thread_typ}_version{version}_exe_time_{behaviour}')
write_to_csv(analysis_min_max, f'./min_max/{thread_typ}_version{version}_min_max_{behaviour}')
    

### plot histogram 


### Get the execution plots

#### Log wise

In [None]:
############ prepare data to plot #############
###############################################

to_plot = []   ### in format -> [(file_path, [var names], [(<exe inters of var>, <timestamps of var>), ...]), ...]

### read every log file once and collect the variables seen in any of them
log_contents = []   ### (path, file name, data) of each log file
var_all = []
for (p,w) in zip(paths_log, logs):
    var_list, data = read_logs(p)
    log_contents += [(p, w, data)]
    for v in var_list:
        if v not in var_all:
            var_all += [v]

### iterate over the log files
for (p, w, data) in log_contents:
    xy_data = [] ### (execution intervals, timestamps) per variable
    var_names = []
    for v in var_all:
        ### only a missing variable is skipped; any other problem in the data is raised
        if v not in data:
            print(f'Variable {v} not found in file {w}')
            continue

        time_list = data[v]   ### get the timestamps
        ### every interval is stored together with the timestamp at which it ends
        exe_time = [t2-t1 for (t1,t2) in zip(time_list[0:-1], time_list[1:])]
        timestamp = list(time_list[1:])
        assert(len(exe_time)==len(timestamp))
        var_names += [v]
        xy_data += [(exe_time,timestamp)]

    assert(len(var_names)==len(xy_data))
    to_plot += [(p,var_names,xy_data)]   ### (path of the log file, labels for legend (var names), [(execution intervals (y_data), timestamps (x_data))])
    

In [None]:
####### line plot for analysis
import matplotlib.pyplot as plt
import numpy as np

line_style = ['solid', 'dashed', 'dashdot', 'dotted']
markers = ['.','o','*','+','^','x','d','h',',','H','D']

### one figure per log file, one line per variable
### var_names actually stand for variable names
for (name, var_names, xy_data) in to_plot:
    ### path to save the plots
    to_write_name = name.replace('trace_data', 'exe plots')
    file_name = os.path.basename(to_write_name)
    file_name = f'{thread_typ}_version{version}_{behaviour}_{file_name}'
    dir_name = os.path.dirname(to_write_name)
    to_write_name = os.path.join(dir_name, file_name)
    ### create the folder if it does not exist
    os.makedirs(dir_name, exist_ok=True)

    _y_all = [] ### to adjust y-ticks
    legend_lab = [] ### names of the variables that are actually plotted
    fig = plt.figure(figsize =(20, 9))
    for (num, (v,xy)) in enumerate(zip(var_names, xy_data)):
        x = xy[1]   ### timestamps
        y = xy[0]   ### execution intervals
        ### ignore all the variables that are only executed once
        if x != []:
            _y_all.extend(y)
            legend_lab.append(v)

            ### cycle through the styles so that neighbouring lines look different
            plt.plot(x, y, ls=line_style[num%len(line_style)], marker=markers[num%len(markers)])

    ### min()/max() raise on an empty list, so skip the files in which no variable ran at least twice
    if _y_all != []:
        plt.legend(legend_lab, bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
        plt.yticks(np.arange(min(_y_all), max(_y_all)+1000, 500)) ### y ticks every 500ms
        plt.xlabel('Timestamp of execution')   ### x holds the timestamps, not a count
        plt.ylabel('Execution interval (in ms)')
        plt.grid(True)
        plt.title(f'{os.path.basename(name)}')
        plt.savefig(f'{to_write_name}.png', bbox_inches='tight', transparent=False)
        plt.show()

    ### release the figure, otherwise one figure per log file stays in memory
    plt.close(fig)


#### Variable wise

In [None]:
############ prepare data to plot #############
###############################################

to_plot = []   ### in format -> [(path named after the variable, [log names], [(<exe inters in log>, <timestamps in log>), ...]), ...]

### read every log file once (instead of once per variable) and collect the variables seen in any of them
log_contents = []   ### (path, file name, data) of each log file
var_all = []
for (p,w) in zip(paths_log, logs):
    var_list, data = read_logs(p)
    log_contents += [(p, w, data)]
    for v in var_list:
        if v not in var_all:
            var_all += [v]


### collect data for each variable from each log file
for v in var_all:
    xy_data = [] ### (execution intervals, timestamps) per log file
    log_names = []
    for (p, w, data) in log_contents:
        ### only a missing variable is skipped; any other problem in the data is raised
        if v not in data:
            print(f'Variable {v} not found in file {w}')
            continue

        time_list = data[v]   ### get the timestamps
        ### every interval is stored together with the timestamp at which it ends
        exe_time = [t2-t1 for (t1,t2) in zip(time_list[0:-1], time_list[1:])]
        timestamp = list(time_list[1:])
        assert(len(exe_time)==len(timestamp))
        log_names += [w]
        xy_data += [(exe_time,timestamp)]

    assert(len(log_names)==len(xy_data))
    ### the plot of a variable is named after the variable and placed like a log file of the scenario;
    ### built from log_path so that it does not depend on variables left over from the loop above
    to_plot += [(os.path.join(log_path, v),log_names,xy_data)]  ### (path ending in the variable name, labels for legend (log names), [(execution intervals (y_data), timestamps (x_data))])



In [None]:
####### line plot for analysis
import matplotlib.pyplot as plt
import numpy as np

line_style = ['solid', 'dashed', 'dashdot', 'dotted']
markers = ['.','o','*','+','^','x','d','h',',','H','D']

### one figure per variable, one line per log file
### name represents the name of respective variable with which file will be saved
for (name, log_names, xy_data) in to_plot:
    ### path to save the plots
    to_write_name = name.replace('trace_data', 'exe plots')
    file_name = os.path.basename(to_write_name)
    file_name = f'{thread_typ}_version{version}_{behaviour}_{file_name}'
    dir_name = os.path.dirname(to_write_name)
    to_write_name = os.path.join(dir_name, file_name)
    ### create the folder if it does not exist
    os.makedirs(dir_name, exist_ok=True)

    _y_all = [] ### to adjust y-ticks
    legend_lab = [] ### names of the logs that are actually plotted
    fig = plt.figure(figsize =(20, 9))
    for (num, (l,xy)) in enumerate(zip(log_names, xy_data)):
        x = xy[1]   ### timestamps
        y = xy[0]   ### execution intervals
        ### ignore all the variables that are only executed once
        if x != []:
            _y_all.extend(y)
            legend_lab.append(l)

            ### cycle through the styles so that neighbouring lines look different
            plt.plot(x, y, ls=line_style[num%len(line_style)], marker=markers[num%len(markers)])

    ### nothing is saved for a variable that never ran at least twice in any log
    if _y_all != []:
        plt.legend(legend_lab, bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
        plt.yticks(np.arange(min(_y_all), max(_y_all)+500, 500)) ### y ticks every 500ms
        plt.xlabel('Timestamp of execution')   ### x holds the timestamps, not a count
        plt.ylabel('Execution interval (in ms)')
        plt.grid(True)
        plt.title(f'{os.path.basename(name)}')
        plt.savefig(f'{to_write_name}.png', bbox_inches='tight')
        plt.show()

    ### release the figure (also the empty ones that are never shown)
    plt.close(fig)


# Event Traces

### Process Traces

In [None]:
########## process the traces ###########
### read every trace file once
loaded_traces = [(w, read_traces(p)) for (p,w) in zip(paths_traces, traces)]

### number the events over all the traces: a map built from the first trace alone raises
### KeyError as soon as another trace contains an event that the first one does not have
raw_trace = [event for (_, trace) in loaded_traces for event in trace]
to_number, from_number = generate_map(raw_trace)

col_data = []
for (w, trace) in loaded_traces:
    num_trace = [to_number[t] for t in trace]
    col_data += [(w, num_trace)]   ### in the format (trace_name, event trace)


### Generate excel sheet

In [None]:
######### write the data to excel sheet ###########

import xlsxwriter

### the with-block closes the workbook even when one of the writes below fails
with xlsxwriter.Workbook(f'{thread_typ}_version{version}_eventtrace_{behaviour}.xlsx') as workbook:

    ### add worksheet for mapper: header row, then one row per event (number, name)
    worksheet = workbook.add_worksheet('mapper')
    worksheet.write(0, 0, 'event num')
    worksheet.write(0, 1, 'event name')
    for (row, (k, val)) in enumerate(from_number.items(), start=1):
        worksheet.write(row, 0, k)
        worksheet.write(row, 1, val)

    ### add worksheet for traces: one column per trace, trace name in the first row
    worksheet = workbook.add_worksheet('traces')
    for (col, (name, trace)) in enumerate(col_data):
        worksheet.write(0, col, name)
        for (row, t) in enumerate(trace, start=1):
            worksheet.write(row, col, t)

In [None]:
### check if all traces are same #######

In [None]:
'''
TODO:
- read SOTA for types of Faults
- Prepare poster for Mamba
- Prepare slides for CN SPace
- German HW
'''