In [None]:
import numpy as np
import plotly
import json
import os
import plotly.express as px
import numpy as np
import statistics as st
import pandas as pd

def cal_feat(var_name, time_list):
    '''
    Compute the execution-interval features of one variable.

    var_name = name of the variable
    time_list = list of timestamps each time the variable is executed
    return ->
        [features] = list of features for each variable
        [name, num of exe, mean exe inter, median, mode, [exe inter]]

    With fewer than two timestamps there is no interval to measure, so the
    mean, median and mode are reported as 0 and the interval list is the
    placeholder [0].
    '''
    exe_num = len(time_list)

    if exe_num < 2:
        # Covers the empty list as well as a single timestamp: previously an
        # empty time_list left interval_list unbound and raised
        # UnboundLocalError when the feature row was assembled.
        interval_list = [0]
        interval_mean = 0
        interval_median = 0
        interval_mode = 0
    else:
        interval_list = cal_interval(time_list)
        interval_mean = st.mean(interval_list)
        interval_median = st.median(interval_list)
        # NOTE: on Python < 3.8 statistics.mode raises StatisticsError when
        # several values tie for most common; 3.8+ returns the first one seen.
        interval_mode = st.mode(interval_list)

    # Order must match the columns expected by prepare_to_write.
    return [
        var_name,
        exe_num,
        interval_mean,
        interval_median,
        interval_mode,
        interval_list,
    ]
    
def cal_interval(time_list):
    '''
    Differences between each pair of consecutive timestamps.

    time_list = list of timestamps each time the variable is executed
    return ->
        [exe interval] = list with len(time_list) - 1 entries, where entry k
        is time_list[k + 1] - time_list[k]; empty when there are fewer than
        two timestamps
    '''
    return [
        time_list[pos] - time_list[pos - 1]
        for pos in range(1, len(time_list))
    ]

def read_file(log_path):
    '''
    Load a JSON log file and expose its top-level keys.

    log_path = path of the JSON log file to read
    return ->
        (keys, parsed) where parsed is the decoded JSON object and keys is
        its keys view (the variable names recorded in the log)
    '''
    with open(log_path, 'r') as log_file:
        parsed = json.load(log_file)

    return parsed.keys(), parsed

def prepare_to_write(features_list):
    '''
    Pivot per-variable feature rows into columns ready for a pandas DataFrame.

    features_list = iterable of rows laid out as
        [name, num_of_exe, mean_exe_inter, median, mode, exe_inter]
    return ->
        dict mapping each column name to the list of that column's values,
        in the same order as the rows
    '''
    feature_fields = ('name', 'num_of_exe', 'mean_exe_inter', 'median', 'mode', 'exe_inter')
    columns = {field: [] for field in feature_fields}

    # Single pass over the rows; position in the row selects the column.
    for row in features_list:
        for position, field in enumerate(feature_fields):
            columns[field].append(row[position])

    return columns

def write_to_csv(data, name):
    '''
    Save tabular data as a CSV file.

    data = anything pandas.DataFrame accepts (here: dict of column lists)
    name = output path without extension; '.csv' is appended
    '''
    frame = pd.DataFrame(data)
    target = name + '.csv'
    frame.to_csv(target)

In [None]:
# Directories holding the JSON trace logs, one per scenario.
path_normaldata = '../trace_data/scientific practise/normal'
path_semerrdata = '../trace_data/scientific practise/semanttic error_1/'
path_faultyswitch = '../trace_data/scientific practise/faulty_switch/'

######### file names
# os.listdir returns entries in arbitrary order; sorting keeps the processing
# order and any positional index into these lists stable across runs/machines.
norm_log = sorted(os.listdir(path_normaldata))
semerr_log = sorted(os.listdir(path_semerrdata))
faultyswitch_log = sorted(os.listdir(path_faultyswitch))

######### path to files
# Each *_log_path list is index-aligned with its *_log list of file names.
norm_log_path = [os.path.join(path_normaldata, x) for x in norm_log]
semerr_log_path = [os.path.join(path_semerrdata, x) for x in semerr_log]
faultyswitch_log_path = [os.path.join(path_faultyswitch, x) for x in faultyswitch_log]

In [None]:
######### read data
## -1: all files, otherwise specify the index number
select_file = -1

######### decide which (log path, output name) pairs to process
if select_file == -1:
    # Switch the dataset by swapping the two lists below, e.g.
    # (norm_log_path, norm_log) or (semerr_log_path, semerr_log).
    jobs = []
    for log_path, log_name in zip(faultyswitch_log_path, faultyswitch_log):
        # Name of the folder containing the log, used as an output prefix.
        # (Previously stored in a variable called `type`, which shadowed the
        # builtin for every later cell.)
        trace_type = os.path.basename(os.path.dirname(log_path))
        jobs.append((log_path, trace_type + '_' + log_name))
else:
    # NOTE(review): the single-file mode always reads from the normal logs and
    # writes without a folder prefix, regardless of the dataset used in the
    # all-files mode above - confirm this is intended.
    jobs = [(norm_log_path[select_file], norm_log[select_file])]

# Make sure the output folder exists before the first write.
os.makedirs('../csv', exist_ok=True)

for log_path, to_write_name in jobs:
    var_list, data = read_file(log_path)

    ######### extract features
    features_list = [cal_feat(var, data[var]) for var in var_list]

    ######### write data to csv
    to_write = prepare_to_write(features_list)
    write_to_csv(to_write, f'../csv/{to_write_name}')