## Event log COMPLEXITY

In [496]:
### IMPORT ###
from pathlib import Path
import pandas as pd
import pm4py
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery

### LOCAL IMPORT ###
from config import config_reader

In [497]:
### GLOBALS ###
yaml_config = config_reader.config_read_yaml("config.yml", "config")
# print(yaml_config) # debug
log_dir = str(yaml_config["LOG_DIR"])
stats_dir = str(yaml_config["STATS_DIR"])

level = "PAGE" # PARA, PAGE
file_name = f"edu_event_log_{level}_raw_filtered_DISCO_ter_enr_no_SURVEY.csv" # input file to be read
id_column = "Case ID"
activity_column = "Activity"
timestamp_column = "Complete Timestamp"
usability_col = "UEQ - Overall_Tercile" # [SUS_Tercile, Apprendimento percepito_Tercile, UEQ - Overall_Tercile]
usability_val_list = [1, 3]

# FUNCTIONS

In [498]:
def calculate_cyclomatic_complexity(xes_log):
    """
    Calculate the cyclomatic complexity from an XES event log.

    Parameters:
    xes_log: The event log in XES format (pm4py EventLog object). It should represent a single process execution log.

    Returns:
    float: The cyclomatic complexity of the resulting Directly-Follows Graph (DFG).
    """
    # Obtain the DFG from the XES log
    dfg = dfg_discovery.apply(xes_log)
    
    # Extract nodes from the DFG keys (each key is a tuple (activity1, activity2))
    nodes = set()
    for (s, t) in dfg.keys():
        nodes.add(s)
        nodes.add(t)
    
    # Count the number of nodes and edges
    N = len(nodes)
    E = len(dfg.keys())
    
    # Assume the graph is connected, so P = 1
    P = 1  
    
    # Calculate cyclomatic complexity: V(G) = E - N + 2P
    V = E - N + 2 * P
    
    return V

## MAIN

In [499]:
# Load the CSV
print(">> Reading event log")
path_log = Path(log_dir) / file_name
print("Path:", path_log)
dic_t = {'Case ID':object, 'CaseLength':int, 'SUS_Tercile':int, 'Apprendimento percepito_Tercile':int, 'UEQ - Overall_Tercile':int, 'QuizAnswerCorrectRatioOverAll_Tercile':int} 
df_log = pd.read_csv(path_log, sep = ",", dtype=dic_t, low_memory=False)

>> Reading event log
Path: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY.csv


In [500]:
#  Removes columns added by DISCO
col_del_list = ['Variant', 'Variant index']
for col_del in col_del_list:
  if col_del in df_log.columns:
    df_log.drop(columns=col_del, inplace=True)

In [501]:
df_log.columns

Index(['Case ID', 'Activity', 'Complete Timestamp', 'pageTitle', 'menu',
       'pageOrder', 'pagePara', 'eventPage', 'click_num', 'dbclick_num',
       'QuizSessionCount', 'QuizAnswerCorrectTotal', 'QuizAnswerWrongTotal',
       'QuizAnswerCorrectRatioOverCount', 'QuizAnswerCorrectRatioOverAll',
       'Q_1', 'Q_2', 'Q_3', 'Q_4', 'Q_5', 'Q_6', 'Q_7', 'Q_8', 'Q_9', 'Q_10',
       'Q_11', 'Q_12', 'Q_13', 'Q_14', 'Q_15', 'Q_16', 'Q_17', 'Q_18', 'Q_19',
       'Q_20', 'Q_21', 'Q_22', 'Q_23', 'Q_24', 'Q_25', 'Q_26', 'Q_27', 'Q_28',
       'SUS', 'Apprendimento percepito', 'UEQ - Pragmatic', 'UEQ - Hedonic',
       'UEQ - Overall', 'TotalTimeHH', 'TotalTimeMM', 'TotalTimeDD',
       'CaseLength', 'Class', 'SUS_Tercile', 'Apprendimento percepito_Tercile',
       'UEQ - Overall_Tercile', 'QuizAnswerCorrectRatioOverAll_Tercile',
       'TotalTimeMM.1', 'Class_Count', 'FN_N', 'FN_Q', 'FN_A', 'FN_C', 'FN_I',
       'FN_CT', 'A_Time_s', 'A_Time_m', 'Backward_Jumps'],
      dtype='object')

In [502]:
# Dimension before cleaning
df_log.shape

(7399, 68)

In [503]:
case_n = df_log[id_column].nunique()
print("Cases:", case_n)

Cases: 332


### Menu values

In [504]:
print(">> Menu values")
menu_list = sorted(list(df_log["menu"].unique()))
menu_list_len = len(menu_list)
print(f"Menu values ({menu_list_len}):", menu_list)

>> Menu values
Menu values (4): ['menu_1', 'menu_2', 'menu_3', 'menu_4']


In [505]:
df_log_menu_2 = df_log.groupby('Case ID').filter(lambda g: {'menu_1', 'menu_2'}.issubset(g['menu'].unique()))
case_n_1 = df_log_menu_2[id_column].nunique()
print("Cases:", case_n_1)

Cases: 113


In [506]:
df_log_menu_3 = df_log.groupby('Case ID').filter(lambda g: {'menu_1', 'menu_3'}.issubset(g['menu'].unique()))
case_n_2 = df_log_menu_3[id_column].nunique()
print("Cases:", case_n_2)

Cases: 80


In [507]:
df_log_menu_4 = df_log.groupby('Case ID').filter(lambda g: {'menu_1', 'menu_4'}.issubset(g['menu'].unique()))
case_n_3 = df_log_menu_4[id_column].nunique()
print("Cases:", case_n_3)

Cases: 139


In [508]:
# Check
print(case_n == (case_n_1+case_n_2+case_n_3))

True


In [509]:
list_df_log = []
list_df_log.append({"menu":2, "df":df_log_menu_2})
list_df_log.append({"menu":3, "df":df_log_menu_3})
list_df_log.append({"menu":4, "df":df_log_menu_4})

### Conversion to XES and computing Cyclomatic Complexity

In [510]:
list_results = []
for dic_df in list_df_log:
    print("> Formatting the event log")
    df_log = dic_df["df"]
    menu_val = dic_df["menu"]
    cases_all = df_log[id_column].nunique()
    print("Total cases:", cases_all)
    df_log = pm4py.format_dataframe(df_log, case_id=id_column, activity_key=activity_column, timestamp_key=timestamp_column)
    
    print("> Saving the event log to XES")
    file_xes = f"{Path(file_name).stem}_menu_1-{menu_val}.xes"
    path_xes = Path(log_dir) / file_xes
    print("Saving XES file to:", path_xes)
    pm4py.write_xes(df_log, path_xes, case_id_key='case:concept:name')

    # Filter by usability columns
    for usability_val in usability_val_list:
        df_log_filterd = df_log[df_log[usability_col] == usability_val]
        cases_tercile = df_log_filterd[id_column].nunique()
        print("Total cases (filtered):", cases_all)
        # Save it to XES
        print("> Saving the event log to XES")
        file_xes = f"{Path(file_name).stem}_menu_1-{menu_val}_{usability_col}_{usability_val}.xes"
        path_xes = Path(log_dir) / file_xes
        print("Saving XES file to:", path_xes)
        pm4py.write_xes(df_log_filterd, path_xes, case_id_key='case:concept:name')
        # Load the XES
        xes_log = pm4py.read_xes(path_xes.as_posix())
        v = calculate_cyclomatic_complexity(xes_log)
        print("Cyclomatyc complexity:", v)
        dic_res = {"file_name": file_xes, "menu":menu_val, "cases_all": cases_all, "cases_tercile":cases_tercile, "usability_column":usability_col, "usability_value":usability_val, "cyclomatic_complexity": v}
        list_results.append(dic_res)

> Formatting the event log
Total cases: 113
> Saving the event log to XES
Saving XES file to: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY_menu_1-2.xes


exporting log, completed traces :: 100%|██████████| 113/113 [00:00<00:00, 324.49it/s]


Total cases (filtered): 113
> Saving the event log to XES
Saving XES file to: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY_menu_1-2_UEQ - Overall_Tercile_1.xes


exporting log, completed traces :: 100%|██████████| 39/39 [00:00<00:00, 312.08it/s]
parsing log, completed traces :: 100%|██████████| 39/39 [00:00<00:00, 178.93it/s]


Cyclomatyc complexity: 41
Total cases (filtered): 113
> Saving the event log to XES
Saving XES file to: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY_menu_1-2_UEQ - Overall_Tercile_3.xes


exporting log, completed traces :: 100%|██████████| 33/33 [00:00<00:00, 333.99it/s]
parsing log, completed traces :: 100%|██████████| 33/33 [00:00<00:00, 134.41it/s]


Cyclomatyc complexity: 38
> Formatting the event log
Total cases: 80
> Saving the event log to XES
Saving XES file to: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY_menu_1-3.xes


exporting log, completed traces :: 100%|██████████| 80/80 [00:00<00:00, 295.74it/s]


Total cases (filtered): 80
> Saving the event log to XES
Saving XES file to: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY_menu_1-3_UEQ - Overall_Tercile_1.xes


exporting log, completed traces :: 100%|██████████| 31/31 [00:00<00:00, 289.94it/s]
parsing log, completed traces :: 100%|██████████| 31/31 [00:00<00:00, 164.75it/s]


Cyclomatyc complexity: 59
Total cases (filtered): 80
> Saving the event log to XES
Saving XES file to: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY_menu_1-3_UEQ - Overall_Tercile_3.xes


exporting log, completed traces :: 100%|██████████| 28/28 [00:00<00:00, 325.01it/s]
parsing log, completed traces :: 100%|██████████| 28/28 [00:00<00:00, 168.07it/s]


Cyclomatyc complexity: 35
> Formatting the event log
Total cases: 139
> Saving the event log to XES
Saving XES file to: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY_menu_1-4.xes


exporting log, completed traces :: 100%|██████████| 139/139 [00:00<00:00, 299.41it/s]


Total cases (filtered): 139
> Saving the event log to XES
Saving XES file to: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY_menu_1-4_UEQ - Overall_Tercile_1.xes


exporting log, completed traces :: 100%|██████████| 47/47 [00:00<00:00, 323.69it/s]
parsing log, completed traces :: 100%|██████████| 47/47 [00:00<00:00, 180.79it/s]


Cyclomatyc complexity: 48
Total cases (filtered): 139
> Saving the event log to XES
Saving XES file to: data_log/edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_no_SURVEY_menu_1-4_UEQ - Overall_Tercile_3.xes


exporting log, completed traces :: 100%|██████████| 47/47 [00:00<00:00, 318.20it/s]
parsing log, completed traces :: 100%|██████████| 47/47 [00:00<00:00, 172.30it/s]

Cyclomatyc complexity: 48





In [511]:
df_results = pd.DataFrame(list_results)

In [512]:
df_results

Unnamed: 0,file_name,menu,cases_all,cases_tercile,usability_column,usability_value,cyclomatic_complexity
0,edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_...,2,113,39,UEQ - Overall_Tercile,1,41
1,edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_...,2,113,33,UEQ - Overall_Tercile,3,38
2,edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_...,3,80,31,UEQ - Overall_Tercile,1,59
3,edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_...,3,80,28,UEQ - Overall_Tercile,3,35
4,edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_...,4,139,47,UEQ - Overall_Tercile,1,48
5,edu_event_log_PAGE_raw_filtered_DISCO_ter_enr_...,4,139,47,UEQ - Overall_Tercile,3,48


In [513]:
path_res = Path(stats_dir) / "cyclomatic_complexity_by_menu_usability.csv"
df_results.to_csv(path_res, sep=";", index=False, mode="a")