In [None]:
""""
This file is used to execute the whole process in chronological order.
"""

In [None]:
# Required Imports
import os
from dotenv import load_dotenv
import json

import sys

# Extend the module search path with the pre-processing and
# compliance-verification directories. Both entries are relative paths, so
# they are resolved against the current working directory.
sys.path.extend(['pre-processing/event_log', 'compliance-verification'])

# Read the .env file, then take HOME_PATH from the process environment.
# HOME_PATH is the prefix every data path in this notebook is built from;
# a missing variable raises KeyError here.
load_dotenv()
HOME_PATH = os.environ['HOME_PATH']

In [None]:
# 1. Pre-Processing

In [None]:
# 1.1 Pre-Process a Description
# 1.1.1 Using GPT-4
# Path of the already generated pre-processed description (JSON) for the
# bicycle manufacturing process, located in the gpt-4-v2 output directory.
bicycle_gpt4_output_path = HOME_PATH + '/data/output/pre_processing_process_description/gpt-4-v2/bicycle_manufacturing-gpt-4-v2.json'

# Load the pre-processed description file.
# The encoding is passed explicitly: without it open() uses the platform's
# locale encoding, which can mis-decode non-ASCII characters in the JSON
# (for example on Windows).
with open(bicycle_gpt4_output_path, encoding='utf-8') as f:
    bicycle_gpt4_pre_process = json.load(f)

print(bicycle_gpt4_pre_process)

In [None]:
# 1.1 Pre-Process a Description
# 1.1.2 Using Customized Solution

In [None]:
# 1.2 Pre-Process of an Event Log
# 1.2.1 Using customized solution with json outputs

# Input: the original bicycle manufacturing event log (CSV) and the names of
# the columns holding the case id, the activity and the timestamp.
dataframe_path = HOME_PATH + '/data/input/log/original/bicycle_manufacturing_log.csv'
case_id_column_name = "concept:instance"
activity_column_name = "concept:name"
timestamp_key_name = "time:timestamp"
used_separator = ","  # presumably the CSV field delimiter - confirm against the generator

# Output: file name (without extension) and target directory.
file_name = "bicycle_manufacturing_original"
# The leading '/' matters: HOME_PATH is joined by plain string concatenation,
# and the compliance verification step reads its input from
# HOME_PATH + '/data/output/pre_processing_event_log/'. Without the separator
# the output would be placed in a different directory.
output_path = HOME_PATH + '/data/output/pre_processing_event_log/'

# Use the method in pre_process_event_log_generator.
# NOTE(review): presumably this writes <output_path><file_name>.json and
# returns the pre-processed log - confirm against the generator module.
from src.pre_processing.event_log.pre_process_event_log_generator import create_event_log_pre_process_json
pre_processed_bicycle_original_log = create_event_log_pre_process_json(
    dataframe_path,
    case_id_column_name,
    activity_column_name,
    timestamp_key_name,
    used_separator,
    file_name,
    output_path,
)
print(pre_processed_bicycle_original_log)


In [None]:
# 2. Compliance Verification Process

# ---- Required input data ----
# Pre-processed event log (JSON)
path_preprocessed_event_log = HOME_PATH + '/data/output/pre_processing_event_log/bicycle_manufacturing_original.json'
# Pre-processed process description (JSON)
path_preprocessed_description = HOME_PATH + '/data/output/pre_processing_process_description/gpt-4-v2/bicycle_manufacturing-gpt-4-v2.json'

# ---- Settings ----
# Similarity measure: one of "TF-IDF", "BERT" or "SPACY"
similarity_measure = "TF-IDF"
# Threshold for the activity check: a value between 0 and 1
threshold_activity = 0.65
# Threshold for the resource-activity check: a value between 0 and 1
threshold_resource_activity = 0.65
# Resource levels to check, given as a list built from 'user', 'role',
# 'org_unit' and 'org'.
# NOTE(review): combinations are presumably written as one list, e.g.
# ['user', 'role'] - confirm against compliance_output_creator.
resource_types = ['org_unit']
# True or False. With the default (False) only resources are checked in step 2.
check_resource_and_activity = False
# True runs a pattern check; otherwise the default resource compliance check runs.
perform_pattern_rar_check = False

# ---- Output ----
# File name and directory used to store the results
file_name = 'bicycle_manufacturing_original'
output_path = HOME_PATH + '/data/output/compliance_verification/bicycle_manufacturing_original/'

from compliance_verification.output.compliance_output_creator import (
    create_compliant_json_activity,
    create_compliant_json_ids,
    create_compliant_json_events,
)

# 2.1 Activity check
create_compliant_json_activity(
    path_preprocessed_event_log,
    path_preprocessed_description,
    similarity_measure,
    threshold_activity,
    file_name,
    output_path,
)

# 2.2 Resource-activity check, created once per trace id and once per event
# Trace ids
create_compliant_json_ids(
    path_preprocessed_event_log,
    path_preprocessed_description,
    similarity_measure,
    threshold_resource_activity,
    resource_types,
    check_resource_and_activity,
    perform_pattern_rar_check,
    file_name,
    output_path,
)
# Events
create_compliant_json_events(
    path_preprocessed_event_log,
    path_preprocessed_description,
    similarity_measure,
    threshold_resource_activity,
    resource_types,
    check_resource_and_activity,
    perform_pattern_rar_check,
    file_name,
    output_path,
)
