# Imports and Setup

In [79]:
import pm4py
import numpy as np
import pandas as pd
import pdb
import os
import yatter
from ruamel.yaml import YAML
import kglab

In [80]:
## define paths
# Working directories, all relative to the notebook's location.
data_dir, output_dir, config_dir = '../data', '../output', '../config'

# Create each directory up front; exist_ok makes re-running this cell harmless.
for directory in (data_dir, output_dir, config_dir):
    os.makedirs(directory, exist_ok=True)

# Files used by the rest of the notebook.
mapping_file = f'{data_dir}/sample_mapping.yaml'          # YARRRML mapping (input)
rml_output_path = f'{output_dir}/sample_mapping_rml.ttl'  # translated RML mapping (output)
kg_config_path = f'{config_dir}/sample_kg_config.yaml'

In [81]:
## define KGLab config
# INI-style text: a single [test] section whose `mappings` entry is the RML file path.
config = "\n".join([
    "",
    "[test]",
    f"mappings={rml_output_path}",
    "",
])
# Written to the current working directory as 'config.ini'.
with open('config.ini', 'w') as config_file:
    config_file.write(config)

# define KG namespaces
# NOTE(review): the prefixes carry a trailing ':' -- confirm this is what kglab expects.
namespaces = {
    'ex:': "http://example.com/",
    'on:': "https://stl.mie.utoronto.ca/ontologies/spm/",
}

In [82]:
def load_df_from_log(log_path):
  """
  Load an event log from disk into a dataframe.

  The file format is chosen from the (case-insensitive) file extension:
  `.xes` / `.xes.gz` logs are read with pm4py and converted to a dataframe,
  `.csv` logs are read with `pd.read_csv`.

  Parameters
  ----------
  log_path : str or os.PathLike
      Path to the event log file.

  Returns
  -------
  The dataframe produced by `pm4py.convert_to_dataframe` (XES input) or
  `pd.read_csv` (CSV input).

  Raises
  ------
  ValueError
      If the extension is not one of `.xes`, `.xes.gz` or `.csv`.
  """
  # Accept pathlib.Path objects as well as plain strings.
  path = os.fspath(log_path)
  lowered = path.lower()

  if lowered.endswith(('.xes', '.xes.gz')):
    log = pm4py.read_xes(path)
    return pm4py.convert_to_dataframe(log)

  if lowered.endswith('.csv'):
    return pd.read_csv(path)

  # An unknown extension used to fall through to `return df` with `df` never
  # assigned, surfacing as a confusing UnboundLocalError; fail explicitly instead.
  raise ValueError(
    f"Unsupported log format for {path!r}: expected a .xes, .xes.gz or .csv file"
  )

In [83]:
# view the sample event log
# Build the path from `data_dir` so this cell follows the configured data
# location rather than repeating the '../data' literal.
log_df = load_df_from_log(data_dir + '/sample_log.csv')
log_df.head()

Unnamed: 0,caseID,activityID,eventID,timestamp,resourceID
0,case_0,activity_A,event_0,2016-01-01 09:00:00.000000+00:00,user_1
1,case_0,activity_A,event_1,2016-01-01 09:15:00.000000+00:00,user_1
2,case_0,activity_C,event_2,2016-01-01 09:35:00.000000+00:00,user_1
3,case_1,activity_A,event_3,2016-01-02 09:00:00.000000+00:00,user_1
4,case_1,activity_B,event_4,2016-01-02 09:00:00.000000+00:00,user_0


In [84]:
## convert YARRRML to RML
yaml = YAML(typ='safe', pure=True)

# Read the YARRRML mapping; the context manager closes the handle, which the
# previous bare `open(mapping_file)` never did.
with open(mapping_file) as yarrrml_file:
    yarrrml_content = yaml.load(yarrrml_file)

# Translate the parsed YARRRML document into RML text.
rml_content = yatter.translate(yarrrml_content)

# Write the RML mapping; `with` closes the file even if the write raises.
with open(rml_output_path, 'w') as rml_file:
    rml_file.write(rml_content)

2024-07-29 20:04:06,326 | INFO: Translating YARRRML mapping to [R2]RML
2024-07-29 20:04:06,328 | INFO: RML content is created!


2024-07-29 20:04:06,336 | INFO: Mapping has been syntactically validated.
2024-07-29 20:04:06,337 | INFO: Translation has finished successfully.


In [85]:
# init knowledge graph
kg = kglab.KnowledgeGraph(
    name="event-log-sample",
    namespaces=namespaces,
)

# create instances from mapping (reads the 'config.ini' in the working directory)
kg.materialize('config.ini')

# save rdf instances
instances_path = f'{output_dir}/sample_log_instances.ttl'
kg.save_rdf(instances_path)

2024-07-29 20:04:06,360 | DEBUG: CONFIGURATION: {'output_file': 'knowledge-graph', 'na_values': ',nan', 'safe_percent_encoding': '', 'read_parsed_mappings_path': '', 'write_parsed_mappings_path': '', 'mapping_partitioning': 'PARTIAL-AGGREGATIONS', 'logging_file': '', 'oracle_client_lib_dir': '', 'oracle_client_config_dir': '', 'udfs': '', 'output_dir': '', 'output_format': 'N-TRIPLES', 'only_printable_chars': 'no', 'infer_sql_datatypes': 'no', 'logging_level': 'INFO', 'number_of_processes': '24'}
2024-07-29 20:04:06,362 | DEBUG: DATA SOURCE `test`: {'mappings': '../output/sample_mapping_rml.ttl'}
2024-07-29 20:04:07,060 | INFO: 8 mapping rules retrieved.
2024-07-29 20:04:07,068 | DEBUG: All predicate maps are constant-valued, invariant subset is not enforced.
2024-07-29 20:04:07,073 | DEBUG: All graph maps are constant-valued, invariant subset is not enforced.
2024-07-29 20:04:07,078 | INFO: Mapping partition with 8 groups generated.
2024-07-29 20:04:07,080 | INFO: Maximum number of ru