# Imports and Setup

In [1]:
import ProMean4Py as pmp
from ProMean4Py import LogProcessor

import os

In [2]:
## define paths
# Base directories, all relative to the notebook's working directory.
data_dir = '../data'
output_dir = '../output'
config_dir = '../config'
ontology_dir = f'{config_dir}/ontology'

# Create every directory up front (no error if it already exists).
for _required_dir in (data_dir, output_dir, config_dir, ontology_dir):
    os.makedirs(_required_dir, exist_ok=True)

# Individual files located inside the directories above.
mapping_file = f'{data_dir}/sample_mapping.yaml'
rml_output_path = f'{output_dir}/sample_mapping_rml.ttl'
kg_config_path = f'{config_dir}/sample_kg_config.ini'
ontology_file = f'{ontology_dir}/ex_events_ontology.owl'

# Example Usage

In [31]:
# Example usage: build a knowledge graph from the sample CSV log and export it
# together with its first-order-logic (FOL) representation.

# Column mapping handed to LogProcessor as `column_dict`.
# NOTE(review): presumably keys are the library's field names and values are the
# CSV column headers — confirm against the LogProcessor documentation.
col_dict = {
    'case_id': 'caseID',
    'activity': 'activityID',
    'timestamp': 'timestamp',
    'resource': 'resourceID',
    'event_id': 'eventID',
}

# Prefix -> namespace IRI pairs handed to LogProcessor as `prefixes`.
namespaces = {
    'ex': "http://example.com/",
    'on': "https://stl.mie.utoronto.ca/ontologies/spm/",
}

# Use a cell-specific name so the notebook-wide `output_dir` from the setup cell
# ('../output') is not silently overwritten by this example.
example_output_dir = '../output/testing/'
# The setup cell only creates '../output'; make sure this sub-directory exists
# before anything is written into it.
os.makedirs(example_output_dir, exist_ok=True)

log_processor = LogProcessor(
    '../data/sample_log.csv',
    process_name='P1',
    column_dict=col_dict,
    prefixes=namespaces,
)
log_processor.save_knowledge_graph(example_output_dir, format='xml')
log_processor.save_FOL(example_output_dir)

2025-01-06 21:34:36,141 | INFO: Translating YARRRML mapping to [R2]RML
2025-01-06 21:34:36,143 | INFO: RML content is created!
2025-01-06 21:34:36,159 | INFO: Mapping has been syntactically validated.
2025-01-06 21:34:36,160 | INFO: Translation has finished successfully.
2025-01-06 21:34:36,165 | DEBUG: CONFIGURATION: {'output_file': 'knowledge-graph', 'na_values': ',nan', 'safe_percent_encoding': '', 'read_parsed_mappings_path': '', 'write_parsed_mappings_path': '', 'mapping_partitioning': 'PARTIAL-AGGREGATIONS', 'logging_file': '', 'udfs': '', 'output_kafka_server': '', 'output_kafka_topic': '', 'output_dir': '', 'output_format': 'N-TRIPLES', 'only_printable_chars': 'no', 'infer_sql_datatypes': 'no', 'logging_level': 'INFO', 'number_of_processes': '24'}
2025-01-06 21:34:36,166 | DEBUG: DATA SOURCE `P1`: {'mappings': '/tmp/tmpz106qz51.ttl'}
2025-01-06 21:34:37,803 | INFO: 9 mapping rules retrieved.
2025-01-06 21:34:37,817 | DEBUG: All predicate maps are constant-valued, invariant subs

In [16]:
# Display the processor's `fol_abox` attribute. The recorded output below is an
# array of fact strings such as 'Event(E_event_3)' or 'hasCase(E_event_9, C_case_2)'.
log_processor.fol_abox

array(['Activity(A_activity_C)', 'Activity(A_activity_B)',
       'Activity(A_activity_D)', 'Activity(A_activity_A)',
       'Event(E_event_3)', 'Event(E_event_2)', 'Event(E_event_0)',
       'Event(E_event_9)', 'Event(E_event_7)', 'Event(E_event_1)',
       'Event(E_event_6)', 'Event(E_event_8)', 'Event(E_event_5)',
       'Event(E_event_4)', 'Resource(R_user_0)', 'Resource(R_user_2)',
       'Resource(R_user_1)', 'Case(C_case_1)', 'Case(C_case_2)',
       'Case(C_case_0)', 'differentFrom(E_event_2, E_event_6)',
       'hasResource(E_event_6, R_user_1)', 'hasCase(E_event_9, C_case_2)',
       'differentFrom(C_case_2, C_case_1)',
       'differentFrom(A_activity_A, A_activity_C)',
       'hasActivity(E_event_0, A_activity_A)',
       'differentFrom(E_event_8, E_event_0)',
       'differentFrom(E_event_8, E_event_7)',
       'differentFrom(E_event_9, E_event_0)',
       'differentFrom(E_event_9, E_event_7)',
       'hasResource(E_event_9, R_user_1)',
       'hasResource(E_event_3, R_use

In [17]:
# SPARQL: every subject ?s paired with its `ns1:hasRecordedTime` value ?t.
# NOTE(review): `ns1` is not one of the prefixes passed to LogProcessor ('ex', 'on');
# it is presumably an auto-generated binding — confirm it is stable across runs.
tp_query = "SELECT ?s ?t WHERE {?s ns1:hasRecordedTime ?t}"

# Run the query against the processor's knowledge graph and collect the rows.
kg = log_processor.kg
df = kg.query_as_df(sparql=tp_query)

# Distinct values bound to ?t.
df['t'].unique()
    

array([rdflib.term.Literal('2016-01-01 09:15:00.000000+00:00', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#dateTimeStamp')),
       rdflib.term.Literal('2016-01-02 09:00:00.000000+00:00', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#dateTimeStamp')),
       rdflib.term.Literal('2016-01-01 09:00:00.000000+00:00', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#dateTimeStamp')),
       rdflib.term.Literal('2016-01-01 09:10:00.000000+00:00', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#dateTimeStamp')),
       rdflib.term.Literal('2017-01-06 10:35:00.000000+00:00', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#dateTimeStamp')),
       rdflib.term.Literal('2016-01-03 09:00:00.000000+00:00', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#dateTimeStamp')),
       rdflib.term.Literal('2016-01-01 09:35:00.000000+00:00', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#dateTimeStamp'))]

In [18]:
# Materialise every item yielded by the underlying RDF graph into a list for display.
list(kg.rdf_graph())

[(rdflib.term.URIRef('http://example.com/C_case_2'),
  rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
  rdflib.term.URIRef('https://stl.mie.utoronto.ca/ontologies/spm/Case')),
 (rdflib.term.URIRef('http://example.com/E_event_2'),
  rdflib.term.URIRef('http://www.w3.org/2002/07/owl#differentFrom'),
  rdflib.term.URIRef('http://example.com/E_event_6')),
 (rdflib.term.URIRef('http://example.com/E_event_6'),
  rdflib.term.URIRef('https://stl.mie.utoronto.ca/ontologies/spm/hasResource'),
  rdflib.term.URIRef('http://example.com/R_user_1')),
 (rdflib.term.URIRef('http://example.com/E_event_9'),
  rdflib.term.URIRef('https://stl.mie.utoronto.ca/ontologies/spm/hasCase'),
  rdflib.term.URIRef('http://example.com/C_case_2')),
 (rdflib.term.URIRef('http://example.com/C_case_2'),
  rdflib.term.URIRef('http://www.w3.org/2002/07/owl#differentFrom'),
  rdflib.term.URIRef('http://example.com/C_case_1')),
 (rdflib.term.URIRef('http://example.com/A_activity_A'),
  rdflib.term.URI

# Ping-Pong Example

In [10]:
# Run the pipeline on the BPI Challenge 2013 incidents log (gzipped XES file).
namespaces = {
    'ex': "http://example.com/",
    'on': "https://stl.mie.utoronto.ca/ontologies/spm/",
}
output_dir = '../output/testing/pingpong'

# No `column_dict` is passed here, unlike the CSV example.
# NOTE(review): `downsample_rate=0.5` presumably keeps roughly half of the log —
# confirm the exact semantics (and whether sampling is seeded) in LogProcessor.
log_processor = LogProcessor(
    '../data/BPIC/BPI_Challenge_2013_incidents.xes.gz',
    process_name='BPI2013Inc',
    prefixes=namespaces,
    downsample_rate=0.5,
)

# Export the knowledge graph as XML, then the Datalog representation.
log_processor.save_knowledge_graph(output_dir, format='xml')
log_processor.save_datalog(output_dir)

parsing log, completed traces :: 100%|██████████| 7554/7554 [00:04<00:00, 1591.49it/s]
2025-01-07 16:54:17,211 | INFO: Translating YARRRML mapping to [R2]RML
2025-01-07 16:54:17,212 | INFO: RML content is created!
2025-01-07 16:54:17,219 | INFO: Mapping has been syntactically validated.
2025-01-07 16:54:17,219 | INFO: Translation has finished successfully.
2025-01-07 16:54:17,247 | DEBUG: CONFIGURATION: {'output_file': 'knowledge-graph', 'na_values': ',nan', 'safe_percent_encoding': '', 'read_parsed_mappings_path': '', 'write_parsed_mappings_path': '', 'mapping_partitioning': 'PARTIAL-AGGREGATIONS', 'logging_file': '', 'udfs': '', 'output_kafka_server': '', 'output_kafka_topic': '', 'output_dir': '', 'output_format': 'N-TRIPLES', 'only_printable_chars': 'no', 'infer_sql_datatypes': 'no', 'logging_level': 'INFO', 'number_of_processes': '24'}
2025-01-07 16:54:17,248 | DEBUG: DATA SOURCE `BPI2013Inc`: {'mappings': '/tmp/tmp01jhm4bm.ttl'}


Generating knowledge graph...


2025-01-07 16:54:17,894 | INFO: 9 mapping rules retrieved.
2025-01-07 16:54:17,902 | DEBUG: All predicate maps are constant-valued, invariant subset is not enforced.
2025-01-07 16:54:17,906 | DEBUG: All graph maps are constant-valued, invariant subset is not enforced.
2025-01-07 16:54:17,909 | INFO: Mapping partition with 9 groups generated.
2025-01-07 16:54:17,911 | INFO: Maximum number of rules within mapping group: 1.
2025-01-07 16:54:17,912 | INFO: Mappings processed in 0.661 seconds.
2025-01-07 16:54:17,914 | DEBUG: Parallelizing with 24 cores.
2025-01-07 16:54:18,850 | INFO: Number of triples generated in total: 171814.


Knowledge graph generated.
Knowledge graph saved.
Generating First Order Logic representation...
First Order Logic representation generated.
