# Introduction
This notebook contains the code that transforms an OCEL 2.0 log file into a tEKG in live mode.

In [1]:
import time

In [2]:
import json
import os
from datetime import datetime
from itertools import product
from pathlib import Path

import numpy as np
import pandas as pd
import pm4py
from neo4j import GraphDatabase

# Setup 
This section sets the initial variables and sets the environment.

In [3]:
# Configuration: which log to transform and where the timing results are written.
# The defaults point at the running example.
# - To transform another log stored in jsonocel format, changing experiment_name is enough.
# - For a log in a different format, adjust file_path and also replace the
#   pm4py.read.read_ocel2_json call (in the cell that loads the log) with the
#   PM4Py reader that matches that format.

experiment_name = 'runningExample-course'
file_path = f'./ocel2/{experiment_name}.jsonocel'
experiment_path = f'./experiments/live_{experiment_name}.json'

In [4]:
# Legend
# -------------------------------------------------------------------------------------------------
# Numbers in the trailing comments are line numbers of Algorithm 1 in the paper.
#   N[...]  the bracketed lines are inside the loop that starts at line N.
#   N(...)  the parenthesised lines are inside the if statement that starts at line N.

# --- Node labels and relationship types written into the graph -----------------------------------
lbl_log = 'LOG'
lbl_class = 'CLASS'
lbl_event = 'EVENT'
lbl_entity = 'ENTITY'
lbl_snapshot = 'SNAPSHOT'  # used both as a node label and as the Entity->Snapshot relationship type
lbl_derived = 'DERIVED'
lbl_has = 'HAS'
lbl_observed = 'OBSERVED'
lbl_rel = 'REL'
lbl_corr = 'CORR'
lbl_df = 'DF'

# --- Step names: keys under which each step's run time is stored in meta_time --------------------
# Node-creating steps
lbl_meta_node_log = 'node:Log'            # lines 4-6
lbl_meta_node_class = 'node:Class'        # lines 7-9
lbl_meta_node_event = 'node:Event'        # lines 10[11-12]
lbl_meta_node_entity = 'node:Entity'      # lines 15[16-17]
lbl_meta_node_snapshot = 'node:Snapshot'  # lines 15[18,19[20-21]]

# Reification, lines 34[35-37], is timed as the two steps below
# (a single combined key 'node:Reified' is intentionally not defined).
lbl_meta_node_entity_reified = 'node:Reified_Entity'
lbl_meta_node_snapshot_reified = 'node:Reified_Snapshot'

# Relationship-creating steps
lbl_meta_rel_log_has_event = 'rel:has'                                # lines 10[13]
lbl_meta_rel_event_observed_class = 'rel:observed'                    # lines 10[14]
lbl_meta_rel_entity_snapshot_snapshot = 'rel:snapshot'                # lines 15[18,19[22]]
lbl_meta_rel_snapshot_rel_update_snapshot = 'rel:rel:SnapshotUpdate'  # lines 15[18,23[24(25)]]
lbl_meta_rel_entity_rel_entity = 'rel:rel:Entity'                     # lines 27[28]
lbl_meta_rel_snapshot_rel_snapshot = 'rel:rel:Snapshot'               # lines 27[29[30,31[32(33)]]]

lbl_meta_rel_derived = 'rel:derived'                                  # lines 34[38-39]

# Correlation steps
lbl_meta_rel_event_corr = 'rel:corr'  # not referenced by the cells in this notebook
lbl_meta_rel_event_corr_entity = 'rel:corr:Entity'                     # lines 40[41]
lbl_meta_rel_event_corr_entity_reified = 'rel:corr:ReifiedEntity'      # lines 40[42[43]]
lbl_meta_rel_event_corr_snapshot = 'rel:corr:Snapshot'                 # lines 40[44,45[46(47)]]
lbl_meta_rel_event_corr_snapshot_reified = 'rel:corr:ReifiedSnapshot'  # lines 40[44,45[46(48[49])]]

# Directly-follows steps (only lbl_meta_rel_event_df_event is referenced by the cells in this notebook)
lbl_meta_rel_event_df_entity_event = 'rel_Event-df[entity]->Event'
lbl_meta_rel_event_df_snapshot_event = 'rel_Event-df[snapshot]->Event'
lbl_meta_rel_event_df_event = 'rel:df'  # line 50

In [5]:
# Run time of each transformation step in seconds, keyed by the step's lbl_meta_* name.
# Filled in by the cells below and written to experiment_path at the end of the notebook.
meta_time = {}

In [6]:
# Connection settings for the target Neo4j instance.
# Each value can be overridden through an environment variable so that real
# credentials do not have to be stored in the notebook; when a variable is not
# set, the local default used by the running example applies.
URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
AUTH = (
    os.environ.get('NEO4J_USER', 'neo4j'),
    os.environ.get('NEO4J_PASSWORD', '1234'),
)

In [7]:
# Open a short-lived driver only to check that the server at URI can be reached
# with AUTH, so that connection problems surface before any data is written.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()

In [8]:
# Load the log from file_path; the cells below read events, objects, object changes,
# object-to-object relations and event-to-object relations from `ocel`.
ocel = pm4py.read.read_ocel2_json(file_path)

In [9]:
## reset
# Start from an empty graph.
# WARNING: this deletes every node and relationship in the database the driver connects to.
wipe_query = 'MATCH (a) DETACH DELETE a'
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(wipe_query)

# Log node
This section adds a node to tEKG for the log.

In [10]:
# Create the single LOG node and record how long the step takes.
action = lbl_meta_node_log
start = time.time()

create_log_query = f'CREATE (:{lbl_log})'
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(create_log_query)

end = time.time()
elapsed = end - start
print(elapsed)

meta_time[action] = elapsed

0.13913440704345703


# Class nodes
This section adds nodes to tEKG for classes representing event types in OCEL.

In [11]:
# Create one CLASS node per distinct activity (event type) found in the log.
action = lbl_meta_node_class
start = time.time()

# The activity name is passed as a query parameter instead of being spliced into
# the Cypher text, so names containing quotes or backslashes cannot break (or
# inject into) the statement.
create_class_query = "CREATE (:" + lbl_class + " {ID: $class_id})"

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for c in ocel.events[ocel.event_activity].unique():
        driver.execute_query(create_class_query, class_id=str(c))

end = time.time()
print(end - start)
meta_time[action] = end - start

0.11440849304199219


# Event nodes
This section adds nodes to tEKG for events corresponding to events in OCEL.

In [12]:
# Create one EVENT node per event of the log, carrying its id, timestamp and activity.
action = lbl_meta_node_event
start = time.time()

# Values are passed as query parameters rather than concatenated into the Cypher
# text, so ids or activity names containing quotes cannot break the statement.
create_event_query = (
    "CREATE (:" + lbl_event + " {EventID: $event_id, " +
    "timestamp: datetime($ts), Activity: $activity})"
)

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for idx, row in ocel.events.iterrows():
        # NOTE(review): the timestamp is cut to minute precision and given a fixed
        # "+0100" offset. This is left unchanged on purpose: the snapshot cell writes
        # timestamps in the same format and later cells compare the two.
        event_ts = row[ocel.event_timestamp].strftime('%Y-%m-%dT%H:%M') + ':00.000+0100'
        driver.execute_query(
            create_event_query,
            event_id=str(row[ocel.event_id_column]),
            ts=event_ts,
            activity=str(row[ocel.event_activity]),
        )

end = time.time()
print(end - start)
meta_time[action] = end - start

0.13113856315612793


# Entity nodes
This section adds nodes to tEKG for entities corresponding to objects in OCEL.

In [13]:
# Create one ENTITY node per object of the log; every column of ocel.objects
# becomes a string property of the node.
action = lbl_meta_node_entity
start = time.time()

cols = list(ocel.objects.columns)


def _property_key(column_name):
    """Return the ENTITY property key for a column of ``ocel.objects``.

    The object-id column name is replaced by ``ID`` and the object-type column
    name by ``EntityType``; any other column name is returned unchanged.

    This helper was previously called ``map``, which hid Python's builtin
    ``map`` from every cell executed afterwards.
    """
    return (
        column_name
        .replace(ocel.object_id_column, "ID")
        .replace(ocel.object_type_column, "EntityType")
    )


# The properties are sent as one map parameter, so neither keys nor values are
# spliced into the Cypher text and quotes in attribute values cannot break it.
create_entity_query = "CREATE (n:" + lbl_entity + " $props)"

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    # Missing attribute values are stored as empty strings.
    for idx, rows in ocel.objects.fillna('').iterrows():
        props = {_property_key(c): str(rows[c]) for c in cols}
        driver.execute_query(create_entity_query, props=props)

end = time.time()
print(end - start)
meta_time[action] = end - start

0.053876638412475586


# Snapshot nodes
This section adds nodes to tEKG for snapshots that materialize OCEL objects when their value has changed.

In [14]:
# Create the SNAPSHOT nodes: one initial snapshot per entity plus one further
# snapshot for every recorded attribute change.
#
# Snapshots are assembled under the temporary labels INSTANCENODE / INSTANCENODE2
# and copied to the final label only at the end. The statements depend on each
# other's results, so their order must not be changed.
action = lbl_meta_node_snapshot
start = time.time()

# Timestamp of every initial snapshot, based on section 6.4 of the OCEL 2.0
# specification ("specifically 1970-01-01 00:00 UTC"). Only the date and time are
# formatted; the suffix is the same fixed text that is appended to every other
# timestamp written by this notebook.
initial_ts = datetime(1970, 1, 1).strftime('%Y-%m-%dT%H:%M') + ':00.000+0100'

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    # Initial state: a working copy of every ENTITY node, stamped with initial_ts.
    driver.execute_query("match (ent:"+lbl_entity+") create (ins:INSTANCENODE) set ins = ent")
    driver.execute_query("match (n:INSTANCENODE) set n.timestamp = datetime($ts)", ts=initial_ts)

    for idx, row in ocel.object_changes.iterrows():
        changed_field = row[ocel.changed_field]
        # A property key cannot be a query parameter here, so it stays in the query
        # text; backticks inside the name are doubled to keep it a single identifier.
        quoted_field = "`" + str(changed_field).replace("`", "``") + "`"

        # 1) Copy the most recent working copy of this object into a scratch node.
        driver.execute_query(
            "match (n:INSTANCENODE {ID: $object_id}) with n order by n.timestamp DESC " +
            "with collect(n)[0] as n CREATE (r:INSTANCENODE2) set r=n",
            object_id=str(row[ocel.object_id_column]),
        )

        # 2) Apply the change to the scratch node: new timestamp and new value.
        driver.execute_query(
            "match (n:INSTANCENODE2) set n.timestamp = datetime($ts), n." + quoted_field + " = $value",
            ts=row[ocel.event_timestamp].strftime('%Y-%m-%dT%H:%M') + ':00.000+0100',
            value=str(row[changed_field]),
        )

        # 3) Store the scratch node as a new working copy ...
        driver.execute_query("match (o:INSTANCENODE2) CREATE (n:INSTANCENODE) set n=o")

        # 4) ... and drop the scratch node so the next change starts clean.
        driver.execute_query("match (o:INSTANCENODE2) DELETE o")

    # Keep the object's id as rootID, then make ID unique per snapshot: "(id,timestamp)".
    driver.execute_query("match (n:INSTANCENODE) set n.rootID = n.ID")
    driver.execute_query("match (n:INSTANCENODE) set n.ID = '('+n.ID+','+n.timestamp+')'")

    # Publish the working copies under the final label and remove the temporary nodes.
    driver.execute_query("match (ent:INSTANCENODE) create (ins:"+lbl_snapshot+") set ins = ent")
    driver.execute_query("match (n:INSTANCENODE) DELETE n")

end = time.time()
print(end - start)
meta_time[action] = end - start

0.4860105514526367


# has edges
This section adds edges labeled "has" to tEKG to connect the Log node to the Event nodes.

In [15]:
# Connect the LOG node to every EVENT node with a HAS edge.
action = lbl_meta_rel_log_has_event
start = time.time()

has_query = f'MATCH (l:{lbl_log}), (e:{lbl_event}) MERGE (l)-[:{lbl_has}]->(e)'
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(has_query)

end = time.time()
elapsed = end - start
print(elapsed)
meta_time[action] = elapsed

0.13711047172546387


# observed edges
This section adds edges labeled "observed" to tEKG to connect Event nodes to Class nodes.

In [16]:
# Connect every EVENT node to the CLASS node whose ID equals the event's Activity.
action = lbl_meta_rel_event_observed_class
start = time.time()

observed_query = (
    f'MATCH (e:{lbl_event}), (c:{lbl_class}) '
    f'WHERE  e.Activity=c.ID MERGE (e)-[:{lbl_observed}]->(c)'
)
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(observed_query)

end = time.time()
elapsed = end - start
print(elapsed)
meta_time[action] = elapsed

0.08067893981933594


# rel edges (Entity2Entity)
This section adds edges labeled "rel" to tEKG to connect Entity nodes to Entity nodes.

In [17]:
# 
# Add one REL edge between two ENTITY nodes for every object-to-object relation
# of the log; the relation's qualifier is stored in the edge property `qual`.
action = lbl_meta_rel_entity_rel_entity
start = time.time()

# Ids and qualifier are passed as query parameters instead of being spliced into
# the Cypher text, so values containing quotes cannot break the statement.
entity_rel_query = (
    "MATCH (o1:" + lbl_entity + " {ID: $source_id}), (o2:" + lbl_entity + " {ID: $target_id}) " +
    "MERGE (o1)-[:" + lbl_rel + " {qual: $qualifier}]->(o2)"
)

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for idx, row in ocel.o2o.iterrows():
        driver.execute_query(
            entity_rel_query,
            source_id=str(row[ocel.object_id_column]),
            target_id=str(row[ocel.object_id_column + '_2']),
            qualifier=str(row[ocel.qualifier]),
        )

end = time.time()
print(end - start)
meta_time[action] = end - start

0.06569480895996094


# snapshot edges
This section adds edges labeled "snapshot" to tEKG to connect Entity nodes to Snapshot nodes.

In [18]:
# Connect every ENTITY node to the SNAPSHOT nodes whose rootID equals the entity's ID.
# The relationship type is the same text as the snapshot node label.
action = lbl_meta_rel_entity_snapshot_snapshot
start = time.time()

snapshot_edge_query = (
    f'MATCH (o1:{lbl_entity}), (o2:{lbl_snapshot} {{rootID: o1.ID}}) '
    f'MERGE (o1)-[:{lbl_snapshot}]->(o2)'
)
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(snapshot_edge_query)

end = time.time()
elapsed = end - start
print(elapsed)
meta_time[action] = elapsed

0.06178617477416992


# rel edges (update)
This section adds edges labeled "rel" to tEKG to connect consecutive Snapshot nodes of the same object. These edges trace the lifecycle of an object, i.e., how the values of its properties changed over time.

In [19]:
# Chain the snapshots of each entity: order them by timestamp and add a REL edge
# with qual 'UPDATE' from every snapshot to the next one.
action = lbl_meta_rel_snapshot_rel_update_snapshot
start = time.time()

update_query = " ".join([
    f"MATCH (n:{lbl_entity})-[:{lbl_snapshot}]->(i:{lbl_snapshot})",
    "WITH n, i order by i.timestamp",
    # issn holds the positions 0 .. len-2, i.e. every snapshot that has a successor
    "WITH n, collect(i) as iss, range(0, size(collect(i))-2) as issn",
    "UNWIND issn as i MATCH (a), (b)",
    "WHERE a=iss[i] and b=iss[i+1]",
    f"MERGE (a)-[:{lbl_rel} {{qual:'UPDATE'}}]->(b)",
])

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(update_query)

end = time.time()
elapsed = end - start
print(elapsed)
meta_time[action] = elapsed

0.14839911460876465


# rel edges (Snapshot2Snapshot)
This section adds edges labeled "rel" to tEKG to connect Snapshot nodes to Snapshot nodes.

In [20]:
# Lift entity-level REL edges to the snapshot level: for each snapshot e1i of the
# source entity, pick the latest snapshot of the target entity that is not newer
# than e1i and connect the two with a REL edge carrying the same qualifier.
action = lbl_meta_rel_snapshot_rel_snapshot
start = time.time()

snapshot_rel_query = " ".join([
    f"MATCH (e1i)<-[:{lbl_snapshot}]-(e1)-[r:{lbl_rel}]->(e2)-[:{lbl_snapshot}]->(e2i)",
    "WHERE e1i.timestamp>=e2i.timestamp",
    "WITH e1i, r.qual as qualification, e2i order by e2i.timestamp desc",
    # first element after the descending sort = most recent candidate
    "WITH e1i, collect(e2i)[0] as e2i, qualification",
    "MATCH (a), (b)",
    "WHERE a=e1i and b=e2i",
    f"MERGE  (a)-[:{lbl_rel} {{qual:qualification}}]->(b) ",
])

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(snapshot_rel_query)

end = time.time()
elapsed = end - start
print(elapsed)
meta_time[action] = elapsed

0.11516880989074707


# Entity nodes (reified)
This section adds nodes labeled "Entity" to tEKG for the reified ones.

In [21]:
# Reify entity relations: for every REL edge between two ENTITY nodes create a
# new ENTITY node whose ID and EntityType are the "(a,b)" pairs of its endpoints.
action = lbl_meta_node_entity_reified
start = time.time()

# The relationship type comes from lbl_rel (the constant used when the edges were
# created) instead of a hard-coded 'REL', so the two cannot drift apart.
# The extra REIFIED label and the rel1/rel2 properties are temporary bookkeeping
# that the "derived edges" cell reads and then removes.
reify_entity_query = (
    "MATCH (a:" + lbl_entity + ")-[r:" + lbl_rel + "]->(b:" + lbl_entity + ") " +
    "CREATE (c:" + lbl_entity + ":REIFIED {ID:'('+a.ID+','+b.ID+')', " +
    "EntityType:'('+a.EntityType+','+b.EntityType+')', rel1:a.ID, rel2:b.ID})"
)

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(reify_entity_query)

end = time.time()
print(end - start)
meta_time[action] = end - start

0.05899357795715332


# Snapshot nodes (reified)
This section adds nodes labeled "Snapshot" to tEKG for the reified ones.

In [22]:
# Reify snapshot relations: for every REL edge between two SNAPSHOT nodes create
# a new SNAPSHOT node whose ID and EntityType are the "(a,b)" pairs of its endpoints.
action = lbl_meta_node_snapshot_reified
start = time.time()

# The relationship type comes from lbl_rel (the constant used when the edges were
# created) instead of a hard-coded 'REL', so the two cannot drift apart.
# The extra REIFIED label and the rel1/rel2 properties are temporary bookkeeping
# that the "derived edges" cell reads and then removes.
# NOTE(review): this pattern also matches the qual:'UPDATE' edges created between
# consecutive snapshots of the same object - confirm that reifying those is intended.
reify_snapshot_query = (
    "MATCH (a:" + lbl_snapshot + ")-[r:" + lbl_rel + "]->(b:" + lbl_snapshot + ") " +
    "CREATE (c:" + lbl_snapshot + ":REIFIED {ID:'('+a.ID+','+b.ID+')', " +
    "EntityType:'('+a.EntityType+','+b.EntityType+')', rel1:a.ID, rel2:b.ID})"
)

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(reify_snapshot_query)

end = time.time()
print(end - start)
meta_time[action] = end - start

0.07143712043762207


# derived edges
This section adds edges labeled "derived" to tEKG to connect reified nodes (either Snapshot or Entity) to their corresponding Snapshot or Entity nodes.

In [23]:
# Link every reified node to the two nodes it was built from (DERIVED edges),
# then remove the temporary bookkeeping left on the reified nodes.
action = lbl_meta_rel_derived
start = time.time()

# Executed strictly in this order: the first statement needs the REIFIED label and
# the rel1/rel2 properties that the second and third statements remove.
derived_steps = (
    # 1) reified node -> each of its two source nodes
    "MATCH (a:REIFIED), (b) "
    "WHERE b.ID=a.rel1 or b.ID=a.rel2 "
    f"MERGE (a)-[:{lbl_derived}]->(b)",

    # 2) drop the helper properties
    "MATCH (n:REIFIED) "
    "REMOVE n.rel1, n.rel2",

    # 3) drop the helper label
    "MATCH (n:REIFIED) "
    "REMOVE n:REIFIED",

    # 4) copy the root ids and timestamps of the two sources onto the reified node.
    #    `IS NOT NULL` is the property-existence check of current Neo4j versions;
    #    older versions used the exists() function for this.
    f"MATCH (a)<-[:{lbl_derived}]-(x)-[:{lbl_derived}]->(b) "
    "WHERE a.timestamp IS NOT NULL and a.timestamp<=b.timestamp "
    "SET "
    "    x.root1ID = a.rootID, "
    "    x.root2ID = b.rootID, "
    "    x.root1timestamp = a.timestamp, "
    "    x.root2timestamp = b.timestamp",
)

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for step_query in derived_steps:
        driver.execute_query(step_query)

end = time.time()
elapsed = end - start
print(elapsed)
meta_time[action] = elapsed

0.2457294464111328


# corr edges
This section adds edges labeled "corr" to tEKG. 

## corr (Entity)
This sub-section adds edges labeled "corr" to tEKG to connect Event nodes to Entity nodes.

In [24]:
# Add a CORR edge from an EVENT node to an ENTITY node for every event-to-object
# relation of the log.
action = lbl_meta_rel_event_corr_entity
start = time.time()

# Ids are passed as query parameters instead of being spliced into the Cypher
# text, so values containing quotes cannot break the statement. The relation's
# qualifier is not stored on CORR edges, so it is no longer read here.
corr_entity_query = (
    "MATCH (e:" + lbl_event + " {EventID: $event_id}), (o:" + lbl_entity + " {ID: $object_id}) " +
    "MERGE (e)-[:" + lbl_corr + "]->(o)"
)

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for idx, row in ocel.relations.iterrows():
        driver.execute_query(
            corr_entity_query,
            event_id=str(row[ocel.event_id_column]),
            object_id=str(row[ocel.object_id_column]),
        )

end = time.time()
print(end - start)
meta_time[action] = end - start

0.17481541633605957


## corr (Snapshot)
This sub-section adds edges labeled "corr" to tEKG to connect Event nodes to Snapshot nodes.

In [25]:
# Add a CORR edge from an EVENT node to a SNAPSHOT node for every event-to-object
# relation of the log: among the object's snapshots that are not newer than the
# event, the most recent one is chosen.
action = lbl_meta_rel_event_corr_snapshot
start = time.time()

# Ids are passed as query parameters instead of being spliced into the Cypher
# text, so values containing quotes cannot break the statement. The relation's
# qualifier is not stored on CORR edges, so it is no longer read here.
corr_snapshot_query = (
    "MATCH (e:" + lbl_event + " {EventID: $event_id}), (o:" + lbl_snapshot + " {rootID: $object_id}) " +
    "WHERE e.timestamp>=o.timestamp " +
    "WITH e, o order by o.timestamp desc " +
    "WITH e, collect(o)[0] as o " +  # first element after the descending sort = latest snapshot
    "MERGE (e)-[:" + lbl_corr + "]->(o)"
)

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    for idx, row in ocel.relations.iterrows():
        driver.execute_query(
            corr_snapshot_query,
            event_id=str(row[ocel.event_id_column]),
            object_id=str(row[ocel.object_id_column]),
        )

end = time.time()
print(end - start)
meta_time[action] = end - start

0.2141125202178955


In [26]:
# The two preceding blocks (corr to Entity and corr to Snapshot) each loop over ocel.relations and can be merged in production. They are implemented separately in this experiment to enable measuring times separately. (One loop is enough)

## corr (reified Entity)
This sub-section adds edges labeled "corr" to tEKG to connect Event nodes to reified Entity nodes.

In [27]:
# Propagate correlation to reified entities: an event correlated to an ENTITY node
# is also correlated to every node that has a DERIVED edge to that entity.
action = lbl_meta_rel_event_corr_entity_reified
start = time.time()

corr_reified_entity_query = " ".join([
    f"MATCH (e:{lbl_event})-[:{lbl_corr}]->(o:{lbl_entity})",
    f"MATCH (o1)-[:{lbl_derived}]->(o)",
    f"MERGE (e)-[:{lbl_corr}]->(o1) ",
])

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(corr_reified_entity_query)

end = time.time()
elapsed = end - start
print(elapsed)
meta_time[action] = elapsed

0.04520750045776367


## corr (reified Snapshot)
This sub-section adds edges labeled "corr" to tEKG to connect Event nodes to reified Snapshot nodes.

In [28]:
# Propagate correlation to reified snapshots: an event correlated to a SNAPSHOT node
# is also correlated to every node that has a DERIVED edge to that snapshot.
action = lbl_meta_rel_event_corr_snapshot_reified
start = time.time()

corr_reified_snapshot_query = " ".join([
    f"MATCH (e:{lbl_event})-[:{lbl_corr}]->(o:{lbl_snapshot})",
    f"MATCH (o1)-[:{lbl_derived}]->(o)",
    f"MERGE (e)-[:{lbl_corr}]->(o1) ",
])

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.execute_query(corr_reified_snapshot_query)

end = time.time()
elapsed = end - start
print(elapsed)
meta_time[action] = elapsed

0.06093311309814453


# df edges
This section adds edges labeled "df" to tEKG to connect each Event node to the Event node that directly follows it with respect to a shared Entity or Snapshot node.

In [29]:
# Build the directly-follows (DF) edges between events and then prune DF edges
# that are flagged as redundant. The measured time covers both driver sessions below.
action = lbl_meta_rel_event_df_event
start = time.time()

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    
    # For every node n that events are correlated to: order its events by timestamp
    # and connect each event to the next one with a DF edge that records n's
    # EntityType and ID. esn holds the positions 0 .. len-2, i.e. every event that
    # has a successor.
    driver.execute_query(
    "MATCH (e:"+lbl_event+")-[:"+lbl_corr+"]->(n)  " +
    "WITH n, e order by e.timestamp  " +
    "WITH n, collect(e) as es, range(0, size(collect(e))-2) as esn " +
    "UNWIND esn as i  " +
    "MATCH (a), (b)  " +
    "WHERE a=es[i] and b=es[i+1]  " +
    "MERGE (a)-[:"+lbl_df+" {EntityType:n.EntityType, EntityID:n.ID}]->(b) "
    )

# removing parallel dfs
# The temporary edge property addNewKnowledge marks which DF edges are kept.
# The five statements below depend on each other and must run in this order.

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    # 1) Start by marking every DF edge as worth keeping.
    driver.execute_query(
        "MATCH ()-[r:"+lbl_df+"]->() " +
        "SET r.addNewKnowledge = TRUE " 
    )
    # 2) Mark a DF edge r2 as redundant when it belongs to a node rn that is
    #    DERIVED from n and the same event pair (e1, e2) already has a DF edge r1
    #    for n itself.
    driver.execute_query(
        "MATCH (n)<-[:"+lbl_corr+"]-(e1:"+lbl_event+")-[r1:"+lbl_df+" {EntityID:n.ID}]->(e2:"+lbl_event+") " +
        "MATCH (n)<-[:"+lbl_derived+"]-(rn)<--(e1)-[r2:"+lbl_df+"  {EntityID:rn.ID}]->(e2) " +
        "SET r2.addNewKnowledge = FALSE " 
    )
    # 3) Re-mark an edge r as kept when it sits between two kept DF edges rb and ra.
    #    NOTE(review): rb.ID and r.ID read a property `ID` on DF relationships, but
    #    the statements in this notebook only set EntityType, EntityID and
    #    addNewKnowledge on them. If no `ID` exists, this pattern matches nothing.
    #    Presumably rb.EntityID / r.EntityID was intended - confirm before changing.
    driver.execute_query(
        "MATCH ()-[rb:"+lbl_df+" {addNewKnowledge:TRUE}]->()-[r:"+lbl_df+"  {EntityID:rb.ID}]->()-[ra:"+lbl_df+"  {EntityID:r.ID, addNewKnowledge:TRUE}]->() " +
        "SET r.addNewKnowledge=TRUE " 
    )
    # 4) Delete the DF edges that are still marked redundant.
    driver.execute_query(
        "MATCH ()-[r:"+lbl_df+" {addNewKnowledge:FALSE}]->() " +
        "DELETE r " 
    )
    # 5) Remove the temporary marker from the remaining DF edges.
    driver.execute_query(
        "MATCH ()-[r:"+lbl_df+"]->() " +
        "REMOVE  r.addNewKnowledge " 
    )


end = time.time()
print(end - start)
meta_time[action] = end - start

0.37335729598999023


# Exporting meta data

In [30]:
# Write the collected step timings to experiment_path as JSON.
# The target directory is created first, so the export also works on a fresh
# checkout where the output folder does not exist yet.
output_file = Path(experiment_path)
output_file.parent.mkdir(parents=True, exist_ok=True)

with output_file.open("w") as fp:
    json.dump(meta_time, fp)  # encode dict into JSON