## Install Required Libraries

In [77]:
!pip install neo4j



## Import Libraries

In [78]:
import xml.etree.ElementTree as ET
from neo4j import GraphDatabase
import os
import re
import html

## Define Connection to Neo4j

In [79]:
# Neo4j connection details
uri = "bolt://localhost:7687"
username = "neo4j"
password = "170202Kcf"

# Create a driver instance
driver = GraphDatabase.driver(uri, auth=(username, password))

In [80]:
def test_connection():
    try:
        with driver.session(database="erpbpmn") as session:
            result = session.run("RETURN 1 AS test")
            for record in result:
                print(f"Connection successful, test query result: {record['test']}")
    except Exception as e:
        print(f"Failed to connect to Neo4j: {e}")

## Define Functions to Create Nodes and Relationships

In [81]:
# Define functions to create nodes and relationships
def create_node(tx, label, properties):
    query = (
        f"MERGE (n:{label} {{id: $properties.id}}) "
        "SET n += $properties "
        "RETURN n"
    )
    result = tx.run(query, properties=properties)
    return result.single()[0]

def create_relationship_with_id(tx, label1, id1, label2, id2, rel_type, properties):
    query = (
        f"MATCH (a:{label1} {{id: $id1}}), (b:{label2} {{id: $id2}}) "
        f"MERGE (a)-[r:{rel_type} {{id: $properties.id}}]->(b) "
        "SET r += $properties "
        "RETURN r"
    )
    result = tx.run(query, id1=id1, id2=id2, properties=properties)
    record = result.single()
    if record:
        return record[0]
    else:
        print(f"Warning: Could not create relationship {rel_type} between {label1}({id1}) and {label2}({id2}). One of the nodes may not exist.")
        return None

In [82]:
# Function to clean and format name values
def clean_name(name):
    # Remove HTML tags
    name = re.sub('<[^<]+?>', '', name)
    # Decode HTML entities
    name = html.unescape(name)
    # Remove leading/trailing whitespace
    name = name.strip()
    return name

## Parse BPMN XML Files and Load into Neo4j

Loop through each BPMN XML file in the `assets` directory.

In [83]:
# Function to parse draw.io BPMN XML files
def parse_drawio_bpmn_xml(file_path, level):
    tree = ET.parse(file_path)
    root = tree.getroot()

    tasks = []
    events = []
    gateways = []
    sequence_flows = []

    # Define the namespace if any
    namespace = {'mx': 'http://www.mxgraph.com/2006/mxGraphModel'}

    # Adjusted parsing to account for accurate element paths
    for cell in root.findall('.//mxCell'):
        style = cell.get('style', '')
        cell_id = cell.get('id')
        value = clean_name(cell.get('value', ''))
        if 'shape=mxgraph.bpmn.task' in style:
            tasks.append({'id': f"{level}_{cell_id}", 'name': value, 'level': level})
        elif 'shape=mxgraph.bpmn.event' in style:
            if 'outline=end' in style:
                events.append({'id': f"{level}_{cell_id}", 'name': value, 'type': 'EndEvent', 'level': level})
            else:
                events.append({'id': f"{level}_{cell_id}", 'name': value, 'type': 'StartEvent', 'level': level})
        elif 'shape=mxgraph.bpmn.gateway' in style or 'shape=mxgraph.bpmn.gateway2' in style:
            gateways.append({'id': f"{level}_{cell_id}", 'name': value, 'type': 'ExclusiveGateway', 'level': level})
        elif cell.get('edge') == '1':
            sequence_flows.append({
                'id': f"{level}_{cell_id}",
                'sourceRef': f"{level}_{cell.get('source')}",
                'targetRef': f"{level}_{cell.get('target')}",
                'name': value,
                'level': level
            })

    return tasks, events, gateways, sequence_flows, root

In [84]:
# Helper function to determine the label of an element by its ID
def get_element_label_by_id(root, element_id):
    # Extract level and original ID
    try:
        level, cell_id = element_id.split('_', 1)
    except ValueError:
        cell_id = element_id
    for cell in root.findall('.//mxCell'):
        if cell.get('id') == cell_id:
            style = cell.get('style', '').lower()
            if 'shape=mxgraph.bpmn.task' in style:
                return 'Task'
            elif 'shape=mxgraph.bpmn.event' in style:
                if 'outline=end' in style:
                    return 'EndEvent'
                else:
                    return 'StartEvent'
            elif 'shape=mxgraph.bpmn.gateway' in style or 'shape=mxgraph.bpmn.gateway2' in style:
                return 'ExclusiveGateway'
            else:
                return 'Element'
    return 'Element'

In [85]:
# Main function to process all BPMN files
def process_bpmn_files():
    bpmn_dir = './assets'
    with driver.session(database="erpbpmn") as session:
        for filename in os.listdir(bpmn_dir):
            if filename.endswith('.xml'):
                file_path = os.path.join(bpmn_dir, filename)
                # Extract level from filename (assuming filename contains 'Level X')
                match = re.search(r'Level (\d+)', filename, re.IGNORECASE)
                level = int(match.group(1)) if match else 0
                print(f"\nProcessing {filename} at Level {level}...")
                tasks, events, gateways, sequence_flows, root = parse_drawio_bpmn_xml(file_path, level)

                print(f"Parsed {len(tasks)} tasks, {len(events)} events, {len(gateways)} gateways, {len(sequence_flows)} sequence flows from {filename}")

                # Create nodes
                for task in tasks:
                    session.write_transaction(create_node, 'Task', task)
                    print(f"Created Task node: {task}")
                for event in events:
                    session.write_transaction(create_node, event['type'], event)
                    print(f"Created {event['type']} node: {event}")
                for gateway in gateways:
                    session.write_transaction(create_node, gateway['type'], gateway)
                    print(f"Created {gateway['type']} node: {gateway}")

                # Create relationships
                for flow in sequence_flows:
                    source_id = flow['sourceRef']
                    target_id = flow['targetRef']
                    rel_properties = {'id': flow['id'], 'name': flow.get('name'), 'level': level}
                    # Determine source and target labels
                    source_label = get_element_label_by_id(root, source_id)
                    target_label = get_element_label_by_id(root, target_id)
                    # Create relationship
                    rel_created = session.write_transaction(
                        create_relationship_with_id,
                        source_label, source_id, target_label, target_id,
                        'SEQUENCE_FLOW', rel_properties
                    )
                    if rel_created:
                        print(f"Created SEQUENCE_FLOW relationship from {source_label}({source_id}) to {target_label}({target_id}) with properties {rel_properties}")
                    else:
                        print(f"Failed to create SEQUENCE_FLOW relationship from {source_id} to {target_id}")


In [86]:
# Run the processing function
if __name__ == "__main__":
    test_connection()
    process_bpmn_files()
    driver.close()

Connection successful, test query result: 1

Processing BPMN Account Payable Level 1.xml at Level 1...
Parsed 1 tasks, 2 events, 0 gateways, 2 sequence flows from BPMN Account Payable Level 1.xml
Created Task node: {'id': '1_6', 'name': 'Account Payable', 'level': 1}
Created StartEvent node: {'id': '1_4', 'name': '', 'type': 'StartEvent', 'level': 1}
Created EndEvent node: {'id': '1_8', 'name': '', 'type': 'EndEvent', 'level': 1}
Created SEQUENCE_FLOW relationship from Task(1_6) to EndEvent(1_8) with properties {'id': '1_5', 'name': '', 'level': 1}
Created SEQUENCE_FLOW relationship from StartEvent(1_4) to Task(1_6) with properties {'id': '1_7', 'name': '', 'level': 1}

Processing BPMN Account Payable Level 2.xml at Level 2...
Parsed 21 tasks, 2 events, 11 gateways, 38 sequence flows from BPMN Account Payable Level 2.xml
Created Task node: {'id': '2_25', 'name': 'Reviewing Purchase Return', 'level': 2}
Created Task node: {'id': '2_26', 'name': 'Approving Purchase Return', 'level': 2}
C

  session.write_transaction(create_node, 'Task', task)
  session.write_transaction(create_node, event['type'], event)
  rel_created = session.write_transaction(
  session.write_transaction(create_node, gateway['type'], gateway)


Created ExclusiveGateway node: {'id': '2_72', 'name': '', 'type': 'ExclusiveGateway', 'level': 2}
Created ExclusiveGateway node: {'id': '2_80', 'name': '', 'type': 'ExclusiveGateway', 'level': 2}
Created SEQUENCE_FLOW relationship from ExclusiveGateway(2_80) to ExclusiveGateway(2_13) with properties {'id': '2_3', 'name': '', 'level': 2}
Created SEQUENCE_FLOW relationship from ExclusiveGateway(2_80) to ExclusiveGateway(2_41) with properties {'id': '2_4', 'name': '', 'level': 2}
Created SEQUENCE_FLOW relationship from Task(2_64) to Task(2_32) with properties {'id': '2_6', 'name': '', 'level': 2}
Created SEQUENCE_FLOW relationship from Task(2_67) to Task(2_31) with properties {'id': '2_7', 'name': '', 'level': 2}
Created SEQUENCE_FLOW relationship from Task(2_56) to Task(2_25) with properties {'id': '2_8', 'name': '', 'level': 2}
Created SEQUENCE_FLOW relationship from Task(2_42) to Task(2_27) with properties {'id': '2_9', 'name': '', 'level': 2}
Created SEQUENCE_FLOW relationship from Ex

## Close the Driver

In [87]:
driver.close()