In [22]:
import os
import requests
from tensorflow.keras.models import load_model

def download_and_load_model(file_url, model_dir):
    # Extract the model name from the file_url
    model_name = file_url.split('/')[-1]
    # Create the model path by joining the model directory and model name
    model_path = os.path.join(model_dir, model_name)

    # Create the directory if it does not exist
    os.makedirs(model_dir, exist_ok=True)

    # Send a HTTP request to the URL of the file, with the parameter stream set to True
    response = requests.get(file_url, stream=True)

    # Check if the request is successful
    if response.status_code == 200:
        # Open the file in binary mode and write the response content to it
        with open(model_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=128):
                file.write(chunk)

    # Load the model from the file
    keras_model = load_model(model_path)
    return keras_model

# URL of the model file
file_url = "https://huggingface.co/svercoutere/bpmn-search-0.1.0/resolve/main/bpmn_search_embedding_model.h5"
model_dir = 'models\\search_models\\'

keras_model = download_and_load_model(file_url, model_dir)

ValueError: Unknown layer: 'GCNConv'. Please ensure you are using a `keras.utils.custom_object_scope` and that this object is included in the scope. See https://www.tensorflow.org/guide/keras/save_and_serialize#registering_the_custom_object for details.

In [99]:
import os
import json
from tqdm import tqdm
import requests
import time
import random

def process_json_file(file_path, num_skips = 0, num_samples=-1, create_embeddings=False):
    errors = 0
    seen_texts = set()
    embeddings = {}
    beschrijvingen = {}
    
    # Read the JSON file
    with open(file_path, 'r') as f:
        data = json.load(f)
    
    bindings = data['results']['bindings']
    random.shuffle(bindings)

    # Process each binding
    for binding in tqdm(bindings[num_skips:num_samples]):
        # Get the 'sub' value
        sub = binding['url']['value'] if 'url' in binding else binding['sub']['value']
        # Get the 'beschrijving' value

        beschrijving = ''
        if 'titel' in binding:
            beschrijving += binding['titel']['value'] +" : "
        
        beschrijving += binding['beschrijving']['value']
        beschrijving = beschrijving.strip()

        # Skip if this text has already been seen
        if beschrijving in seen_texts:
            continue

        # Add the text to the set of seen texts
        seen_texts.add(beschrijving)

        # Prepare the data for the POST request
        data = {
            "text": beschrijving,
            "uri": sub
        }

        if data['text'] == " : ":
            print(data)
            errors+=1
            continue

        if create_embeddings:
            embeddings[sub] = graph_embedder.extract_document_embedding(data['text'])
            beschrijvingen[sub] = data['text']
        continue
        # Make the POST request
        response = requests.post(
            'http://localhost:2000/tasks/text',
            headers={'accept': 'application/json', 'Content-Type': 'application/json'},
            data=json.dumps(data)
        )
    
        # Check the response
        if response.status_code != 200:
            print(f"POST request failed with status code {response.status_code} for URI {sub}")
            print(f"Error message: {response.text}")
    print(f"Errors: {errors} from Seen Texts: {len(seen_texts)}")
    return embeddings, beschrijvingen

embeddings , beschrijvingen = process_json_file('demo/agendapunten/besluitenVlaanderen.json',num_skips = 0, num_samples=10000, create_embeddings=True)

 58%|█████▊    | 5850/10000 [09:36<08:13,  8.41it/s]

In [94]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def find_most_similar(query, embeddings, details, top_n=5):
    # Get the embedding of the query
    query_embedding = graph_embedder.extract_document_embedding(query)
    print(query_embedding.shape)
    
    # Calculate the similarity between the query and each embedding
    similarities = {url: cosine_similarity([query_embedding], [embedding]) for url, embedding in embeddings.items()}
    #similarities = {url: np.dot(query_embedding, embedding) for url, embedding in embeddings.items()}    

    # Sort the URLs by similarity
    sorted_urls = sorted(similarities, key=similarities.get, reverse=True)
    
    # Get the top_n most similar URLs
    top_urls = sorted_urls[:top_n]
    
    # Prepare the results
    results = []
    for url in top_urls:
        result = {
            'url': url,
            'beschrijving': details[url],
            'score': similarities[url]
        }
        results.append(result)
    
    return results

query = "Bruneaustraat"
results = find_most_similar(query, embeddings, beschrijvingen, top_n=10)

for result in results:
    print(f"URL: {result['url']}")
    print(f"Beschrijving: {result['beschrijving']}")
    print(f"Score: {result['score']}")
    print()

URL: https://data.gent.be/id/besluiten/22.0321.4591.0538
Beschrijving: 2022_GR_00357 - Definitieve vaststelling van de naam  'Emelia Dutrystraat' voor de gedeeltelijke straatnaamwijziging van Smalleheerweg voor het wegsegment ten zuiden van R4 te Oostakker en aanbrengen van een onderschrift op het straatnaambord - Goedkeuring
Score: [[0.5838779]]

URL: http://data.lblod.info/id/cc01de10-3aff-11ee-a745-971d7f79422e
Beschrijving: Het college neemt kennis van de vraag naar aanleg van voetpaden in de Damstraat (tot aan ABC-wijk en tot aan J. de Behunelaan).
Score: [[0.57110536]]

URL: http://data.lblod.info/id/f607fac0-35a8-11ee-a745-971d7f79422e
Beschrijving: Voor een verhuis op 2 juni wordt parkeerverbod toegestaan aan Krakkestraat 20.
Score: [[0.5486572]]

URL: http://data.lblod.info/id/531466e0-35db-11ee-a745-971d7f79422e
Beschrijving: Goedkeuring voor het herstellen van een deel van de Brandstraat door Koch-Ockier nv, volgens de raamovereenkomst 'Onderhoud gemeentewegen'.
Score: [[0.5

In [1]:
import uuid

def generate_uuid():
    """Generates a random unique user id (UUID) based on the host ID and current time"""
    return str(uuid.uuid1())

In [7]:
!pip install spektral nltk bs4

Collecting spektral
  Using cached spektral-1.3.1-py3-none-any.whl.metadata (5.9 kB)
Collecting lxml (from spektral)
  Downloading lxml-5.2.2-cp311-cp311-win_amd64.whl.metadata (3.5 kB)
Collecting libclang>=13.0.0 (from tensorflow-intel==2.13.0->tensorflow>=2.2.0->spektral)
  Using cached libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting numpy (from spektral)
  Using cached numpy-1.24.3-cp311-cp311-win_amd64.whl.metadata (5.6 kB)
Collecting typing-extensions<4.6.0,>=3.6.6 (from tensorflow-intel==2.13.0->tensorflow>=2.2.0->spektral)
  Using cached typing_extensions-4.5.0-py3-none-any.whl.metadata (8.5 kB)
Collecting tensorflow-estimator<2.14,>=2.13.0 (from tensorflow-intel==2.13.0->tensorflow>=2.2.0->spektral)
  Using cached tensorflow_estimator-2.13.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1 (from tensorflow-intel==2.13.0->tensorflow>=2.2.0->spektral)
  Using cached tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-win_a

  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ipython 8.24.0 requires typing-extensions>=4.6; python_version < "3.12", but you have typing-extensions 4.5.0 which is incompatible.
torch 2.3.0 requires typing-extensions>=4.8.0, but you have typing-extensions 4.5.0 which is incompatible.


In [1]:
from library.BPMNGraph import BPMNGraph
from library.BPMNGraphEmbedder import BPMNGraphEmbedder
import json

graphEmbedder = BPMNGraphEmbedder()



In [8]:
bpmn_graph = BPMNGraph(graph_uuid="01", data = r"demo\bpmn\Meldingen-Kortemark.bpmn")

In [9]:
bpmn_graph.get_nodes()

NodeDataView({'Process_1hgvumz': {'type': 'process', 'name': '', 'documentation': '', 'incoming': [], 'outgoing': ['Lane_13jiq4q', 'Lane_1hqc46l', 'Lane_05fvbp3', 'Lane_0gvmuhy', 'Lane_03s0w4c', 'Lane_04amrvc', 'Lane_0ocxeut', 'Lane_1j8dacz']}, 'DataObjectReference_0qr13xv': {'type': 'dataObjectReference', 'name': 'Melder', 'documentation': '', 'incoming': [], 'outgoing': []}, 'StartEvent_1': {'type': 'startEvent', 'name': '', 'documentation': '', 'incoming': [], 'outgoing': ['Flow_12vvfcr']}, 'Activity_1t15w2o': {'type': 'userTask', 'name': 'Invullen meldingsformulier op website', 'documentation': '', 'incoming': ['Flow_12vvfcr'], 'outgoing': ['Flow_1d0us1o']}, 'Event_08ga2v1': {'type': 'startEvent', 'name': '', 'documentation': '', 'incoming': [], 'outgoing': ['Flow_1tfxbec']}, 'Activity_129wbex': {'type': 'userTask', 'name': 'Telefoon naar onthaal', 'documentation': '', 'incoming': ['Flow_1tfxbec'], 'outgoing': ['Flow_1oc3r5b']}, 'Event_15vgiju': {'type': 'startEvent', 'name': '', '

In [11]:
graphEmbedder.process_graph(bpmn_graph, text_attributes=["text","name","text"])



<library.BPMNGraph.BPMNGraph at 0x2340692af50>

In [10]:
encoded_text = graphEmbedder.get_embedding_model().encode('Hello, World!')
print(encoded_text)

[ 1.55190676e-02  2.89158851e-01  3.21475834e-01  2.44209409e-01
 -1.40331045e-01 -4.25775886e-01  3.97883505e-01  1.66826501e-01
 -3.17398310e-01  4.32004303e-01  1.22076809e-01 -6.64234877e-01
 -8.29535946e-02  5.49005345e-04  1.50834024e-01  2.45756488e-02
  2.68378723e-02 -2.62621731e-01 -6.47644699e-01 -2.67333061e-01
  5.53249121e-02 -7.11291358e-02 -2.78172165e-01  1.76428273e-01
  2.40024462e-01 -4.87940051e-02 -2.18747016e-02  4.65551943e-01
  5.55829816e-02 -3.31043690e-01 -2.66440481e-01 -1.35581121e-01
  4.89880174e-01  1.44641519e-01 -2.66885936e-01  3.05552185e-01
 -8.56959596e-02 -1.60764053e-01 -1.18043937e-01  1.85951129e-01
  1.97065219e-01 -1.46485880e-01  2.26103678e-01  2.09658489e-01
  4.81629558e-02 -1.91175342e-01  5.99479228e-02  1.79655448e-01
  1.47440657e-01  4.77783792e-02  1.39603645e-01 -1.78308770e-01
 -3.51009518e-01  4.61889803e-02  3.29692245e-01  1.25393793e-01
 -9.25428495e-02 -3.77442576e-02 -2.74774935e-02  1.14296712e-01
 -2.91877776e-01  2.85157

In [99]:
# Processing the BPMN files
def generate_insert_query(bpmn_graph, sparql_graph):
    """
    Generate SPARQL INSERT queries for a BPMNGraph.

    Parameters:
    bpmn_graph (BPMNGraph): The BPMN graph to generate queries for.
    sparql_graph (str): The URI of the SPARQL graph to insert data into.

    Returns:
    list: A list of SPARQL INSERT queries.
    """

    # Initialize the list of queries
    queries = []

    # Generate the query for the BPMN graph
    uid = generate_uuid()
    encoded_uri = sparql_escape_uri(f"http://example.com/{uid}")
    query = "PREFIX bpmn: <http://example.com/bpmn#>"
    query += f"INSERT DATA {{ GRAPH {sparql_graph} {{ {encoded_uri} a bpmn:Graph ; bpmn:filePath {sparql_escape_string(bpmn_graph.file_path)} ; bpmn:embedding {sparql_escape_string(json.dumps(bpmn_graph.network.graph.get('embedding').tolist()))} . }} }}"
    queries.append(query)

    # Generate the queries for the nodes
    for _, node in bpmn_graph.get_nodes():
        if node.get('name') and node.get('embedding') is not None:
            node_uid = generate_uuid()
            node_encoded_uri = sparql_escape_uri(f"http://example.com/{node_uid}")
            query = "PREFIX bpmn: <http://example.com/bpmn#>"
            query += f"INSERT DATA {{ GRAPH {sparql_graph} {{ {encoded_uri} bpmn:hasNode {node_encoded_uri} . {node_encoded_uri} a bpmn:Node ; bpmn:name {sparql_escape_string(node.get('name'))} ; bpmn:embedding {sparql_escape_string(json.dumps(node.get('embedding').tolist()))} . }} }}"
            queries.append(query)

    return queries

def process_bpmn_file(bpmn_file_path, sparql_graph):
    """
    Process a BPMN file and generate SPARQL INSERT queries for the graph and its nodes.

    Parameters:
    bpmn_file_path (str): The path to the BPMN file to process.
    sparql_graph (str): The URI of the SPARQL graph to insert data into.

    Returns:
    list: A list of SPARQL INSERT queries, or None if the file type is invalid.
    """
    if bpmn_file_path.endswith('.bpmn'):
        bpmn_graph = BPMNGraph(file_path = bpmn_file_path)
        graphEmbedder.process_graph(bpmn_graph.network, logging=False)
        bpmn_queries = generate_insert_query(bpmn_graph, sparql_graph)
        return bpmn_queries
    else:
        print("Invalid file type. Please provide a .bpmn file.")
        return None
    


In [101]:
sparlq_graph = "<http://mu.semte.ch/graphs/application>"
testing_file = list(all_bpmns.keys())[2]
process_bpmn_file(testing_file, sparlq_graph)

for query in process_bpmn_file(testing_file, sparlq_graph):
    print(query)

PREFIX bpmn: <http://example.com/bpmn#>INSERT DATA { GRAPH <http://mu.semte.ch/graphs/application> { <http://example.com/44e0480b-06f0-11ef-b9b9-70d823ebe76c> a bpmn:Graph ; bpmn:filePath """demo/bpmn/Jeugd-Fuifkwaliteitslabel-aanvraag-Poperinge_v0.1.bpmn""" ; bpmn:embedding """[0.12186718732118607, 0.06592314690351486, -0.07446733862161636, -0.07536166161298752, -0.017441721633076668, -0.02604934200644493, 0.2192714661359787, 0.14270970225334167, -0.03372982144355774, 0.06160412356257439, 0.006778970826417208, -0.1063249409198761, 0.09356041997671127, -0.05620177090167999, -0.03796269744634628, -0.030480122193694115, 0.05912928283214569, -0.06519386172294617, -0.05600249394774437, 0.03500610962510109, 0.1027102842926979, -0.12911149859428406, -0.011910471133887768, -0.05045151710510254, 0.011706219054758549, -0.044014979153871536, -0.1175495982170105, -0.008047500625252724, -0.020692924037575722, -0.2948693633079529, 0.0386071540415287, -0.16445055603981018, 0.18093258142471313, 0.086

In [8]:
import requests
import json

def get_embedding(text):
    url = 'http://localhost:2000/encode'
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json',
    }
    data = json.dumps(text)

    response = requests.post(url, headers=headers, data=data)

    # return the embedding
    return response.json()



{'embedding': [0.40215590596199036, 0.09487263113260269, 0.03560946509242058, 0.1071549579501152, -0.0877557173371315, 0.4163108170032501, 0.196039080619812, 0.12206684798002243, -0.29957184195518494, -0.07157567888498306, 0.005875302013009787, -0.21485720574855804, -0.028008272871375084, -0.24372737109661102, -0.3480670154094696, 0.18561667203903198, -0.05847501382231712, 0.09140219539403915, 0.4041575491428375, -0.3150882422924042, 0.04607795551419258, 0.044831257313489914, -0.010723729617893696, -0.41473767161369324, -0.11588180065155029, 0.2849324643611908, 0.018009938299655914, 0.3284425139427185, -0.19884465634822845, 0.12511420249938965, -0.24852102994918823, 0.2824277877807617, 0.16509579122066498, 0.10332722216844559, -0.28224310278892517, -0.4065909683704376, 0.37157711386680603, -0.062253665179014206, 0.5820353031158447, -0.13370004296302795, 0.2568962872028351, -0.22632324695587158, -0.09970749169588089, 0.39800596237182617, 0.041040852665901184, 0.6527889370918274, 0.14353

In [4]:
import requests

def list_elasticsearch_indices(url: str):
    response = requests.get(f"{url}/_cat/indices?v")
    response.raise_for_status()  # Raises a HTTPError if the response status is 4xx, 5xx
    return response.text

elasticsearch_url = "http://localhost:9200"  # replace with your actual Elasticsearch URL
indices = list_elasticsearch_indices(elasticsearch_url)
print(indices)

health status index                            uuid                   pri rep docs.count docs.deleted store.size pri.store.size
green  open   .geoip_databases                 Jea84qU5S1yPoJ_N35yPRA   1   0         33            0     31.1mb         31.1mb
yellow open   f1e71764d100e4367367d8d1b4d248bf XISF9_zpTXuYou7eT4dMcQ   1   1          0            0       226b           226b
yellow open   4d8ce6ce737905f080da19a368ddde85 yxruP4IpRv-vuZwTsdLiWQ   1   1        299            0      2.1mb          2.1mb
yellow open   3f3c33ba10423754945a6b3c00949e37 LGmFF8Z0SqKz4Cr8dbUy7A   1   1          0            0       226b           226b
yellow open   487626dbe35f895e9e99e9477992f994 Ara-mdyPTJm2cQ_6_qOOCw   1   1          9            0     80.2kb         80.2kb



In [13]:
import requests
import json

def get_all_from_elasticsearch(url: str, index: str):
    headers = {'Content-Type': 'application/json'}
    data = {
        "query": {
            "match_all": {}
        }
    }
    response = requests.get(f"{url}/{index}/_search", headers=headers, data=json.dumps(data))
    response.raise_for_status()  # Raises a HTTPError if the response status is 4xx, 5xx
    return response.json()

elasticsearch_url = "http://localhost:9200"  # replace with your actual Elasticsearch URL
index = "4d8ce6ce737905f080da19a368ddde85"  # replace with your actual index name


documents = get_all_from_elasticsearch(elasticsearch_url, index)
print(documents)

{'took': 35, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 299, 'relation': 'eq'}, 'max_score': 1.0, 'hits': [{'_index': '4d8ce6ce737905f080da19a368ddde85', '_type': '_doc', '_id': 'http://example.com/9d68f01c-12c1-11ef-9a9b-0242ac1c0004', '_score': 1.0, '_source': {'name': 'Navragen of klacht in behandeling is bij klachten-behandelaar', 'embedding': [0.11860056966543198, 0.07757184654474258, -0.18737520277500153, 0.11011920869350433, 0.041535429656505585, -0.24801504611968994, 0.36455675959587097, 0.18536494672298431, 0.009001963771879673, -0.002263689413666725, -0.23571643233299255, -0.0371120348572731, 0.11723800748586655, 0.06960874050855637, 0.06005679443478584, 0.10550542920827866, 0.1400270164012909, -0.17514342069625854, 0.20960436761379242, 0.0855291336774826, -0.22807545959949493, -0.09166497737169266, 0.2339758574962616, -0.09809452295303345, -0.2512226104736328, 0.11861888319253922, 0.10088104009628296, 

In [6]:
def format_and_display_results(results):
    encode_start_time = results['took']
    encode_end_time = results['took'] + results['took']
    bm25 = results
    sem_search = results

    print(
        "Computing the embedding took {:.3f} seconds, BM25 search took {:.3f} seconds, semantic search with ES took {:.3f} seconds".format(
            encode_end_time - encode_start_time, bm25["took"] / 1000, sem_search["took"] / 1000
        )
    )

    hits = results['hits']['hits']
    return [hit['_source'] for hit in hits]

formatted_hits = format_and_display_results(documents)
for hit in formatted_hits:
    print(hit)

Computing the embedding took 35.000 seconds, BM25 search took 0.035 seconds, semantic search with ES took 0.035 seconds
{'name': 'Navragen of klacht in behandeling is bij klachten-behandelaar', 'embedding': [0.11860056966543198, 0.07757184654474258, -0.18737520277500153, 0.11011920869350433, 0.041535429656505585, -0.24801504611968994, 0.36455675959587097, 0.18536494672298431, 0.009001963771879673, -0.002263689413666725, -0.23571643233299255, -0.0371120348572731, 0.11723800748586655, 0.06960874050855637, 0.06005679443478584, 0.10550542920827866, 0.1400270164012909, -0.17514342069625854, 0.20960436761379242, 0.0855291336774826, -0.22807545959949493, -0.09166497737169266, 0.2339758574962616, -0.09809452295303345, -0.2512226104736328, 0.11861888319253922, 0.10088104009628296, -0.030590670183300972, -0.28943419456481934, -0.018061669543385506, -0.24093304574489594, 0.03904671594500542, -0.21829304099082947, -0.06020818650722504, 0.005029951222240925, 0.16413402557373047, -0.2862155437469482

[0.40215590596199036, 0.09487263113260269, 0.03560946509242058, 0.1071549579501152, -0.0877557173371315, 0.4163108170032501, 0.196039080619812, 0.12206684798002243, -0.29957184195518494, -0.07157567888498306]


In [27]:
%%time

import requests
import json

def knn_search(url: str, index: str, query_embedding: list, size: int = 10):
    headers = {'Content-Type': 'application/json'}
    data = {
        "size": 100,
        "query": {
            "script_score": {
                "query": {
                    "match_all": {}
                },
                "script": {
                    "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                    "params": {
                        "query_vector": query_embedding
                    }
                }
            }
        }
    }
    response = requests.get(f"{url}/{index}/_search", headers=headers, data=json.dumps(data))
    response.raise_for_status()  # Raises a HTTPError if the response status is 4xx, 5xx
    hits = response.json()['hits']['hits']
    hits = sorted(hits, key=lambda x: x['_score'], reverse=True)
    return [(hit['_source'], hit['_score']) for hit in hits[:size]]

elasticsearch_url = "http://localhost:9200"  # replace with your actual Elasticsearch URL
index = "487626dbe35f895e9e99e9477992f994"  # replace with your actual index name


query = "jeugdverenigingen"
query_embedding = get_embedding(query)['embedding']


top_hits = knn_search(elasticsearch_url, index, query_embedding)
for hit, score in top_hits:
    print(f"Score: {score}, Hit: {hit}")

Score: 1.5582311, Hit: {'file': '/app/uploads/bpmn/Sport-Subsidieaanvraag-De-Panne_v0.1.bpmn', 'embedding': [0.12243035435676575, 0.02211620658636093, -0.03235959634184837, -0.03500920906662941, -0.02211921103298664, 0.01721743494272232, 0.18440969288349152, 0.13902780413627625, -0.03098566085100174, 0.06495307385921478, -0.02350810170173645, -0.16177788376808167, 0.08088891208171844, -0.015579458326101303, -0.018983570858836174, 0.0036693578585982323, 0.060211677104234695, -0.050797682255506516, -0.024520426988601685, 0.07047168165445328, 0.11817775666713715, -0.14209261536598206, -0.06427385658025742, -0.05559815466403961, 0.09795579314231873, -0.11238501965999603, -0.10873177647590637, -0.0045449258759617805, -0.04848596453666687, -0.3007804751396179, 0.061493609100580215, -0.11960722506046295, 0.15817339718341827, 0.1104012280702591, -0.013746637850999832, 0.14826932549476624, 0.07648683339357376, 0.013208748772740364, 0.028300480917096138, 0.10428199172019958, 0.005051053129136562

In [41]:
def mu_knn_search(url: str, query_embedding: list, size: int = 10):
    headers = {'Content-Type': 'application/json'}
    data = {
        "size": 100,
        "query": {
            "script_score": {
                "query": {
                    "match_all": {}
                },
                "script": {
                    "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                    "params": {
                        "query_vector": query_embedding
                    }
                }
            }
        }
    }
    response = requests.post(url, headers=headers, data=json.dumps(data))
    response.raise_for_status()  # Raises a HTTPError if the response status is 4xx, 5xx

    return response.json()

url = "http://localhost:4000/bpmn-nodes/search"  # replace with your actual URL

top_hits = mu_knn_search(url, query_embedding)
for hit in top_hits["data"]:
    print(f"Name: {hit['attributes']['name']}")

Name: Jeugddienst
Name: Schepencollege
Name: Ja
Name: Ja
Name: Financiële dienst
Name: Evaluatie verantwoordingsstukken
Name: Organisator
Name: Evaluatie en goedkeuring
Name: Organisatoren op de hoogte brengen
Name: Goedgekeurd?
Name: Uiterlijk 31 augustus
Name: Nee
Name: Nee
Name: Evaluatie aanvraag
Name: Berekenen punten en rangschikking maken
Name: Jury
Name: Invullen evaluatieformulier
Name: Compleet?
Name: Uitbetalen aan organisatoren
Name: 1 maand voor de fuif
Name: Invullen aanvraagformulier
Name: superNode
Name: Aanvullen informatie evaluatieformulier
Name: Op agenda plaatsen van CBS


In [62]:
def mu_knn_search(url: str, query_text: str, query_embedding: list, size: int = 10):
    headers = {'Content-Type': 'application/json'}
    data = {
        "size": size,
        "query": {
            "bool": {
                "should": [
                    {
                        "fuzzy": {"name": {"value": query_text, "boost": 0.3}}
                    },
                    {
                        "script_score": {
                            "query": {"match_all": {}},
                            "script": {
                                "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                                "params": {"query_vector": query_embedding}
                            },
                            "boost": 0.7
                        }
                    }
                ]
            }
        }
    }
    response = requests.post(url, headers=headers, data=json.dumps(data))
    response.raise_for_status()  # Raises a HTTPError if the response status is 4xx, 5xx

    return response.json()

url = "http://localhost:4000/bpmn-nodes/search"  # replace with your actual URL

top_hits = mu_knn_search(url,"ja", query_embedding)
for hit in top_hits["data"]:
    print(f"Name: {hit['attributes']}")

Name: {'name': 'Ja', 'embedding': [0.22250376641750336, -0.10115551948547363, 0.10424230247735977, -0.020426293835043907, -0.14985327422618866, -0.06844960898160934, 0.29605355858802795, 0.15564025938510895, 0.04362944886088371, 0.09393838793039322, 0.007331326603889465, -0.30361494421958923, 0.08479025214910507, -0.006050907075405121, 0.025826221331954002, -0.021061569452285767, 0.15619920194149017, -0.09027964621782303, -0.22650651633739471, -0.03377362713217735, -0.09927066415548325, 0.05467694625258446, -0.11909399181604385, -0.07536452263593674, 0.00562808895483613, -0.1839246302843094, -0.018940404057502747, 0.0649319589138031, -0.1205848976969719, -0.2552858293056488, 0.0041329748928546906, 0.002535581588745117, 0.06624702364206314, 0.167634055018425, -0.03297240659594536, 0.21989041566848755, 0.08874306827783585, -0.1317494809627533, -0.03452153876423836, 0.09778664261102676, -0.007513310294598341, -0.05899642035365105, 0.032794613391160965, 0.23937849700450897, 0.0554619431495

In [48]:
import importlib
import library.BPMNSearch

importlib.reload(library.BPMNSearch)
from library.BPMNSearch import BPMNSearch

bpmn_search = BPMNSearch("http://localhost:4000")

query = {
    'text': 'jeugd',
    'embedding': query_embedding
}

top_hits = bpmn_search.mu_knn_search('bpmn-nodes', query, size=10)
for hit in top_hits["data"]:
    print(f"Name: {hit['attributes']['name']}, data: {hit['attributes']}")

In [4]:

!pip install flatbuffers

Collecting flatbuffers
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Using cached flatbuffers-24.3.25-py2.py3-none-any.whl (26 kB)
Installing collected packages: flatbuffers
Successfully installed flatbuffers-24.3.25


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.13.0 requires libclang>=13.0.0, which is not installed.
tensorflow-intel 2.13.0 requires tensorflow-estimator<2.14,>=2.13.0, which is not installed.
tensorflow-intel 2.13.0 requires tensorflow-io-gcs-filesystem>=0.23.1; platform_machine != "arm64" or platform_system != "Darwin", which is not installed.
tensorflow-intel 2.13.0 requires numpy<=1.24.3,>=1.22, but you have numpy 1.26.4 which is incompatible.
tensorflow-intel 2.13.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.26.1 which is incompatible.
tensorflow-intel 2.13.0 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.11.0 which is incompatible.


In [2]:
# get pending tasks from endpoint

queue_endpoint = "http://localhost:2000/tasks"
results_endpoint = "http://localhost:2000/tasks/results"
#set enivronment variables
import os

os.environ["MU_QUEUE_GRAPH"] = "http://mu.semte.ch/graphs/tasks"

os.environ["MU_SPARQL_ENDPOINT"] = "http://localhost:8890/sparql"
os.environ["MU_SPARQL_UPDATEPOINT"] = "http://localhost:8890/sparql"




# Processing the task
from library.BPMNGraph import BPMNGraph
from library.BPMNGraphEmbedder import BPMNGraphEmbedder
from library.BPMNQueue import BPMNQueue

from library.BPMNTask import BPMNTask, BPMNTaskStatus

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import requests
tasks = requests.get(queue_endpoint+"?status=failed").json()


for task in tasks:
    bpmn_task = BPMNTask.from_dict(task, BPMNQueue.get_instance(queue_endpoint))
    bpmn_task.update_status(BPMNTaskStatus.PENDING)

SPARQL endpoint used for the queue: http://localhost:8890/sparql
SPARQL updatepoint for the queue: http://localhost:8890/sparql
MU_QUEUE_GRAPH for the queue: http://mu.semte.ch/graphs/tasks


In [14]:
import importlib
import library.BPMNWorker

importlib.reload(library.BPMNWorker)

from library.BPMNWorker import BPMNWorker, BPMNWorkerManager

# Create a BPMNWorkerManager with 1 worker
worker_manager = BPMNWorkerManager(1, queue_endpoint="http://localhost:2000/tasks", graph_endpoint="http://localhost:2000/tasks/results")

Loading sentence model: paraphrase-multilingual-MiniLM-L12-v2


Loading keybert model: paraphrase-multilingual-MiniLM-L12-v2


In [18]:
worker_manager.start_workers()

No pending tasks. Sleeping for 5 seconds...
Stopping worker...


In [19]:
worker_manager.stop_workers()

In [4]:
from library.BPMNTask import BPMNTask
from library.BPMNTask import BPMNTaskStatus

# reload the BPMNTask module
import importlib
import library.BPMNTask




importlib.reload(library.BPMNTask)


BpmnTask = BPMNTask.from_dict(task)

#update the task status
BpmnTask.update_status(BPMNTaskStatus.COMPLETED)
