In [2]:

from pycozo.client import Client
client = Client('rocksdb', 'snbsf1.db')

In [2]:
# Create reified nodes "taggedComment"


import pandas as pd

# 1) Charger les données depuis CozoDB via client.run() ---
edge_label_df = pd.DataFrame(client.run('?[id_e, ln] := *edge_label[id_e, ln]'))
edge_df       = pd.DataFrame(client.run('?[id_e, ns, nd] := *edge[id_e, ns, nd]'))
node_label_df = pd.DataFrame(client.run('?[id_n, ln] := *node_label[id_n, ln]'))

# 2) Filtrer les arêtes hasTag DE commentaires VERS tags 
# On ne garde que ln="hasTag"
hasTag = edge_label_df[edge_label_df['ln'] == 'hasTag']
# Joindre pour récupérer ns et nd
df = hasTag.merge(edge_df, on='id_e')
# Filtrer source de type comment
df = df.merge(node_label_df[node_label_df['ln'] == 'comment'][['id_n']], left_on='ns', right_on='id_n')
# Filtrer destination de type tag
df = df.merge(node_label_df[node_label_df['ln'] == 'tag'][['id_n']], left_on='nd', right_on='id_n')

# 3) Générer les reified nodes "taggedComment" ---
#since reified nodes are also standrd nodes, so their ids start from the max_id of standard nodes+1
max_id = client.run("?[n] := *node[n], :order -n :limit 1").iloc[0,0]
next_rn= max_id + 1

reified_ids = []
n_comp_edge = []  # [id_rn, id_e]
n_comp_node = []  # [id_rn, id_n]

for _, row in df.iterrows():
    rid = next_rn
    next_rn += 1
    reified_ids.append(rid)
    # on rattache l'arête hasTag
    n_comp_edge.append([rid, row['id_e']])
    # on rattache les deux nœuds : comment (ns) et tag (nd)
    n_comp_node.append([rid, row['ns']])
    n_comp_node.append([rid, row['nd']])

#4) Helper pour formater les commandes Cozo ---
def build_cozo_insert(rel, rows, cols):
    # ex: rel="n_composed_of_edge", cols=["id_rn","id_e"]
    entries = ",".join(
        "[" + ",".join(f"\"{x}\"" if isinstance(x,str) else str(x) for x in row) + "]"
        for row in rows
    )
    return f"?[{','.join(cols)}] <- [{entries}] :put {rel} {{{','.join(cols)}}}"

# 5) Afficher les scripts Cozo permettant de :
# # a) créer les reified_node
# print(build_cozo_insert("reified_node", [[rid] for rid in reified_ids], ["id_rn"]))
# # b) rattacher les arêtes
# print(build_cozo_insert("n_composed_of_edge", n_comp_edge, ["id_rn","id_e"]))
# # c) rattacher les nœuds
# print(build_cozo_insert("n_composed_of_node", n_comp_node, ["id_rn","id_n"]))


In [5]:

def generate_taggedcomment_labels(start_id, end_id, batch_size=10000):
    
    scripts = []
    for batch_start in range(start_id, end_id + 1, batch_size):
        batch_end = min(batch_start + batch_size - 1, end_id)
        assignments = [f"[{i},\"taggedComment\"]" for i in range(batch_start, batch_end + 1)]
        joined = ",".join(assignments)
        scripts.append(f"?[id_n,ln] <- [{joined}] :put node_label {{id_n,ln}}")
    return scripts
# print(client.run("?[n] := *node[n]"))
max_id = client.run("?[n] := *node[n], :order -n :limit 1").iloc[0,0]
start_id= max_id + 1
end_id= start_id +len(reified_ids)
print(max_id)
print(start_id)
print(len(reified_ids))
print(end_id)
# RUN
for cmd in generate_taggedcomment_labels(start_id, end_id, batch_size=10000):
    res = client.run(cmd)
    res

35184372099695
35184372099696
2707446
35184374807142


In [6]:
res = client.run(build_cozo_insert("reified_node", [[rid] for rid in reified_ids], ["id_rn"]))
res

Unnamed: 0,status
0,OK


In [8]:
res = client.run(build_cozo_insert("n_composed_of_edge", n_comp_edge, ["id_rn","id_e"]))
res

Unnamed: 0,status
0,OK


In [9]:
res = client.run(build_cozo_insert("n_composed_of_node", n_comp_node, ["id_rn","id_n"]))
res

Unnamed: 0,status
0,OK


In [3]:
res = client.run('?[n] := *reified_node[n]')
print(res)

                      n
0        35184372099696
1        35184372099697
2        35184372099698
3        35184372099699
4        35184372099700
...                 ...
2707441  35184374807137
2707442  35184374807138
2707443  35184374807139
2707444  35184374807140
2707445  35184374807141

[2707446 rows x 1 columns]


In [4]:
#Q1 Retrieve nodes with their labels and properties belonging to a given reified node.
#cozo 
q1= """ 
?[associated_node, associated_label, associated_prop] :=
id_rn = 35184372090193,
*n_composed_of_node[id_rn, associated_node], *node_label[associated_node, associated_label],*n_composed_of_node_prop[id_rn,associated_node, associated_prop]
"""
res = client.run(q1)

In [6]:
# Q2 Search for an element in a reified node by criteria: Retrieve IDs of Tag node in tagged comments with tag name "İzmir_University".
#cozo
q2=""" ?[tagNode] :=
  *reified_node[r_id],
  *n_composed_of_node[r_id, tagNode],
  *node_label[tagNode, "tag"],
  *node_prop[tagNode, "name", "İzmir_University"]"""
res = client.run(q2)
res

Unnamed: 0,tagNode
0,6568


In [7]:
import time
# exécuter 10 fois auto 
n = 10
times = []

for _ in range(n):
    start = time.time()  
    client.run(q2)
    end = time.time()  
    times.append((end - start) * 1000)  # en ms

mean = sum(times) / n
print(f"Temps moyen sur {n} exécutions : {mean:.3f} ms")

Temps moyen sur 10 exécutions : 47526.868 ms
