In [None]:
pip install "pycozo[embedded,pandas]"

In [None]:
from pycozo.client import Client
import pandas as pd
from pycozo.client import Client
# Shared database handle used by every query cell below: the first argument
# selects the storage engine, the second is the path passed to it.
client = Client(
    "rocksdb",
    "datafile.db",
)

In [None]:
# Create reified nodes "taggedComment"

import pandas as pd

# --- 1) Load the relations from CozoDB via client.run() ---
edge_label_query = '?[id_e, ln] := *edge_label[id_e, ln]'
edge_query = '?[id_e, ns, nd] := *edge[id_e, ns, nd]'
node_label_query = '?[id_n, ln] := *node_label[id_n, ln]'

edge_label_df = pd.DataFrame(client.run(edge_label_query))
edge_df = pd.DataFrame(client.run(edge_query))
node_label_df = pd.DataFrame(client.run(node_label_query))

# --- 2) Keep only the hasTag edges that go from a comment to a tag ---
# Edge labels restricted to ln == "hasTag".
is_has_tag = edge_label_df['ln'].eq('hasTag')
hasTag = edge_label_df[is_has_tag]

# Single-column frames with the ids of the nodes labelled 'comment' / 'tag';
# inner-merging against them filters the edge endpoints.
comment_nodes = node_label_df.loc[node_label_df['ln'].eq('comment'), ['id_n']]
tag_nodes = node_label_df.loc[node_label_df['ln'].eq('tag'), ['id_n']]

df = (
    hasTag
    .merge(edge_df, on='id_e')                             # bring in ns and nd
    .merge(comment_nodes, left_on='ns', right_on='id_n')   # source must be a comment
    .merge(tag_nodes, left_on='nd', right_on='id_n')       # destination must be a tag
)

# --- 3) Generate the "taggedComment" reified nodes ---
# One reified node is created per row of df (one per filtered hasTag edge).
# Ids are handed out consecutively starting at 3000001
# (NOTE(review): presumably chosen to sit above every existing id -- confirm).
first_rn = 3000001
next_rn = first_rn + len(df)  # first id left unused after this allocation
reified_ids = list(range(first_rn, next_rn))

# Read the columns as plain Python lists instead of looping with
# df.iterrows(): iterrows builds a Series per row (slow on large frames) and
# does not preserve dtypes across a row, which could change how ids are
# rendered in the generated scripts.
edge_ids = df['id_e'].tolist()
source_ids = df['ns'].tolist()
target_ids = df['nd'].tolist()

# [id_rn, id_e]: attach each reified node to its hasTag edge.
n_comp_edge = [[rn_id, edge_id] for rn_id, edge_id in zip(reified_ids, edge_ids)]

# [id_rn, id_n]: attach each reified node to both endpoints, the comment (ns)
# first and the tag (nd) second, in the same per-edge order as df.
n_comp_node = [
    [rn_id, node_id]
    for rn_id, source_id, target_id in zip(reified_ids, source_ids, target_ids)
    for node_id in (source_id, target_id)
]

# --- 4) Helper that formats Cozo insert commands ---
def build_cozo_insert(rel, rows, cols):
    """Build a CozoScript command that puts literal rows into a relation.

    Example: rel="n_composed_of_edge", cols=["id_rn", "id_e"] and
    rows=[[1, 2]] produce
    ``?[id_rn,id_e] <- [[1,2]] :put n_composed_of_edge {id_rn,id_e}``.

    Args:
        rel: Name of the target relation.
        rows: Iterable of rows; each row is an iterable of values given in
            the same order as ``cols``.
        cols: Column names, used both for the rule head and the ``:put`` spec.

    Returns:
        The complete command as a single string.
    """
    import json  # local import keeps the helper self-contained

    def _literal(value):
        # bool is tested first: str(True) is "True", which is not a Cozo literal.
        if isinstance(value, bool):
            return "true" if value else "false"
        if value is None:
            return "null"
        if isinstance(value, str):
            # JSON quoting escapes embedded quotes, backslashes and control
            # characters, so arbitrary text cannot break the script.
            # ensure_ascii=False leaves non-ASCII characters untouched.
            return json.dumps(value, ensure_ascii=False)
        return str(value)

    header = ",".join(cols)
    entries = ",".join(
        "[" + ",".join(_literal(value) for value in row) + "]"
        for row in rows
    )
    return f"?[{header}] <- [{entries}] :put {rel} {{{header}}}"

# --- 5) Print the generated Cozo scripts ---
# Each entry is (relation, rows, columns); scripts are printed in this order.
script_specs = (
    # a) create the reified_node rows
    ("reified_node", [[rid] for rid in reified_ids], ["id_rn"]),
    # b) attach the edges
    ("n_composed_of_edge", n_comp_edge, ["id_rn", "id_e"]),
    # c) attach the nodes
    ("n_composed_of_node", n_comp_node, ["id_rn", "id_n"]),
)
for spec_rel, spec_rows, spec_cols in script_specs:
    print(build_cozo_insert(spec_rel, spec_rows, spec_cols))


In [None]:
def generate_taggedcomment_labels(start_id=3000001, end_id=3740962, batch_size=10000, label="taggedComment"):
    """Build batched CozoScript commands that label a contiguous id range.

    Every id from ``start_id`` to ``end_id`` (both inclusive) gets one
    ``[id, label]`` row put into the ``node_label`` relation. The rows are
    split into commands of at most ``batch_size`` rows each.

    Args:
        start_id: First node id to label.
        end_id: Last node id to label (inclusive).
        batch_size: Maximum number of rows per generated command.
        label: Label text written to the ``ln`` column.

    Returns:
        A list of command strings in ascending id order; empty when
        ``end_id < start_id``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    import json  # local import keeps the function self-contained

    if batch_size < 1:
        # A zero step makes range() fail and a negative one silently yields
        # no (or nonsensical) batches, so reject both up front.
        raise ValueError("batch_size must be a positive integer")

    # JSON quoting escapes any quote or backslash contained in the label.
    quoted_label = json.dumps(label, ensure_ascii=False)

    scripts = []
    for batch_start in range(start_id, end_id + 1, batch_size):
        # Exclusive upper bound of this batch, clipped to the requested range.
        batch_stop = min(batch_start + batch_size, end_id + 1)
        joined = ",".join(
            f"[{node_id},{quoted_label}]" for node_id in range(batch_start, batch_stop)
        )
        scripts.append(f"?[id_n,ln] <- [{joined}] :put node_label {{id_n,ln}}")
    return scripts

# Usage: label every id in reified_ids as "taggedComment".
# The range is taken from reified_ids (consecutive ids) rather than from the
# function's hard-coded defaults, so the labels always match the nodes that
# were actually generated.
label_results = []
if reified_ids:
    for cmd in generate_taggedcomment_labels(reified_ids[0], reified_ids[-1]):
        res = client.run(cmd)
        label_results.append(res)

# Number of batches executed (a bare expression inside the loop displays nothing).
len(label_results)

In [None]:
# Build the script that puts one row per id into reified_node, then run it.
reified_node_script = build_cozo_insert(
    "reified_node",
    [[rid] for rid in reified_ids],
    ["id_rn"],
)
res = client.run(reified_node_script)
res

In [None]:
# Build the script that puts the [id_rn, id_e] pairs into n_composed_of_edge, then run it.
composed_edge_script = build_cozo_insert(
    "n_composed_of_edge",
    n_comp_edge,
    ["id_rn", "id_e"],
)
res = client.run(composed_edge_script)
res

In [None]:
# Build the script that puts the [id_rn, id_n] pairs into n_composed_of_node, then run it.
composed_node_script = build_cozo_insert(
    "n_composed_of_node",
    n_comp_node,
    ["id_rn", "id_n"],
)
res = client.run(composed_node_script)
res