In [1]:
import pandas as pd

In [4]:
# Load only the endpoint columns of each edge list; nothing else is needed
# for the account-set comparisons below.
tx = pd.read_csv(
    "transaction_edges_mv.csv",
    usecols=["sender_id", "receiver_id"],
)
tl = pd.read_csv(
    "trustline_edges_mv.csv",
    usecols=["account_id", "issuer_id"],
)

# Path of the partition CSV whose node IDs are compared against the edge lists.
PART_CSV = "louvain_result_res0.5.csv"

In [None]:
# Read just the header row (nrows=0) to find out which column holds node IDs.
part_header = pd.read_csv(PART_CSV, nrows=0).columns

# Prefer "account_id", fall back to "node"; None when neither column exists.
id_col = next(
    (name for name in ("account_id", "node") if name in part_header),
    None,
)
if id_col is None:
    raise ValueError(f"No 'account_id' and 'node' found in {PART_CSV}. Columns are: {list(part_header)}")

# Load only the ID column and expose it under the canonical name "account_id".
part = pd.read_csv(PART_CSV, usecols=[id_col])
part = part.rename(columns={id_col: "account_id"})


In [None]:
def _endpoint_ids(edges, left_col, right_col):
    """Return the set of distinct values appearing in either of two columns of `edges`."""
    stacked = pd.concat([edges[left_col], edges[right_col]], ignore_index=True)
    return set(stacked.unique())


# Distinct IDs seen on either side of a transaction / trustline edge.
tx_ids = _endpoint_ids(tx, "sender_id", "receiver_id")
tl_ids = _endpoint_ids(tl, "account_id", "issuer_id")

# Distinct IDs listed in the partition file.
louv_ids = set(part["account_id"].unique())

In [None]:
# Sizes of the transaction and trustline ID sets and how they overlap.
print(f"tx_accounts: {len(tx_ids)}")
print(f"tl_accounts: {len(tl_ids)}")
print(f"both (tx ∩ tl): {len(tx_ids.intersection(tl_ids))}")
print(f"trustline_only (tl - tx): {len(tl_ids.difference(tx_ids))}")
print(f"tx_only (tx - tl): {len(tx_ids.difference(tl_ids))}")

# How many partition IDs also appear in each edge list.
print(f"louvain_nodes: {len(louv_ids)}")
print(f"louvain ∩ tx: {len(louv_ids.intersection(tx_ids))}")
print(f"louvain ∩ tl: {len(louv_ids.intersection(tl_ids))}")

tx_accounts: 210438
tl_accounts: 2211156
both (tx ∩ tl): 165693
trustline_only (tl - tx): 2045463
tx_only (tx - tl): 44745
louvain_nodes: 2230048
louvain ∩ tx: 210091
louvain ∩ tl: 2185581


In [None]:
# Partition IDs that appear in the trustline edges but in no transaction edge.
louv_trustline_only = louv_ids.intersection(tl_ids).difference(tx_ids)
print(f"louvain trustline-only: {len(louv_trustline_only)}")

louvain trustline-only: 2019957


----------

In [1]:
import pandas as pd
# Transactions: parse tx_date on load; unparseable values become NaT (errors="coerce").
tx = pd.read_csv(
    "transaction_edges_mv.csv",
    usecols=["sender_id", "receiver_id", "amount", "tx_date"],
).assign(tx_date=lambda frame: pd.to_datetime(frame["tx_date"], errors="coerce"))

# Trustlines: one row per (account_id, issuer_id, asset_code) with its balance.
tl = pd.read_csv(
    "trustline_edges_mv.csv",
    usecols=["account_id", "issuer_id", "asset_code", "balance"],
)

# Fingerprint of both edge files: row counts, distinct-key counts, totals and
# the observed date range. Useful for checking that two runs read the same data.
edge_fp = dict(
    tx_rows=len(tx),
    tx_pairs=len(tx[["sender_id", "receiver_id"]].drop_duplicates()),
    tx_amount_sum=float(tx["amount"].sum()),
    tx_date_min=str(tx["tx_date"].min()),
    tx_date_max=str(tx["tx_date"].max()),
    tl_rows=len(tl),
    tl_keys=len(tl[["account_id", "issuer_id", "asset_code"]].drop_duplicates()),
    tl_balance_sum=float(tl["balance"].sum()),
)
edge_fp


{'tx_rows': 808072,
 'tx_pairs': 268353,
 'tx_amount_sum': 29022895466579.82,
 'tx_date_min': '2025-02-04 00:00:00',
 'tx_date_max': '2025-02-15 00:00:00',
 'tl_rows': 25778427,
 'tl_keys': 25778427,
 'tl_balance_sum': 1.9283672008367891e+24}

In [None]:
import pickle
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that were produced by a trusted pipeline.
with open("pkl/account-transaction-trustline-full.pkl", "rb") as f:
    G = pickle.load(f)

# Every ID that appears as an endpoint in either edge table.
tx_nodes = set(tx["sender_id"]) | set(tx["receiver_id"])
tl_nodes = set(tl["account_id"]) | set(tl["issuer_id"])
edge_nodes_union = tx_nodes.union(tl_nodes)

# Node set of the loaded graph, for comparison against the edge tables.
g_nodes = set(G.nodes)

# Both difference counts are 0 when the graph's nodes match the edge tables exactly.
print(dict(
    G_nodes=G.number_of_nodes(),
    G_edges=G.number_of_edges(),
    missing_in_G=len(edge_nodes_union.difference(g_nodes)),
    extra_in_G=len(g_nodes.difference(edge_nodes_union)),
))


{'G_nodes': 2255901, 'G_edges': 14760623, 'missing_in_G': 0, 'extra_in_G': 0}


In [4]:
# Load the partition file and normalise its node-ID column to "account_id".
part = pd.read_csv("louvain_result_res0.5.csv")
if "account_id" not in part.columns:
    if "node" not in part.columns:
        # Fail with an explicit message instead of a bare KeyError further down.
        raise ValueError(
            "Expected an 'account_id' or 'node' column in louvain_result_res0.5.csv; "
            f"columns are: {list(part.columns)}"
        )
    part = part.rename(columns={"node": "account_id"})

# Distinct partition IDs, and how many of them are also nodes of G.
part_nodes = set(part["account_id"])
covered = len(part_nodes & g_nodes)  # set intersection instead of a Python-level count loop
print(f"Partition nodes: {len(part_nodes):,}, present in G: {covered:,}")


Partition nodes: 2,230,048, present in G: 2,230,048
