In [36]:
import polars as pl
import cupy as cp
import os
import networkx as nx
from pyvis.network import Network

In [37]:
# Build one lazy frame per competition CSV; all three files live in the same
# dataset directory, so only the file name varies.
lf_test, lf_train, lf_sample = (
    pl.scan_csv(os.path.join("dataset_Sep_9_2025", csv_name))
    for csv_name in ("test.csv", "train.csv", "sample_submission.csv")
)

In [38]:
# Materialize each lazy frame into an in-memory DataFrame.
df_test, df_train, df_sample = (
    lazy_frame.collect() for lazy_frame in (lf_test, lf_train, lf_sample)
)

In [39]:
# Dimensions of the test frame as a (rows, columns) tuple.
df_test.shape

(45, 2)

In [40]:
# Dimensions of the training frame as a (rows, columns) tuple.
df_train.shape

(180, 8)

In [41]:
# Dimensions of the sample-submission frame as a (rows, columns) tuple.
df_sample.shape

(45, 3)

In [42]:
# Column names and dtypes of the training frame.
df_train.schema

Schema([('timestamp', String),
        ('juror', String),
        ('repo_a', String),
        ('repo_b', String),
        ('parent', String),
        ('choice', Int64),
        ('multiplier', Float64),
        ('reasoning', String)])

In [43]:
# Column names and dtypes of the test frame.
df_test.schema

Schema([('repo', String), ('parent', String)])

In [44]:
# Column names and dtypes of the sample-submission frame.
df_sample.schema

Schema([('repo', String), ('parent', String), ('weight', Float64)])

In [45]:
# Preview the first rows of the training data.
df_train.head()

timestamp,juror,repo_a,repo_b,parent,choice,multiplier,reasoning
str,str,str,str,str,i64,f64,str
"""2025-01-24T19:03:52.628Z""","""L1Juror1""","""https://github.com/erigontech/…","""https://github.com/hyperledger…","""ethereum""",2,5.27,"""Erigon and Besu are both execu…"
"""2025-01-24T19:17:57.066Z""","""L1Juror1""","""https://github.com/web3/web3.j…","""https://github.com/paradigmxyz…","""ethereum""",1,2.22,"""Reth has a 2% market share in …"
"""2025-01-24T19:20:19.601Z""","""L1Juror1""","""https://github.com/erigontech/…","""https://github.com/ethereum/go…","""ethereum""",2,10.55,"""Erigon and Geth are both execu…"
"""2025-01-24T19:25:40.017Z""","""L1Juror1""","""https://github.com/ethereumjs/…","""https://github.com/ethers-io/e…","""ethereum""",2,4.0,"""EthereumJS is not used directl…"
"""2025-01-15T11:19:51.121Z""","""L1Juror10""","""https://github.com/erigontech/…","""https://github.com/ethereum/we…","""ethereum""",1,50.0,"""A performant and robust execut…"


In [46]:
# Preview the first rows of the sample submission (repo, parent, weight).
df_sample.head()

repo,parent,weight
str,str,f64
"""https://github.com/a16z/helios""","""ethereum""",0.013638
"""https://github.com/alloy-rs/al…","""ethereum""",0.033943
"""https://github.com/apeworx/ape""","""ethereum""",0.008906
"""https://github.com/chainsafe/l…","""ethereum""",0.007414
"""https://github.com/consensys/t…","""ethereum""",0.007


In [47]:
# Preview the first rows of the test data (repo, parent).
df_test.head()

repo,parent
str,str
"""https://github.com/a16z/helios""","""ethereum"""
"""https://github.com/alloy-rs/al…","""ethereum"""
"""https://github.com/apeworx/ape""","""ethereum"""
"""https://github.com/chainsafe/l…","""ethereum"""
"""https://github.com/consensys/t…","""ethereum"""


In [48]:
# --- Build the dependency graph ---
# Every row of df_sample becomes one directed edge parent -> repo, and the
# row's `weight` value is stored as that edge's "weight" attribute.
G = nx.DiGraph()
G.add_weighted_edges_from(
    df_sample.select("parent", "repo", "weight").iter_rows()
)

print(f"✅ Dependency graph built with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")


✅ Dependency graph built with 46 nodes and 45 edges.


In [49]:
# --- Centrality analysis: which nodes touch the largest share of the graph? ---
centrality = nx.degree_centrality(G)
# Rank highest score first and keep the top five. The sort is stable, so
# nodes with equal scores stay in the order `centrality` lists them.
sorted_centrality = sorted(
    centrality.items(), key=lambda item: item[1], reverse=True
)[:5]

print("\n🔎 Top 5 critical repos by centrality:")
for repo, score in sorted_centrality:
    print(f"{repo}: {score:.3f}")



🔎 Top 5 critical repos by centrality:
ethereum: 1.000
https://github.com/a16z/helios: 0.022
https://github.com/alloy-rs/alloy: 0.022
https://github.com/apeworx/ape: 0.022
https://github.com/chainsafe/lodestar: 0.022


In [50]:
# --- Simple funding attribution simulation ---
# Each node receives a slice of the pool proportional to the summed weight
# of the edges pointing at it, relative to the weight of all edges.
funding_pool = 1000  # example: 1000 tokens
weights = nx.get_edge_attributes(G, "weight")
total_weight = sum(weights.values())

attribution = {}
for node in G.nodes():
    incoming = sum(
        edge_data["weight"] for _, _, edge_data in G.in_edges(node, data=True)
    )
    if total_weight > 0:
        attribution[node] = (incoming / total_weight) * funding_pool
    else:
        # Nothing to divide by: every node is attributed zero.
        attribution[node] = 0

print("\n💰 Funding Attribution (simulated):")
for repo, score in attribution.items():
    print(f"{repo}: {score:.2f} tokens")



💰 Funding Attribution (simulated):
ethereum: 0.00 tokens
https://github.com/a16z/helios: 14.31 tokens
https://github.com/alloy-rs/alloy: 35.61 tokens
https://github.com/apeworx/ape: 9.34 tokens
https://github.com/chainsafe/lodestar: 7.78 tokens
https://github.com/consensys/teku: 7.34 tokens
https://github.com/erigontech/erigon: 40.77 tokens
https://github.com/eth-infinitism/account-abstraction: 50.41 tokens
https://github.com/ethereum-lists/chains: 24.59 tokens
https://github.com/ethereum/consensus-specs: 10.09 tokens
https://github.com/ethereum/eips: 14.62 tokens
https://github.com/ethereum/evmone: 3.26 tokens
https://github.com/ethereum/execution-apis: 48.86 tokens
https://github.com/argotorg/fe: 15.21 tokens
https://github.com/ethereum/go-ethereum: 43.73 tokens
https://github.com/ethereum/py-evm: 6.25 tokens
https://github.com/ethereum/remix-project: 47.22 tokens
https://github.com/argotorg/solidity: 30.26 tokens
https://github.com/argotorg/sourcify: 45.82 tokens
https://github.com

In [55]:
# Interactive pyvis rendering of the parent -> repo edges from df_sample.
net = Network(
    notebook=True,
    directed=True,
    cdn_resources="in_line",  # ensures inline interactive graph works
)

# For every row: add both endpoints (repo first, then parent), then the
# edge between them, sized by the row's weight.
for repo, parent, weight in df_sample.select("repo", "parent", "weight").iter_rows():
    for node_id in (repo, parent):
        net.add_node(node_id, label=node_id)
    net.add_edge(parent, repo, value=weight)

# Show inline AND save HTML
net.show("dependency_graph.html", notebook=True, local=True)

dependency_graph.html
