# Exploring the Graph of Dependencies

A small exploration of the graph of dependencies to understand the data and the structure of the graph.

## Data

In [2]:
import json
import urllib.request

url = "https://raw.githubusercontent.com/deepfunding/dependency-graph/refs/heads/main/graph/unweighted_graph.json"
with urllib.request.urlopen(url) as response:
    graph_data = json.loads(response.read())


In [10]:
import polars as pl

pl.Config.set_tbl_rows(20)

df = pl.read_csv('https://raw.githubusercontent.com/deepfunding/dependency-graph/refs/heads/main/graph/unweighted_graph.csv')
df.sample(10)

seed_repo_owner,seed_repo_name,package_name,package_repo_owner,package_repo_name,package_source
str,str,str,str,str,str
"""grandinetech""","""grandine""","""fallible-iterator""","""sfackler""","""rust-fallible-iterator""","""RUST"""
"""safe-global""","""safe-smart-account""","""resolve-from""","""sindresorhus""","""resolve-from""","""NPM"""
"""safe-global""","""safe-smart-account""","""@ethersproject/rlp""","""ethers-io""","""ethers.js""","""NPM"""
"""ethereum""","""remix-project""","""postcss-minify-params""","""cssnano""","""cssnano""","""NPM"""
"""paradigmxyz""","""reth""","""alloy-sol-macro-expander""","""alloy-rs""","""core""","""RUST"""
"""ethereum""","""remix-project""","""chownr""","""isaacs""","""chownr""","""NPM"""
"""chainsafe""","""lodestar""","""jsonpointer""","""janl""","""node-jsonpointer""","""NPM"""
"""sigp""","""lighthouse""","""rustc_version""","""djc""","""rustc-version-rs""","""RUST"""
"""consensys""","""teku""","""@babel/highlight""","""babel""","""babel""","""NPM"""
"""ethereum""","""remix-project""","""@lerna/conventional-commits""","""lerna""","""lerna""","""NPM"""


The CSV data is the definition of a graph. The source is `seed_repo_owner/seed_repo_name` and the target is `package_repo_owner/package_repo_name`. There is also a source for the package, which is `package_source`.

In [11]:
df.filter(
    (pl.col("seed_repo_owner") == "ethereum") &
    (pl.col("seed_repo_name") == "solidity")
)

seed_repo_owner,seed_repo_name,package_name,package_repo_owner,package_repo_name,package_source
str,str,str,str,str,str
"""ethereum""","""solidity""","""sphinx-a4doc""","""taminomara""","""sphinx-a4doc""","""PIP"""
"""ethereum""","""solidity""","""sphinx""","""sphinx-doc""","""sphinx""","""PIP"""
"""ethereum""","""solidity""","""sphinx-rtd-theme""","""readthedocs""","""sphinx_rtd_theme""","""PIP"""


In [12]:
df.group_by("package_source").agg(pl.len())

package_source,len
str,u32
"""GO""",631
"""NPM""",10746
"""PIP""",135
"""RUST""",2024


In [13]:
df = df.with_columns(
    pl.concat_str(
        [
            pl.col("seed_repo_owner"),
            pl.col("seed_repo_name"),
        ],
        separator="/"
    ).alias("repo"),
    pl.concat_str(
        [
            pl.col("package_repo_owner"),
            pl.col("package_repo_name"),
        ],
        separator="/"
    ).alias("package_repo"),
)

In [18]:
df.group_by("repo").agg(pl.col("package_repo").n_unique()).sort("package_repo", descending=True)

repo,package_repo
str,u32
"""ethereum/remix-project""",2277
"""web3/web3.js""",1709
"""chainsafe/lodestar""",1514
"""eth-infinitism/account-abstrac…",854
"""ethereumjs/ethereumjs-monorepo""",796
"""safe-global/safe-smart-account""",519
"""paradigmxyz/reth""",463
"""sigp/lighthouse""",451
"""grandinetech/grandine""",435
"""erigontech/erigon""",253


In [17]:
df.group_by("package_repo").agg(pl.col("repo").n_unique()).sort("repo", descending=True)

package_repo,repo
str,u32
"""sindresorhus/locate-path""",7
"""colorjs/color-name""",7
"""sindresorhus/p-locate""",7
"""sindresorhus/shebang-regex""",7
"""eslint/js""",7
"""jonschlinkert/is-extglob""",7
"""ryanzim/universalify""",7
"""ricmoo/aes-js""",7
"""sindresorhus/parent-module""",7
"""epoberezkin/fast-json-stable-s…",7
