In [2]:
from dotenv import load_dotenv
import os
import pandas as pd
from pyoso import Client

load_dotenv()
OSO_API_KEY = os.environ['OSO_API_KEY']
client = Client(api_key=OSO_API_KEY)

### Find packages available in the Stylus SDK

Query the package_owners_v0 model to list out the packages in the Stylus SDK

In [3]:
query = """
SELECT
  package_artifact_source,
  package_artifact_name,
  package_owner_project_id,
  package_owner_artifact_namespace,
  package_owner_artifact_name
FROM package_owners_v0
WHERE package_owner_artifact_namespace = 'offchainlabs'
and package_owner_artifact_name = 'stylus-sdk-rs'
"""
df = client.to_pandas(query)
df

Unnamed: 0,package_artifact_source,package_artifact_name,package_owner_project_id,package_owner_artifact_namespace,package_owner_artifact_name
0,RUST,stylus-proc,6O68Uuu8PWcUMTWd34oZG3ldUEoQCxpNqdY3swqo+eo=,offchainlabs,stylus-sdk-rs
1,RUST,stylus-sdk,6O68Uuu8PWcUMTWd34oZG3ldUEoQCxpNqdY3swqo+eo=,offchainlabs,stylus-sdk-rs
2,RUST,mini-alloc,6O68Uuu8PWcUMTWd34oZG3ldUEoQCxpNqdY3swqo+eo=,offchainlabs,stylus-sdk-rs


### Usage statistics of Stylus SDK components

Identify the adoption of Stylus SDK components in aggregate and detail using sbom data

In [4]:
query = """
SELECT
  package_owners.package_owner_artifact_namespace as maintainter,
  sboms.to_package_artifact_source as package_source,
  sboms.to_package_artifact_name as package_name,
  count(distinct sboms.from_artifact_name) as count_dependent_repos,
  count(distinct sboms.from_artifact_namespace) as count_dependent_projects
FROM sboms_v0 sboms
JOIN package_owners_v0 package_owners
ON sboms.to_package_artifact_name = package_owners.package_artifact_name
AND sboms.to_package_artifact_source = package_owners.package_artifact_source
WHERE package_owners.package_owner_artifact_namespace = 'offchainlabs'
and package_owners.package_owner_artifact_name = 'stylus-sdk-rs'
GROUP BY 1,2,3
"""
df = client.to_pandas(query)
df

Unnamed: 0,maintainter,package_source,package_name,count_dependent_repos,count_dependent_projects
0,offchainlabs,RUST,stylus-proc,27,10
1,offchainlabs,RUST,stylus-sdk,30,12
2,offchainlabs,RUST,mini-alloc,27,12


In [5]:
query = """
SELECT
  package_owners.package_owner_artifact_namespace as maintainter,
  sboms.to_package_artifact_source as package_source,
  sboms.to_package_artifact_name as package_name,
  sboms.from_artifact_id as from_artifact_id,
  sboms.from_artifact_name,
  sboms.from_artifact_namespace
FROM sboms_v0 sboms
JOIN package_owners_v0 package_owners
ON sboms.to_package_artifact_name = package_owners.package_artifact_name
AND sboms.to_package_artifact_source = package_owners.package_artifact_source
WHERE package_owners.package_owner_artifact_namespace = 'offchainlabs'
and package_owners.package_owner_artifact_name = 'stylus-sdk-rs'
"""
df = client.to_pandas(query)
df

# cut down the dataframe to only include the unique from_artifact_namespace and from_artifact_name values
dependent_projects = df[['from_artifact_id', 'from_artifact_namespace', 'from_artifact_name']].drop_duplicates()


### Relationship between different packages in the ecosystem

For all projects using the Stylus SDK, what other dependencies do they have, and who maintains those dependencies?

In [11]:
query = """
WITH seed_repos AS (
  -- First get all repos that depend on stylus-sdk-rs packages
  SELECT DISTINCT 
    sboms.from_artifact_namespace,
    sboms.from_artifact_name
  FROM sboms_v0 sboms
  JOIN package_owners_v0 package_owners
    ON sboms.to_package_artifact_name = package_owners.package_artifact_name
    AND sboms.to_package_artifact_source = package_owners.package_artifact_source
  WHERE package_owners.package_owner_artifact_namespace = 'offchainlabs'
    AND package_owners.package_owner_artifact_name = 'stylus-sdk-rs'
)
SELECT DISTINCT
  sboms.from_artifact_namespace as seed_repo_owner,
  sboms.from_artifact_name as seed_repo_name,
  sboms.to_package_artifact_name as package_name,
  package_owners.package_owner_artifact_namespace as package_repo_owner,
  package_owners.package_owner_artifact_name as package_repo_name,
  sboms.to_package_artifact_source as package_source
FROM sboms_v0 sboms
JOIN package_owners_v0 package_owners
  ON sboms.to_package_artifact_name = package_owners.package_artifact_name
  AND sboms.to_package_artifact_source = package_owners.package_artifact_source
JOIN seed_repos
  ON sboms.from_artifact_namespace = seed_repos.from_artifact_namespace
  AND sboms.from_artifact_name = seed_repos.from_artifact_name
WHERE package_owners.package_owner_artifact_namespace IS NOT NULL
"""
df = client.to_pandas(query)

df



Unnamed: 0,seed_repo_owner,seed_repo_name,package_name,package_repo_owner,package_repo_name,package_source
0,offchainlabs,stylus-quickstart-vending-machine,strum_macros,peternator7,strum,RUST
1,offchainlabs,stylus-tutorials,is-set,inspect-js,is-set,NPM
2,fluidity-money,long.so,is-set,inspect-js,is-set,NPM
3,blockscout,cargo-stylus-test-examples,sisteransi,terkelg,sisteransi,NPM
4,fluidity-money,long.so,rxjs,reactivex,rxjs,NPM
...,...,...,...,...,...,...
14525,alt-research,nitro-near,github.com/syndtr/goleveldb,syndtr,goleveldb,GO
14526,fluidity-money,long.so,@metamask/sdk-communication-layer,metamask,metamask-sdk,NPM
14527,fluidity-money,long.so,stop-iteration-iterator,ljharb,stop-iteration-iterator,NPM
14528,blockscout,cargo-stylus-test-examples,string-width,sindresorhus,string-width,NPM


In [12]:
def create_sankey_data(df):
    # Calculate total outgoing flow for each owner
    owner_flow = df.groupby('package_repo_owner').size()
    top_owners = owner_flow.nlargest(100).index.tolist()
    
    # Filter dataframe to only include top owners
    df = df[df['package_repo_owner'].isin(top_owners)]
    
    # Create nodes and links
    nodes = []
    node_indices = {}
    links = {'source': [], 'target': [], 'value': []}
    
    # Process first level (package_source)
    sources = df['package_source'].unique()
    for source in sources:
        node_indices[f"source_{source}"] = len(nodes)
        nodes.append(source)
    
    # Process second level (package_repo_owner)
    owners = df['package_repo_owner'].unique()
    for owner in owners:
        node_indices[f"owner_{owner}"] = len(nodes)
        nodes.append(owner)
    
    # Process third level (seed_repo_owner)
    seed_owners = df['seed_repo_owner'].unique()
    for seed_owner in seed_owners:
        node_indices[f"seed_{seed_owner}"] = len(nodes)
        nodes.append(seed_owner)
    
    # Create source -> owner links
    source_to_owner = df.groupby(['package_source', 'package_repo_owner']).size().reset_index(name='count')
    for _, row in source_to_owner.iterrows():
        source_idx = node_indices[f"source_{row['package_source']}"]
        target_idx = node_indices[f"owner_{row['package_repo_owner']}"]
        links['source'].append(source_idx)
        links['target'].append(target_idx)
        links['value'].append(row['count'])
    
    # Create owner -> seed_owner links
    owner_to_seed = df.groupby(['package_repo_owner', 'seed_repo_owner']).size().reset_index(name='count')
    for _, row in owner_to_seed.iterrows():
        source_idx = node_indices[f"owner_{row['package_repo_owner']}"]
        target_idx = node_indices[f"seed_{row['seed_repo_owner']}"]
        links['source'].append(source_idx)
        links['target'].append(target_idx)
        links['value'].append(row['count'])
    
    return nodes, links

# Create Sankey data
nodes, links = create_sankey_data(df)

# Create and display the Sankey diagram
fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad = 15,
        thickness = 20,
        line = dict(color = "black", width = 0.5),
        label = nodes,
        color = "blue"
    ),
    link = dict(
        source = links['source'],
        target = links['target'],
        value = links['value']
    )
)])

# Update layout
fig.update_layout(
    title_text="Package Dependencies Flow: Source → Owner → Seed Repo Owner (Top 100 Owners)",
    font_size=10,
    height=800
)

fig.show()

### Get developer metrics for dependent projects  

In [13]:

# Remove the rows where from_artifact_namespace is offchainlabs
dependent_projects = dependent_projects[dependent_projects['from_artifact_namespace'] != 'offchainlabs']

# Filter out NA values and then create the SQL IN clause string
from_artifact_ids = "'" + "','".join(dependent_projects['from_artifact_id'].dropna().unique()) + "'"

from_artifact_ids

dependent_projects


Unnamed: 0,from_artifact_id,from_artifact_namespace,from_artifact_name
3,NNb+La72aA3UGNFKBDjiVcAR/TZpSouiZZcURtFhat8=,teeeeeo9,basket-swap-stylus
6,Tep/DN9AnX9pHOCMLj6eJ1A1Kl1lmvg7oIPAiVf0O/A=,blockscout,cargo-stylus-test-examples
10,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,fluidity-money,long.so
11,yQgaIZ4uOdQF1zKvUNvupSXwDXUOmlCgslj0/ZNPkdU=,seabrick,seabrick-contracts
15,GdPj37eneQbU8nag6yWYCK2K20uGmYLY8FI0j66amQY=,theblokc,arbdevbootcamp-reference
16,1o8fuaoqid4kC4Pk5Gde91b/UBwKmMJ14YunC/TLEJ4=,solide-project,stylide
25,1GYE6W/tDKF5l+U0CPT5GmdPPOvpDAn+aYxNxY+YMno=,cryptosmartnow,stylus_bitsave
26,lcFn7UyVJ4JqV5mG5O/D6Z6NH7P4eAf/dRzk3t5rDTg=,lit-protocol,lit-precompiles
28,1EyNbWC6miQ2pbvkfba8WWGeTE6wDCZR415r+j4NqTY=,cryptosmartnow,bs_arbitrum_main
29,xFKbS5I26pTHur9TL+pJr1UAFalKaMO3R3DjHIEuK/g=,dsrvlabs,arbiturm-stylus-solidity-compare


In [9]:
# filter query to only include the dependent_projects_list values for artifact_id in timeseries_metrics_by_artifact_v0    

query = f"""
SELECT
  ts.artifact_id,
  m.metric_name,
  ts.sample_date,
  ts.amount
FROM timeseries_metrics_by_artifact_v0 ts 
JOIN metrics_v0 m ON ts.metric_id = m.metric_id
WHERE artifact_id IN ('KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=')
AND ts.sample_date >= DATE '2024-09-01'
AND m.metric_name = 'GITHUB_active_developers_monthly'
order by ts.artifact_id, sample_date
"""
df = client.to_pandas(query)
df



Unnamed: 0,artifact_id,metric_name,sample_date,amount
0,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2024-09-01,3
1,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2024-09-01,3
2,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2024-10-01,3
3,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2024-10-01,3
4,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2024-11-01,3
5,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2024-11-01,3
6,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2024-12-01,3
7,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2024-12-01,3
8,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2025-01-01,2
9,KPl17qcDKNhAkw9myPkpPZD3kjx6DJKIJ31cHjqpSPM=,GITHUB_active_developers_monthly,2025-01-01,2
