In [1]:
# ! pip install pyoso

In [2]:
from dotenv import load_dotenv
import os
import pandas as pd
from pyoso import Client

load_dotenv()
OSO_API_KEY = os.environ['OSO_API_KEY']
client = Client(api_key=OSO_API_KEY)

In [3]:
# Helper function for inserting arrays into queries
def stringify(arr):
    return "'" + "','".join(arr) + "'"

## Identifying relevant projects

In [4]:
# Approach 1: use OSO's collection_names
client.to_pandas("""
    SELECT
        p.project_id,
        p.project_source,
        p.project_name AS name_of_oso_project_file,
        p.display_name
    FROM projects_v1 AS p
    JOIN projects_by_collection_v1 AS pbc ON pbc.project_id = p.project_id
    WHERE pbc.collection_name = 'optimism'
""")

Unnamed: 0,project_id,project_source,name_of_oso_project_file,display_name
0,qH3xmb7F+NVyPA7WyNZBz38LjRiqgTyOm8BErjMO/Kw=,OSS_DIRECTORY,0xparc,0xPARC
1,d3peEk5lBmuaMIpngrwaHzvw5AjlX80QqJk9TlrypaQ=,OSS_DIRECTORY,0xdeployer-ham,Ham
2,KHoI9CsfmTDAaqorYNCL2PnzxZSORHSuDAPZMma+sic=,OSS_DIRECTORY,0xbcamp,Optimism Solidity Survivor Bootcamp
3,ZPW/NLGzTCCfCUpb6LQD+Nei314hKSgnghJ5z0T1t7U=,OSS_DIRECTORY,xkcp,Keccak hashing
4,36HjWnU2xdrCKxiyFGLV37hFZTUbq13dMxq8Pj0nNgY=,OSS_DIRECTORY,op-stack-deployer-aymen-tirchi,OP Stack Deployer
...,...,...,...,...
718,mSqhiRtouVTDJVd4W8gxDlEZLXPtyqSC5qComGVn17c=,OSS_DIRECTORY,tockable,Tockable.xyz
719,SthV5g2Q1yIGtXOXTbasXOnGsP+YGZr3303aM3rZ0/k=,OSS_DIRECTORY,zkbob,zkBob
720,4ahxM2T2GW1Xl2gGh+jqUE7ICT1P4h8XqbW5RbyDGck=,OSS_DIRECTORY,op-ai-web-asharibali,Optimism AI - ChatBot
721,9IE7nBPNl0g93qBfjjCAq2KUNwJeGPrzEeWsLxbolDM=,OSS_DIRECTORY,merkletreejs,MerkleTreeJS


In [5]:
# Approach 2: look for any project in OP Atlas
client.to_pandas("""
    SELECT
        project_id,
        project_name AS op_atlas_project_id,
        display_name
    FROM projects_v1
    WHERE project_source = 'OP_ATLAS'
""")

Unnamed: 0,project_id,op_atlas_project_id,display_name
0,d0+th/VV4DyybzeNhR/rwUq21vxXk06/m5M/cFj6uV0=,0xaa64734cea7bda013313aec2a1a60ecc7f7dd1e4236f...,stVol
1,uaJShij8ROn3iSY1/RXM4rpoIsFfiMxiqnwz3hvZr2U=,0x7a6613bcd93169c489f4f26e0d9d2a7a566b515a5110...,Hallos
2,uCKDCr1sSPS7CrtfaLKF1lSuUVdiqxg05olV1W5960w=,0x558c98305d61f176db5e58ff2629c79669da243c4b54...,Superchain Headz
3,0ZCHPDlHDXDH3oukC8m8Y+j5fne2e8U27K0f7h4LAp0=,0x42f468879efad6fbb010a9180f84286646bbb0be1b5f...,WannaBet Weekly Tournaments
4,W+qRLLjycC/irEAyvLutPJc8vBxY2MA9LGO1TmvgY2E=,0xdd3daa81ae038baa1c4dd58bc043f063a09f9cc6ad35...,/christin on farcaster
...,...,...,...
4201,1oQNkde2mop7Zi9/zPHo0i0jgC+XaYrjoVYTNjmvq0o=,0x8624c6a494bfaa7632ff5c11553a7d9747ef96e78cc3...,gabo
4202,F5HzmjRu6nzwsVjVysAegOirPGg/mkda6frIIu2Ub8c=,0x1d8c423fe6dd98d66ada25a19e800533d573cc27bb4b...,Unitap
4203,R9FLih4SpPMNWxx0TDO64yaHObu9oNIUkxihwgxwYTQ=,0x6cb4bfbe64f50f4280280350e549d92bdabdd83dfe91...,dFantasy
4204,zRCEkA4DYlBaORjO2/HnM/49DW+j8Qn+aw/nL8uAfUs=,0x8b4a901ec1d46fae552f73782e0f8b851328eb6b4dc1...,Onchain Blobs - Randomly generated onchain blo...


In [6]:
# Approach 3: look for any GitHub org that shows up in either OSO or OP_ATLAS
df_projects = client.to_pandas("""
    WITH project_githubs AS (
      SELECT DISTINCT
        project_id,
        artifact_namespace AS github_org
      FROM artifacts_by_project_v1
      WHERE artifact_source = 'GITHUB'
    )
    SELECT
        pg.github_org,
        ARRAY_AGG(DISTINCT CASE WHEN pbc.collection_name = 'optimism' THEN p.project_name END)
          AS oso_project_files,
        ARRAY_AGG(DISTINCT CASE WHEN p.project_source = 'OP_ATLAS' THEN p.project_name END)
          AS op_atlas_project_ids,
        ARRAY_AGG(DISTINCT p.display_name) AS project_display_names,
        ARRAY_AGG(DISTINCT pg.project_id) AS oso_project_ids
    FROM project_githubs AS pg
    JOIN projects_by_collection_v1 AS pbc ON pg.project_id = pbc.project_id
    JOIN projects_v1 AS p ON pg.project_id = p.project_id
    WHERE pbc.collection_name = 'optimism' OR p.project_source = 'OP_ATLAS'
    GROUP BY 1
""")
df_projects

Unnamed: 0,github_org,oso_project_files,op_atlas_project_ids,project_display_names,oso_project_ids
0,gelatodigital,"[gelato, None]","[None, 0xa492e54cf4d492c5f8a9c16038b4e5d47cd3b...","[Gelato, Gelato Automate SDK, Gelato Relay SDK]","[CEgSH0sLKChprauip73TEAlNieWnpX+HRIV5re4xPIc=,..."
1,openzeppelin,"[openzeppelin, ethernaut-openzeppelin, None]","[None, 0x17fb589e599fe05e532b90c121eccc55b1249...","[OpenZeppelin, The Ethernaut, OpenZeppelin Con...","[EFNan6yoGom5N81+Hlb8ZSQdM3B1+iJRRM0TxITQeG8=,..."
2,theopenxproject,[theopenxproject],[None],[The Open X Project],[BWsrccmjhL8a4Qa3h3ZSCF0uxLKM9tK3ZAN9gWCY4a8=]
3,cliqueofficial,[cliqueofficial],[None],[Clique],[YDCitqeeymU1hJBfZpJ4sMuWjk1bCwFgcMKaTiUoNVE=]
4,fraxfinance,[fraxfinance],[None],[Frax Finance],[1PqWi/6kQu3j9O311xKAqoA9vYRJjIlLefadHnswYrE=]
...,...,...,...,...,...
822,mcgingras,[mcgingras-loot2],[None],[Loot2],[x27+XcfTVX+0lGH6Red5Pig20HKKVE1/EjDtsOe5knc=]
823,seedclub,[enjoy-tech],[None],[Enjoy.tech],[6rBTpjfkCGptucRNLz80FUkhaxg0Hm07bzb824NkHXA=]
824,air3app,[None],[0xf9ebb0464e7162627ab3de5fe03a21c879e932233e1...,[Air3],[dk1gwy0G/dP9SOcyolL+bfgPQJ0ngqXF6ro9qyQdGwo=]
825,arpa-network,[arpa-network],[None],[ARPA Randcast],[Ft17RswF9aY0wmGSpke3RbNbjfHh8w64ve+mewxjqH8=]


In [7]:
# How many total projects do we identify?
PROJECT_IDS = [x for lst in df_projects['oso_project_ids'] for x in lst]
len(PROJECT_IDS)

1048

## Let's pull some metrics

In [8]:
alltime_metrics = client.to_pandas("SELECT DISTINCT metric_name, display_name FROM metrics_v0 WHERE metric_name LIKE '%all_time' ORDER BY 1")
alltime_metrics.tail(5)

Unnamed: 0,metric_name,display_name
290,ZORA_active_contracts_over_all_time,Active Contracts
291,ZORA_contract_invocations_over_all_time,Contract Invocations
292,ZORA_defillama_tvl_over_all_time,Defillama TVL
293,ZORA_gas_fees_over_all_time,Gas Fees
294,ZORA_transactions_over_all_time,Transactions


In [9]:
sorted(list(alltime_metrics['display_name'].unique()))

['Active Addresses Aggregation',
 'Active Contracts',
 'Active Developers',
 'Average PR Time to Merge',
 'Average Time to First Response',
 'Closed Issues',
 'Comments',
 'Commits',
 'Contract Invocations',
 'Contributors',
 'Defillama TVL',
 'Forks',
 'Funding Received',
 'Gas Fees',
 'Merged Pull Requests',
 'Opened Issues',
 'Opened Pull Requests',
 'Releases',
 'Repositories',
 'Stars',
 'Transactions']

In [10]:
daily_metrics = client.to_pandas("SELECT DISTINCT metric_name, display_name FROM metrics_v0 WHERE metric_name LIKE '%daily' ORDER BY 1")
daily_metrics.tail(5)

Unnamed: 0,metric_name,display_name
288,ZORA_active_contracts_daily,Active Contracts
289,ZORA_contract_invocations_daily,Contract Invocations
290,ZORA_defillama_tvl_daily,Defillama TVL
291,ZORA_gas_fees_daily,Gas Fees
292,ZORA_transactions_daily,Transactions


In [11]:
df_metrics = client.to_pandas(f"""
  SELECT
    p.project_id,
    p.project_source,
    p.display_name,
    SUM(CASE WHEN m.display_name = 'Transactions' AND m.metric_name LIKE '%daily' THEN tm.amount ELSE 0 END) AS transactions,
    SUM(CASE WHEN m.display_name = 'Stars' AND m.metric_name LIKE '%all_time' THEN tm.amount ELSE 0 END) AS github_stars,
    MIN(CASE WHEN m.display_name = 'Commits' AND m.metric_name LIKE '%daily' THEN tm.sample_date ELSE null END) AS first_commit,
    MIN(CASE WHEN m.display_name = 'Transactions' AND m.metric_name LIKE '%daily' THEN tm.sample_date ELSE null END) AS first_transaction,
    MAX(CASE WHEN m.display_name = 'Active Developers' AND m.metric_name LIKE '%monthly' AND tm.sample_date = DATE '2025-02-01' THEN tm.amount ELSE 0 END) AS active_developers
  FROM timeseries_metrics_by_project_v0 AS tm
  JOIN metrics_v0 AS m ON m.metric_id = tm.metric_id
  JOIN projects_v1 AS p ON p.project_id = tm.project_id
  WHERE p.project_id IN ({stringify(PROJECT_IDS)})
  GROUP BY 1,2,3
  ORDER BY 4
""")
df_metrics.set_index(['project_id', 'project_source', 'display_name'], inplace=True)
df_metrics.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,transactions,github_stars,first_commit,first_transaction,active_developers
project_id,project_source,display_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MSfDkUunLKtfuMRdZuSxuemESQuIWKFCh1O6ySIEUs8=,OP_ATLAS,Account Abstraction - ERC-4337,41768895,1641,2021-09-29,2023-04-22,3
1tglFLCPO1i0woe4TW7pBm1iMqrhlWJ1cv2L/5b0VUk=,OSS_DIRECTORY,Infinitism (ERC-4337) - Account Abstraction,41768895,1641,2021-09-29,2023-04-22,3
Jr4krZ4lp1dMiTRDwmF2R9Hw4mF3rsukcBCM4pnF/fc=,OP_ATLAS,Uniswap on Superchain (Oku),41898432,0,2025-03-06,2023-07-16,0
o+HSkDisJ4Me0rmZT0iLKjKyUD8JxouPc31pw3ti4P8=,OSS_DIRECTORY,LayerZero,45920450,3532,2022-02-10,2022-03-15,13
pWEQHj4IsxU8lx2EDLJpphYaGAvw/yRps3vgTbVCXjQ=,OSS_DIRECTORY,Uniswap,146657957,24053,2018-10-13,2021-11-15,19


In [12]:
txn_filter  = df_metrics['transactions'].between(df_metrics['transactions'].median(), df_metrics['transactions'].quantile(0.9))
star_filter = df_metrics['github_stars'].between(10, df_metrics['github_stars'].quantile(0.8))
dev_filter  = df_metrics['active_developers'].between(1, 20)

df_metrics_filtered = df_metrics[txn_filter & star_filter & dev_filter].dropna()
df_metrics_filtered

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,transactions,github_stars,first_commit,first_transaction,active_developers
project_id,project_source,display_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
umRqPpvKJk46Da+E4636QAR+Wzwo/0FLMGDKg53GyF0=,OSS_DIRECTORY,Eco Association,1613,47,2019-09-30,2023-06-12,5
I7MpjYBji+f8RTU2z82rl+dA9zAIY7EBL1dAYG9eDGs=,OSS_DIRECTORY,Copin.io,1897,13,2023-10-02,2023-11-06,2
Icdd7UA5WRMMx/E2sEdPICMoNAWk6RX6GOHUhBkTndc=,OSS_DIRECTORY,JEFE TOKEN,2103,23,2021-12-07,2023-10-04,1
pU+8a0LECe3IuQWgozjnrKWawkn70NE2gWtx16lge10=,OSS_DIRECTORY,ZKP2P Fiat On Ramp,2228,37,2023-06-14,2023-09-22,2
7M4NI5CUzayK7YjK0VXLPGX0sxh3ashadfMP5586LII=,OSS_DIRECTORY,Kiwi News,3035,93,2022-11-17,2023-07-11,1
...,...,...,...,...,...,...,...
dmXBoFu8JBive6XzLugqtI6hNxuT/oixb5nwteXZ5Y8=,OSS_DIRECTORY,Agora,814561,91,2023-03-13,2023-02-07,9
2l+DcexWOerkhRsKXiaWZam/soE6IQ5tVe7t4K202lo=,OSS_DIRECTORY,Thales,886440,117,2021-05-31,2021-11-30,6
sZLQ7YXTrKyqwL630VpLPLJL8jAc0Pf2vv67yJ8qGlE=,OSS_DIRECTORY,LogX,902816,15,2023-02-03,2024-02-07,1
vCa+wBp0799NJ8j9L6UgKxKmqiLT95Zk/aQX/hXty7c=,OP_ATLAS,Mint Club,942397,81,2023-11-15,2024-01-19,2


In [13]:
#df_metrics_filtered.to_csv("ProjectClassification.csv", index=False)