In [1]:
import json
import numpy as np
import pandas as pd
import requests

from dotenv import load_dotenv
import os
from pyoso import Client
from collections import defaultdict

In [2]:
# Fetch the DefiLlama protocol listing.
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting an error
# payload flow into the DataFrame construction further down.
response = requests.get("https://api.llama.fi/protocols", timeout=30)
response.raise_for_status()
protocols = response.json()

In [3]:
# The API key is read from the OSO_API_KEY environment variable; load_dotenv()
# runs first (presumably so the key can live in a local .env file).
load_dotenv()
client = Client(api_key=os.environ["OSO_API_KEY"])

# Distinct (project, artifact, source) rows for GitHub, DefiLlama and Twitter
# artifacts. For GITHUB rows the artifact is the namespace; for the other
# sources it is the artifact name.
oso_artifacts_sql = """
SELECT DISTINCT
  p.project_name,
  CASE
    WHEN artifact_source = 'GITHUB' THEN artifact_namespace
    ELSE artifact_name
  END AS artifact,
  a.artifact_source
FROM int_artifacts_by_project_in_ossd AS a
JOIN projects_v1 AS p ON a.project_id = p.project_id
WHERE a.artifact_source IN ('GITHUB', 'DEFILLAMA', 'TWITTER')
"""
df_oso = client.to_pandas(oso_artifacts_sql)

# Build the lookup index: artifact source -> lowercased artifact -> project names.
# Names are kept in first-seen order and de-duplicated, because lowercasing can
# collapse artifacts that the SQL DISTINCT treated as different.
proj_artifacts = defaultdict(lambda: defaultdict(list))

# Iterate the three columns directly; iterrows() would build a Series per row.
for pname, art, src in zip(
    df_oso["project_name"], df_oso["artifact"], df_oso["artifact_source"]
):
    # A missing artifact would otherwise be stringified into a bogus
    # "none"/"nan" key that real identifiers could collide with.
    if pd.isna(art):
        continue
    names = proj_artifacts[src][str(art).lower()]
    if pname not in names:
        names.append(pname)

# Plain nested dicts, so later lookups cannot create keys by accident.
project_artifacts = {k: dict(v) for k, v in proj_artifacts.items()}

In [4]:
# Fields of the DefiLlama protocol listing that are kept for matching.
cols = [
    "id", "name", "slug", "url", "description",
    "twitter", "github", "parentProtocol", "tvl", "chains",
]
df = pd.json_normalize(protocols)[cols]

# Normalise the join keys:
#   - twitter handles are lowercased,
#   - the "parent#" prefix is stripped from parentProtocol,
#   - github keeps only the first entry of the list, lowercased (None when the
#     value is missing or an empty list).
df = df.assign(
    twitter=df["twitter"].str.lower(),
    parentProtocol=df["parentProtocol"].str.replace("^parent#", "", regex=True),
    github=df["github"].apply(
        lambda repos: repos[0].lower() if isinstance(repos, list) and repos else None
    ),
)

target_chains = {
    "Ethereum", "Base", "Arbitrum", "Sonic", "Optimism",
    "Polygon", "Celo", "Scroll", "zkSync Era",
}

# Keep protocols with TVL above 1,000,000 that are deployed on more than one
# of the target chains.
# NOTE(review): `> 1` requires at least two target chains - confirm this is
# intended rather than "at least one".
tvl_above_threshold = df["tvl"] > 1_000_000
on_several_target_chains = df["chains"].apply(
    lambda ch: len(target_chains.intersection(ch)) > 1
)
df = df[tvl_above_threshold & on_several_target_chains]

def _lookup(src, key):
    """Return the OSO project names indexed under ``key`` for artifact source ``src``.

    Args:
        src: Artifact source name used as the top-level key of
            ``project_artifacts`` (e.g. "GITHUB", "DEFILLAMA", "TWITTER").
        key: Artifact identifier to look up. Anything that is not a non-empty
            string (None, NaN, "") is treated as "no identifier".

    Returns:
        The list of project names stored for the identifier, or an empty list
        when the identifier is missing, unknown, or the source has no entries.
    """
    if not isinstance(key, str) or not key:
        return []
    # .get() on the source too: a source with no rows in the OSO result is
    # absent from the index, which must mean "no match" rather than KeyError.
    # Index keys are lowercased when the index is built, so lowercase here too.
    return project_artifacts.get(src, {}).get(key.lower(), [])

# Look each protocol up in the OSO index by DefiLlama slug, GitHub value and
# Twitter handle; every lookup yields a (possibly empty) list of project names.
df = df.assign(
    oso_by_slug=lambda d: d["slug"].apply(lambda s: _lookup("DEFILLAMA", s)),
    oso_by_github=lambda d: d["github"].apply(lambda g: _lookup("GITHUB", g)),
    oso_by_twitter=lambda d: d["twitter"].apply(lambda t: _lookup("TWITTER", t)),
)

# Union of the three lookups per protocol. sorted() makes the order stable
# across runs (plain set iteration order depends on string hashing). Zipping
# the columns avoids row-wise DataFrame.apply, whose return type depends on
# the shape of the frame.
df["defillama_mappings"] = pd.Series(
    [
        sorted({*by_slug, *by_github, *by_twitter}, key=str)
        for by_slug, by_github, by_twitter in zip(
            df["oso_by_slug"], df["oso_by_github"], df["oso_by_twitter"]
        )
    ],
    index=df.index,
    dtype=object,
)
# Number of distinct OSO projects matched; used to split "update" vs "new" below.
df["len_defillama_mappings"] = df["defillama_mappings"].str.len()
df.tail()

Unnamed: 0,id,name,slug,url,description,twitter,github,parentProtocol,tvl,chains,oso_by_slug,oso_by_github,oso_by_twitter,defillama_mappings,len_defillama_mappings
1497,4390,Incognito,incognito,https://incognito.financial/,Incognito is a one-stop-shop that allows users...,incognito_dex,,,1140879.0,"[Ethereum, Binance, Polygon, Fantom, Avalanche]",[],[],[],[],0
1504,4790,Bridgers,bridgers,https://bridgers.ai/,"Unbounded Swap, Infinite Possibilities",bridgersxyz,,,1113435.0,"[Ethereum, Binance, Tron, Polygon, Arbitrum, O...",[],[],[bridgers],[bridgers],1
1511,228,FutureSwap,futureswap,https://www.futureswap.com/,Futureswap is a decentralized perpetuals excha...,futureswapx,futureswap,,1090598.0,"[Arbitrum, Ethereum, Avalanche]",[],[futureswap],[],[futureswap],1
1523,4197,Secured Finance Lending,secured-finance-lending,https://secured.finance,Secured Finance built a full on-chain Orderboo...,secured_fi,,secured-finance,1053973.0,"[Filecoin, Ethereum, Arbitrum, Polygon zkEVM, ...",[],[],[secured-finance],[secured-finance],1
1541,2567,Timeswap V2,timeswap-v2,https://app.timeswap.io,Timeswap is the first oracleless lending/borro...,timeswaplabs,,timeswap,1004057.0,"[Arbitrum, Hyperliquid, Optimism, Base, Ethere...",[],[],[timeswap-labs],[timeswap-labs],1


In [5]:
# Protocols that resolve to exactly one OSO project through GitHub and/or
# Twitter but have no match by DefiLlama slug.
# NOTE(review): 'oso_project_filename' holds the one-element mapping list
# (e.g. ['bridgers']), not a bare string - confirm consumers expect that.
updates = (
    df.loc[
        df['len_defillama_mappings'].eq(1) & df['oso_by_slug'].map(len).lt(1),
        ['name', 'slug', 'url', 'twitter', 'github', 'defillama_mappings'],
    ]
    .rename(
        columns={
            'name': 'display_name',
            'slug': 'defillama_protocol_slug',
            'defillama_mappings': 'oso_project_filename',
        }
    )
)

#updates.to_dict(orient='records')

In [6]:
# Protocols with no OSO match at all that do have a GitHub value: candidates
# for new project entries.
new_projects = (
    df.loc[
        df['len_defillama_mappings'].eq(0) & df['github'].notna(),
        ['name', 'description', 'url', 'github', 'twitter', 'slug'],
    ]
    # Only the selected columns are renamed; 'defillama_mappings' is not part
    # of this selection, so it has no entry in the mapping.
    .rename(
        columns={
            'name': 'display_name',
            'slug': 'defillama_protocol_slug',
        }
    )
)

#new_projects.to_dict(orient='records')