In [1]:
from dotenv import load_dotenv
import os
import pandas as pd
import trino

# Load variables from a .env file (python-dotenv) into the process environment
# so that the TRINO_* settings read from os.environ in this notebook are available.
load_dotenv()

True

In [2]:
# Open a Trino DB-API connection configured from environment variables.
# TRINO_HOST, TRINO_PORT and TRINO_USER must all be set; a missing one raises KeyError.
conn = trino.dbapi.connect(
    host=os.environ['TRINO_HOST'],
    # Environment values are always strings; pass the port as a number
    # (also fails fast with ValueError if TRINO_PORT is not numeric).
    port=int(os.environ['TRINO_PORT']),
    user=os.environ['TRINO_USER'],
    catalog='iceberg',
)
# Single shared cursor, used by the `query` helper in this notebook.
cursor = conn.cursor()

In [3]:
def query(q):
    """Execute the SQL text ``q`` on the shared ``cursor`` and return a DataFrame.

    All rows are fetched eagerly. Column labels are taken from the first field
    of each entry in ``cursor.description``.
    """
    cursor.execute(q)
    records = cursor.fetchall()
    column_names = []
    for column_meta in cursor.description:
        column_names.append(column_meta[0])
    return pd.DataFrame(records, columns=column_names)

def stringify(arr):
    """Render an iterable of strings as a comma-separated list of SQL string literals.

    The result is meant to be interpolated inside ``IN ( ... )``, e.g.
    ``['a', 'b']`` becomes ``'a','b'``. An empty iterable yields ``''``
    (a single empty literal), which keeps the surrounding SQL valid.

    Embedded single quotes are escaped by doubling them (standard SQL), so a
    value such as ``O'Neil`` cannot terminate the literal early.

    Raises:
        TypeError: if any element is not a string.
    """
    literals = []
    for value in arr:
        if not isinstance(value, str):
            raise TypeError(
                f"stringify expects string values, got {type(value).__name__}"
            )
        literals.append(value.replace("'", "''"))
    return "'" + "','".join(literals) + "'"

In [4]:
# Projects (id, name, display name) that belong to the 'op-retrofunding-4' collection.
onchain_projects_sql = """
    SELECT
        projects.project_id,
        projects.project_name,
        projects.display_name
    FROM oso.projects_v1 AS projects
    JOIN oso.projects_by_collection_v1 AS pbc ON projects.project_id = pbc.project_id
    WHERE pbc.collection_name = 'op-retrofunding-4'        
"""
df_onchain_projects = query(onchain_projects_sql)
df_onchain_projects.tail()

Unnamed: 0,project_id,project_name,display_name
225,0vxdfbwEGwn4il9c4oGyjno56OLaf+q61GARCJvMK0U=,kiwi-news-attestate,Kiwi News
226,UQrhi8C2lSXtgkojDyC0Vqo4NvwHsYkOFRDUhlVkv80=,kwenta,Kwenta
227,Q+XTsAp+ararE1O+9yqnHcfnkgqAeft2enVBXfjpDb4=,l2pass,L2Pass
228,I1ULrUY9t789vj4akEP3lLgB9hmC1Unfg7875NEdWL0=,lifinance,LI.FI
229,UF36qYsUzfwX6GDNuWgpDXrqsmrLcK4Rw5D4hZY5Pxc=,layer-zero,LayerZero


In [5]:
# One row per project in the 'op-rpgf3' collection (excluding a few named projects),
# with repository-level fields rolled up across all of the project's repositories:
# earliest created_at, latest updated_at, and summed star/fork/package counts.
# Rows are ordered by total stars, highest first.
#
# GROUP BY already yields one row per (project_id, project_name, display_name), so no
# DISTINCT is needed; grouping and ordering use explicit expressions rather than
# ordinal positions so that reordering the select list cannot silently change them.
df_devtooling_projects = query("""
SELECT
    pbc.project_id,
    pbc.project_name,
    p.display_name,
    MIN(r.created_at) AS created_at,
    MAX(r.updated_at) AS updated_at,
    SUM(r.star_count) AS star_count,
    SUM(r.fork_count) AS fork_count,
    SUM(r.num_packages_in_deps_dev) AS num_packages_in_deps_dev
FROM oso.projects_by_collection_v1 AS pbc
JOIN oso.projects_v1 AS p ON pbc.project_id = p.project_id
JOIN oso.int_repositories_enriched AS r ON p.project_id = r.project_id
WHERE
    pbc.collection_name IN ('op-rpgf3')
    AND p.project_name NOT IN ('optimism', 'opensource-observer', 'gitcoin')
GROUP BY
    pbc.project_id,
    pbc.project_name,
    p.display_name
ORDER BY SUM(r.star_count) DESC
""")
df_devtooling_projects.tail()

Unnamed: 0,project_id,project_name,display_name,created_at,updated_at,star_count,fork_count,num_packages_in_deps_dev
297,e15X5AXuUYoTxk/FJ6fYzl99lUQA7tNryV9jmug6JGI=,basepunkorg,Base Punk NFT & Social-Fi Dapps,2023-09-30 10:01:31+00:00,2024-06-02 17:17:35+00:00,0,0,0
298,wUzVyEbU4SPqKc3sdc4Bj8/R9htKiXnZPPPaECmzNw0=,opdelegate-michael-vander-meiden,OPdelegate.com,2023-08-25 23:03:54+00:00,2023-08-26 01:15:20+00:00,0,0,0
299,IofSAq9drqeaKp67zIFUYM+kFNuMt50ldke4Etk+39o=,heno-relief-game-sweetmantech,Sweetman,2023-07-25 23:18:19+00:00,2024-06-24 18:44:29+00:00,0,1,0
300,LwP3a+36HgIcTCZ4AqNtcclM4BySp6i/jE4iW6GYUu0=,l2planet,L2 Planet,2022-07-08 14:48:53+00:00,2023-10-03 16:19:07+00:00,0,1,1
301,tR8yL0KfQz9DFF7Avnaq/o5XIAMUBLks5nPHrL3LpJE=,kairosresearch,Kairos Research,2024-03-04 00:31:02+00:00,2024-12-21 18:46:06+00:00,0,1,0


In [6]:
# Unique project ids per cohort, plus their counts.
ONCHAIN_PROJECT_IDS = list(df_onchain_projects['project_id'].unique())
DEVTOOLING_PROJECT_IDS = list(df_devtooling_projects['project_id'].unique())
NUM_ONCHAIN_PROJECTS = len(ONCHAIN_PROJECT_IDS)
NUM_DEVTOOLING_PROJECTS = len(DEVTOOLING_PROJECT_IDS)

# Combined id list: on-chain ids first, then dev-tooling ids. The two lists are
# simply concatenated, so an id present in both cohorts appears twice.
PROJECT_IDS = ONCHAIN_PROJECT_IDS + DEVTOOLING_PROJECT_IDS

# project_id -> display_name lookup. Dev-tooling names are applied last, so they
# win when the same project_id appears in both frames.
PROJECT_NAMES = {}
for _projects_frame in (df_onchain_projects, df_devtooling_projects):
    PROJECT_NAMES.update(
        _projects_frame.set_index('project_id')['display_name'].to_dict()
    )

In [7]:
# Distinct (on-chain builder project, dependency project, dependency source) triples:
# repositories of on-chain projects that depend on repositories of any tracked project,
# excluding self-dependencies and repositories not updated since 2024-01-01.
dependent_ids_sql = stringify(ONCHAIN_PROJECT_IDS)
dependency_ids_sql = stringify(PROJECT_IDS)
project_dependencies_sql = f"""
SELECT DISTINCT
    obp.project_id AS onchain_builder_project_id,
    dtp.project_id AS devtooling_project_id,
    deps.dependency_source
FROM oso.int_code_dependencies AS deps
JOIN oso.int_repositories_enriched AS obp ON deps.dependent_artifact_id = obp.artifact_id
JOIN oso.int_repositories_enriched AS dtp ON deps.dependency_artifact_id = dtp.artifact_id
WHERE
    obp.project_id != dtp.project_id
    AND obp.project_id IN ( {dependent_ids_sql} )
    AND dtp.project_id IN ( {dependency_ids_sql} )
    AND obp.updated_at >= date('2024-01-01')
    AND dtp.updated_at >= date('2024-01-01')
"""
df_project_dependencies = query(project_dependencies_sql)
df_project_dependencies.tail()

Unnamed: 0,onchain_builder_project_id,devtooling_project_id,dependency_source
1367,LNQR7by2d4x/GcRrl47/US9ZJ1j12JkObQoxSG6oUlM=,MCpsUwYZfHNmAmI2Msxk5JorgS4Bgt7/3vvFT5HHnGw=,GO
1368,gSNSfKjRbnqInKzb0X33PNfl2KcBb/geZNcaBrvysV0=,57ErQ5loSD9BlgJCB1nUOKthiQC6uxZ6ZESdKngaphM=,NPM
1369,/Fu7cQ3Cn6mE8Ybh0K0/tejumWmDWmqP2Mh6CpVI2Oc=,Q8XI5uvgwOT41A1eWKKVRoQdzjDvYXRtVLpT6n7KVrg=,NPM
1370,5VkD5cN8xZtehiLcOgtLJsWoekAgzc5g8M5L5XWcsis=,D3mUC1uetKFVx1xSk39LA6L7IG93NmwmCIqjArMu8i4=,NPM
1371,SV5kdZqv7HuVnB0A9hgFHPi3lubBZ0Tiu0zZS+XMZ+A=,p0d52ry1DHeogcfqG6LhWQDbBYD473gv6MLSMmsdEdo=,NPM


In [8]:
# SQL for a developer -> project activity graph:
#   * dev_events: GitHub events joined to their target repository and developer name.
#   * onchain_developers: developers with a COMMIT_CODE event since 2024-01-01 on a
#     TypeScript/Solidity/Rust/Vyper repository of an on-chain project.
#   * final SELECT: distinct monthly (project, developer, event type) rows for those
#     developers across all tracked projects, limited to the listed event types.
# NOTE(review): int_developer_activity_by_repo is joined on developer_id only; if that
# table holds one row per developer per repo (as its name suggests), the join multiplies
# every event row before DISTINCT collapses it -- confirm against the schema.
onchain_filter_sql = stringify(ONCHAIN_PROJECT_IDS)
tracked_filter_sql = stringify(PROJECT_IDS)
dtp_query = f"""
WITH dev_events AS (
    SELECT
        events.time,
        events.from_artifact_id as developer_id,
        devs.developer_name,
        events.to_artifact_id,
        repos.language,
        repos.project_id,
        events.event_type
    FROM oso.int_events__github AS events
    JOIN oso.int_repositories_enriched AS repos ON events.to_artifact_id = repos.artifact_id
    JOIN oso.int_developer_activity_by_repo AS devs ON events.from_artifact_id = devs.developer_id
),
onchain_developers AS (
    SELECT DISTINCT developer_id
    FROM dev_events
    WHERE 
        event_type = 'COMMIT_CODE'
        AND time >= date('2024-01-01') 
        AND language IN ('TypeScript', 'Solidity', 'Rust', 'Vyper')
        AND project_id IN ( {onchain_filter_sql} )
)
SELECT DISTINCT
    date_trunc('month', time) as event_month,
    project_id,
    developer_id,
    developer_name,
    event_type
FROM dev_events
WHERE
    developer_id IN (SELECT developer_id FROM onchain_developers)
    AND project_id IN ( {tracked_filter_sql} )
    AND event_type IN (
        'FORKED',
        'STARRED',
        'COMMIT_CODE',
        'ISSUE_COMMENT',
        'ISSUE_OPENED',
        'PULL_REQUEST_OPENED',
        'PULL_REQUEST_REVIEW_COMMENT'
    )
"""
# Execution is currently disabled; only the SQL text is built in this cell.
#developers_to_projects_graph = query(dtp_query)
#developers_to_projects_graph.tail()