In [2]:
from google.cloud import bigquery
import pandas as pd
import os

In [3]:
# Target project for all BigQuery jobs issued by this notebook.
GCP_PROJECT = 'opensource-observer'

# The Google client libraries read the key file location from this variable,
# so it has to be set before the client is constructed.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../../../oso_gcp_credentials.json'
client = bigquery.Client(project=GCP_PROJECT)

In [4]:
def stringify(lst):
    """Render an iterable of strings as a SQL ``IN`` list literal.

    Falsy items (``None``, ``''``) are dropped. Every remaining value is
    wrapped in single quotes, with backslashes, single quotes and line
    breaks backslash-escaped, so a value such as ``it's`` can no longer
    terminate the literal early and break (or alter) the surrounding query.
    Values without those characters are rendered exactly as before.

    Args:
        lst: Iterable of ``str`` values (list, tuple, 1-D array, ...).

    Returns:
        A string such as ``('a','b')``. When nothing survives the filter the
        result is ``('')``, which keeps ``x in ('')`` syntactically valid.

    Raises:
        TypeError: If a truthy item is not a ``str``.
    """
    # A single translate() pass, so an already-escaped backslash is never
    # escaped a second time.
    escapes = str.maketrans({
        "\\": "\\\\",
        "'": "\\'",
        "\n": "\\n",
        "\r": "\\r",
    })
    quoted = []
    for item in lst:
        if not item:
            continue
        if not isinstance(item, str):
            raise TypeError(
                f"stringify expects str items, got {type(item).__name__}"
            )
        quoted.append(item.translate(escapes))
    return "('" + "','".join(quoted) + "')"

In [5]:
# Repository languages used to filter the onchain repos in the dependency query.
languages = [
    'Solidity',
    'TypeScript',
    'Rust',
    'Vyper',
]

In [6]:
# One row per (onchain repo -> dependency repo) edge, for repos of projects in
# the 'op-onchain' collection whose language is in `languages`. Edges where the
# dependency belongs to the same project as the dependent repo are excluded.
deps_sql = f"""
    select
      onchain_repos.project_id as onchain_project_id,
      onchain_repos.artifact_id as onchain_repo_artifact_id,
      onchain_repos.artifact_url as onchain_project_github_url,
      onchain_repos.language as onchain_project_github_language,
      p.project_name as devtooling_project_name,
      p.display_name as devtooling_project_display_name,
      dt_repos.artifact_url as devtooling_project_github_url,
      deps.dependency_source as dependency_source,
      deps.dependency_name as dependency_name
    from `oso.repositories_v0` as onchain_repos
    join `oso.int_code_dependencies` as deps
      on onchain_repos.artifact_id = deps.dependent_artifact_id
    join `oso.int_repositories_enriched` as dt_repos
      on deps.dependency_artifact_id = dt_repos.artifact_id
    join `oso.projects_v1` as p
      on dt_repos.project_id = p.project_id
    where
      onchain_repos.project_id in (
        select project_id from `oso.projects_by_collection_v1`
        where collection_name = 'op-onchain'
      )
      and onchain_repos.language in {stringify(languages)}
      and onchain_repos.project_id != p.project_id"""

results = client.query(deps_sql)
df_deps = results.to_dataframe()
df_deps.tail()

Unnamed: 0,onchain_project_id,onchain_repo_artifact_id,onchain_project_github_url,onchain_project_github_language,devtooling_project_name,devtooling_project_display_name,devtooling_project_github_url,dependency_source,dependency_name
1784938,EhPGap_lKavTMqT2DUdCSDKsQaAiG99v0Rq__pINhMM=,ENt2JO42WRnXY1C8Mep2oyQ7dTL2nOJS5xtPKyz4Ta8=,https://github.com/1inch/walletconnect-monorepo,TypeScript,ionicabizau,Ionică Bizău (Johnny B.),https://github.com/ionicabizau/protocols,NPM,protocols
1784939,m3u9RgYlYchemnTKZDZSZV9xLGReVz_KGG5sV0tIrUE=,nItG9OP8odd7cA_uUR6zebN6rKSSVHohISET1_gX6_w=,https://github.com/lidofinance/avalanche-contr...,Solidity,gorilla,Gorilla web toolkit,https://github.com/gorilla/mux,GOLANG,github.com/gorilla/mux
1784940,WPjCCIDK0pJh-tTktKDnWPNNhP8yjWYUxixrmtComxE=,fH1b5Xv-Ge5c3MHvumQlYHXFqpyTrkZ-94TPN_qDu_o=,https://github.com/seamless-protocol/interface,TypeScript,syntax-tree,syntax-tree,https://github.com/syntax-tree/mdast-util-gfm-...,NPM,mdast-util-gfm-footnote
1784941,gHO2CKahfi2EMVYX6Q5B6ZvLcbnTaNrCwk1lniDsEDM=,f8N7azKUMO3CXVfF3f4BzK2Z8yzYcECu1lPq2rEoKko=,https://github.com/thirdweb-dev/dashboard,TypeScript,syntax-tree,syntax-tree,https://github.com/syntax-tree/mdast-util-gfm-...,NPM,mdast-util-gfm-footnote
1784942,LNQR7by2d4x_GcRrl47_US9ZJ1j12JkObQoxSG6oUlM=,mJiRErziryQu8ZwIcUgjx7ogyM1blnouvNSAVWfAwbQ=,https://github.com/wormhole-foundation/wormhol...,TypeScript,grpc-ecosystem,gRPC Ecosystem,https://github.com/grpc-ecosystem/go-grpc-prom...,GOLANG,github.com/grpc-ecosystem/go-grpc-prometheus


In [7]:
# Number of distinct onchain_project_id values among the dependency rows.
df_deps['onchain_project_id'].nunique()

344

In [8]:
# Number of distinct (dependency_source, dependency_name) pairs.
df_deps[['dependency_source', 'dependency_name']].drop_duplicates().shape[0]

8436

In [9]:
# Distinct dependency names within each dependency source.
df_deps.groupby('dependency_source')['dependency_name'].nunique()

dependency_source
CARGO     1283
GOLANG     383
NPM       6663
PYPI       107
Name: dependency_name, dtype: int64

In [10]:
# Distinct project names on the dependency side of the edges; reused below to
# filter the retro funding collections query.
devtooling_projects = df_deps['devtooling_project_name'].unique()
len(devtooling_projects)

758

In [11]:
# Which of the devtooling projects appear in at least one of the listed
# retro funding collections.
rf_devtools_sql = f"""
    select distinct project_name from oso.projects_by_collection_v1
    where
        collection_name in ('op-rpgf1', 'op-rpgf2', 'op-rpgf3', 'op-retrofunding-4', 'op-retrofunding-5')
        and project_name in {stringify(devtooling_projects)}
"""

results = client.query(rf_devtools_sql)
df_rf_devtools = results.to_dataframe()
df_rf_devtools.tail()

Unnamed: 0,project_name
93,synpress-synthexio
94,alloy-rs
95,abi-to-sol-gnidan
96,limechain
97,yearn


In [12]:
# Plain Python list of the matched project names, for use with `isin` below.
rf_projects = df_rf_devtools['project_name'].unique().tolist()
len(rf_projects)

98

In [13]:
# Distinct (dependency_source, dependency_name) pairs that belong to a project
# listed in rf_projects.
(
    df_deps
    .loc[
        df_deps['devtooling_project_name'].isin(rf_projects),
        ['dependency_source', 'dependency_name'],
    ]
    .drop_duplicates()
    .shape[0]
)

981

In [14]:
# Number of dependency rows whose devtooling project is listed in rf_projects.
int(df_deps['devtooling_project_name'].isin(rf_projects).sum())

135146

In [15]:
# Distinct onchain repo artifact ids from the dependency rows; used to scope
# the developer activity query.
repo_ids = df_deps['onchain_repo_artifact_id'].unique()

In [16]:
# COMMIT_CODE activity rows for the onchain repos collected above, limited to
# rows whose last_event is after 2024-07-01, with the owning project_id attached.
devs_sql = f"""
    select
        repos.project_id,
        devs.*,
    from `oso.int_developer_activity_by_repo` as devs
    join `oso.repositories_v0` as repos
        on devs.repo_artifact_id = repos.artifact_id
    where
        devs.repo_artifact_id in {stringify(repo_ids)}
        and devs.event_type = 'COMMIT_CODE'
        and devs.last_event > '2024-07-01'
"""

results = client.query(devs_sql)
df_devs = results.to_dataframe()
df_devs.tail()

Unnamed: 0,project_id,repo_artifact_id,developer_id,developer_name,event_type,first_event,last_event,total_events
2943,zuKa56R3a-6e4jZZUfd7UOWS66f0hkwUMt8uFboQArk=,9PwlUv2jjyPPBQDWM2s6IPtSEde7Un23XnSmRhprhkw=,78BZYVz_4d4XJN-PTj9WSPrpCDEn1TnBvmpJqZ_OUMo=,arcticfloyd1984,COMMIT_CODE,2024-03-17 06:58:09+00:00,2024-09-30 07:03:31+00:00,22
2944,zuKa56R3a-6e4jZZUfd7UOWS66f0hkwUMt8uFboQArk=,BjsEezmJotvTqDpTpI4ni9cGi6FVffT9r52duhAlulw=,Xy_2ujuYTLvT4PeMoAWiVheKEiDy4GNuJHOiQYtltvY=,vgabriel45,COMMIT_CODE,2024-10-10 11:33:30+00:00,2025-01-15 11:30:11+00:00,9
2945,zuKa56R3a-6e4jZZUfd7UOWS66f0hkwUMt8uFboQArk=,0hZviUUKi0wLhWT2wo2hXAvRouir1-FFB4FaTnO9vWI=,aiqNgLhPwljZ8DSPrl1J-aOvxQJEXjOIQ6t6RCRDCpQ=,filmakarov,COMMIT_CODE,2024-12-10 11:20:37+00:00,2025-01-16 11:29:57+00:00,9
2946,zuKa56R3a-6e4jZZUfd7UOWS66f0hkwUMt8uFboQArk=,p-GeVemaxpbRuC-Q_1M5V39ndAmGfPrMurKc8gIGXRg=,e-jHy9eBeX6c7QJpPmRe3p9IY2u2uu7HdYqiqc-A9Tw=,joepegler,COMMIT_CODE,2024-11-16 09:13:59+00:00,2025-01-14 10:22:07+00:00,18
2947,zuKa56R3a-6e4jZZUfd7UOWS66f0hkwUMt8uFboQArk=,Ml0xp2VJuP2aEwrN4HQWKo75-7dI1gS6TGyC1c1_lAM=,eB0VI7Av8d-xPOyqesqn7voy5J6bjgNQPO2DWieoUIk=,himanshugarg06,COMMIT_CODE,2024-09-25 07:02:24+00:00,2024-11-06 15:46:37+00:00,21


In [17]:
# Number of distinct developer_id values in the activity rows.
df_devs['developer_id'].nunique()

1370

In [18]:
# Collapse to one row per (developer, project): earliest first_event and latest
# last_event across that project's repos, then keep only the pairs whose span
# between the two is more than 90 whole days.
dff_devs = (
    df_devs
    .groupby(['developer_id', 'project_id'], as_index=False)
    .agg({'first_event': 'min', 'last_event': 'max'})
    .assign(time_delta=lambda spans: (spans['last_event'] - spans['first_event']).dt.days)
    .loc[lambda spans: spans['time_delta'] > 90]
)
dff_devs

Unnamed: 0,developer_id,project_id,first_event,last_event,time_delta
0,--RLSsi0OSfeREuBHlGyuYX9U__7KPlD88yMWHct8is=,SOp_QHGNf549Vq-StBPnwRG1ELIMsmLNEgqDtBjM8Eg=,2022-07-26 16:03:58+00:00,2024-12-04 16:47:02+00:00,862
3,-GJJYx8arjz7yfQWDXp3Q5JHybS5s63dsyJfpOWUt30=,m3u9RgYlYchemnTKZDZSZV9xLGReVz_KGG5sV0tIrUE=,2023-03-03 09:53:17+00:00,2025-01-10 09:00:47+00:00,678
4,-I9rt92gB6cY9HnLosiN1DjSiURHeXwzUONDMuDijpk=,wluzMsctrN335VW_uT-iQsSWZrfLjSRCxOvVVUB8NgQ=,2024-06-26 07:28:13+00:00,2024-11-12 09:49:06+00:00,139
5,-L_IScJgY4IskozeHSXCPSkkJyZcPW8FNqhw0PSn71U=,sbnlvN2OV8y140ijKiAr12nnhEkcKAYZyaxtOZOteqs=,2024-09-23 08:19:30+00:00,2025-01-20 10:25:57+00:00,119
8,-TMjpa2x3KhCp-zXy7fvZn4oSWihUMQ8AzwyAEzTwec=,lTrJShA8bLeyWUn-DHIuT_7591FZ7PXqXscO6gVX4N0=,2022-05-25 20:18:24+00:00,2024-09-28 22:53:39+00:00,857
...,...,...,...,...,...
1382,zkEitqYPNcdUk6yw9usAMUw-RbjieIEamAtn30zKBEQ=,LzX2HYHwOpaft7pcY2yLj5FKl1BwxRnDCekysi6xTVA=,2024-05-28 19:37:02+00:00,2024-12-21 01:46:30+00:00,206
1383,zlw_Lq1Q3a0Hx036_DNLSPu8h8Mwf7v3pJJzAjnamm0=,UQrhi8C2lSXtgkojDyC0Vqo4NvwHsYkOFRDUhlVkv80=,2023-03-30 20:40:53+00:00,2024-11-06 18:32:21+00:00,586
1384,zoBw4be-gmIDwJ2m6J3lyWVzPFAzdsA0YNXyHgwnlFw=,yIdtVpj9_Iq1eXhjegIKN6sPnkzDsozlYtmd9xhYIkM=,2023-05-22 20:00:09+00:00,2025-01-17 13:41:58+00:00,605
1385,zoaeAHxTgz2NT4MPi184gLfGBRC3bwNyZM7cH2llMnc=,SV5kdZqv7HuVnB0A9hgFHPi3lubBZ0Tiu0zZS-XMZ-A=,2023-08-20 20:16:44+00:00,2024-12-24 07:55:55+00:00,491


In [19]:
# Distinct developers remaining after the time_delta > 90 filter.
dff_devs['developer_id'].nunique()

942

In [20]:
# Distinct projects that still have at least one developer after the filter.
dff_devs['project_id'].nunique()

206