In [1]:
from google.cloud import bigquery
import os
import pandas as pd

In [2]:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../../gcp_credentials.json'
client = bigquery.Client()

# Get GG projects from Metabase

In [3]:
projects = pd.read_csv('data/csv/gg20_projects_2024-05-21T17_10_42.188069Z.csv')
projects.tail(3)

Unnamed: 0,chain_id,round_id,round_name,project_name,project_github,payout_address,status,total_donations_count,total_amount_donated_in_usd,unique_donors_count
574,42161,23,Hackathon Alumni,LensView,Blockchain-Kami,0x92506959f5df5C7474B22a6D46A2af324593907a,APPROVED,61,396.68237,61
575,42161,23,Hackathon Alumni,DeStealth,DeezStealth,0x9B2FB7a8d227cDaA8002f80e8c8A99a19Bb1b969,APPROVED,40,141.14923,39
576,42161,23,Hackathon Alumni,DynOS 95,DynOS-95,0x22aA68ce286D01aD112C09480C11403A6C781dc7,APPROVED,42,229.41716,42


# Lookup OSO Project IDs

In [4]:
project_githubs = projects['project_github'].dropna().str.lower().str.strip().str.strip('/')
project_github_orgs = sorted(list(set([x.split('/')[0] for x in project_githubs if ' ' not in x])))
project_github_orgs_str = "'" + "','".join(project_github_orgs) + "'"

In [5]:
query = f"""
select distinct
    project_id,
    artifact_namespace
from `opensource-observer.oso.int_repo_metrics_by_project`
where artifact_namespace in ({project_github_orgs_str})
"""
result = client.query(query)
project_github_org_mapping = result.to_dataframe()
project_github_org_mapping.tail(3)

Unnamed: 0,project_id,artifact_namespace
294,6BxfXGCsH-o83hx52DVL09NneutKXpQeLugD4o7mr-4=,blockscout
295,16t6of5u_Oa8_u6NuPemitfuKrW4AOoQgRGCWndoslI=,patientanalyticszkp
296,1PEaVMKxvnpmaAmXugCKwbm2BAbvrTdlYTx_J-qmrPU=,blockchain-kami


In [6]:
project_ids = list(project_github_org_mapping['project_id'].unique())
project_ids_str = "'" + "','".join(project_ids) + "'"

# Get projects code metrics

In [7]:
query = f"""
select *
from `opensource-observer.oso.code_metrics_by_project_v1`
where project_id in ({project_ids_str})
"""
result = client.query(query)
metrics = result.to_dataframe()

In [8]:
metrics.tail(3)

Unnamed: 0,project_id,project_source,project_namespace,project_name,display_name,event_source,repository_count,first_commit_date,last_commit_date,star_count,...,contributor_count,contributor_count_6_months,new_contributor_count_6_months,fulltime_developer_average_6_months,active_developer_count_6_months,commit_count_6_months,opened_pull_request_count_6_months,merged_pull_request_count_6_months,opened_issue_count_6_months,closed_issue_count_6_months
294,4IF5N43rh3j9S83bUBcfoWy_2I4qcP8UulcrMsuJX-o=,OSS_DIRECTORY,oso,tor-project,The Tor Project,GITHUB,37,2018-04-23 18:35:12+00:00,2024-05-21 07:31:28+00:00,6308,...,723.0,20.0,14.0,0.2,4.0,357.0,8.0,3.0,12.0,4.0
295,fuSE0dqAq5LOxitK-7C8rKJ4_nmZMcx2JFiVs_X4pKM=,OSS_DIRECTORY,oso,geo-web,Geo Web,GITHUB,40,2020-07-10 10:25:46+00:00,2024-05-20 16:30:38+00:00,97,...,28.0,3.0,0.0,1.0,3.0,242.0,44.0,44.0,11.0,14.0
296,iilyWmaoJCXv0vPfyPQnIGYXoWyTZDlP0oQLMNIVj54=,OSS_DIRECTORY,oso,metagame-metafam,MetaGame,GITHUB,42,2019-12-02 22:15:05+00:00,2024-04-16 06:55:46+00:00,248,...,143.0,13.0,2.0,0.5,5.0,202.0,76.0,57.0,35.0,58.0


# Join metrics

In [9]:
df = projects.copy()
df['project_github_org_space'] = df['project_github'].apply(lambda x: x.lower().strip().strip('/').split('/')[0] if isinstance(x, str) else None)
df = pd.merge(
    left=df,
    left_on='project_github',
    right=project_github_org_mapping,
    right_on='artifact_namespace'
)
df = pd.merge(
    left=df,
    left_on='project_id',
    right=metrics,
    right_on='project_id'
)
df.tail(3)

Unnamed: 0,chain_id,round_id,round_name,project_name_x,project_github,payout_address,status,total_donations_count,total_amount_donated_in_usd,unique_donors_count,...,contributor_count,contributor_count_6_months,new_contributor_count_6_months,fulltime_developer_average_6_months,active_developer_count_6_months,commit_count_6_months,opened_pull_request_count_6_months,merged_pull_request_count_6_months,opened_issue_count_6_months,closed_issue_count_6_months
204,42161,23,Hackathon Alumni,AdLand,adcommune,0x26bBec292e5080ecFD36F38FF1619FF35826b113,APPROVED,37,92.50932,34,...,1.0,1.0,1.0,0.5,1.0,375.0,97.0,88.0,14.0,8.0
205,42161,23,Hackathon Alumni,Coordination-Play,coordination-play,0x955Af1c1637Facf4dD5d9D2428e073573dAD5699,APPROVED,11,29.940489,11,...,6.0,6.0,6.0,0.0,6.0,52.0,0.0,0.0,0.0,0.0
206,42161,23,Hackathon Alumni,A Bank for DAOs,joshua-and,0x977841f226482F7938e179f6Fc6F45c175252114,APPROVED,22,24.53841,21,...,3.0,3.0,3.0,0.0,1.0,43.0,0.0,0.0,4.0,2.0


In [11]:
df.to_csv("data/GG20xOSO_GitHub_Metrics.csv")