In [1]:
from google.cloud import bigquery
import json
import os
import pandas as pd

# GCP project that the BigQuery client below is bound to.
GCP_PROJECT = 'opensource-observer'

# Point the Google client libraries at a credentials JSON file.
# setdefault() keeps a GOOGLE_APPLICATION_CREDENTIALS value that is already
# exported in the environment instead of overwriting it with this relative
# path, which only resolves from the working directory the notebook expects.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', '../../../oso_gcp_credentials.json')

# BigQuery client used to run the query in the next cell; the project is passed
# by keyword so the call does not depend on positional parameter order.
client = bigquery.Client(project=GCP_PROJECT)

In [2]:
# SQL for the pairwise sample: one row per distinct combination of onchain
# project, devtool project and devtool GitHub repository, carrying the onchain
# project's 6-month gas/transaction columns and the repository's metadata.
# Filters applied by the query text:
#   - registry edges with edge_type = 'npm_package'
#   - onchain metrics rows with event_source = 'OPTIMISM' and
#     gas_fees_sum_6_months > 1
#   - repositories with star_count > 100
#   - onchain projects limited to the 12 rows with the highest
#     gas_fees_sum_6_months among 'op-retrofunding-4' projects on OPTIMISM
#   - devtool projects limited to the 'op-rpgf3' collection
# `oso.code_metrics_by_project_v1` is joined but none of its columns are
# selected, so it only acts as an inner-join filter on the devtool project.
# Rows are ordered by the devtool project's display name.
# NOTE(review): the thresholds (1, 100, 12) are embedded in the SQL text —
# confirm they are still the intended values before reusing this query.
query = """
select distinct
  p1.display_name as `Onchain Project`,
  p2.display_name as `Devtool Project`,
  concat('https://github.com/', repo.artifact_namespace, '/', repo.artifact_name) as `Devtool URL`,
  omp.gas_fees_sum_6_months as `Onchain Gas - 6 Months`,
  omp.transaction_count_6_months as `Onchain Transactions - 6 Months`,
  repo.star_count as `Devtool Star Count`,
  repo.fork_count as `Devtool Fork Count`,
  repo.created_at as `Devtool Created Date`,
  repo.language as `Devtool Language`,
  repo.license_name as `License`
from `oso.odt_int__devtool_to_onchain_project_registry` r
join `oso.onchain_metrics_by_project_v1` omp
  on r.onchain_project_id = omp.project_id
join `oso.projects_v1` p1
  on omp.project_id = p1.project_id
join `oso.code_metrics_by_project_v1` cmp
  on r.devtool_project_id = cmp.project_id
join `oso.projects_v1` p2
  on cmp.project_id = p2.project_id
join `oso.repositories_v0` repo
  on r.devtool_artifact_id = repo.artifact_id
where
  r.edge_type = 'npm_package'
  and omp.event_source = 'OPTIMISM'
  and omp.gas_fees_sum_6_months > 1
  and repo.star_count > 100
  and p1.project_id in (
    select project_id
    from `oso.onchain_metrics_by_project_v1`
    where
      event_source = 'OPTIMISM'
      and project_id in (select project_id from `oso.projects_by_collection_v1` where collection_name = 'op-retrofunding-4')
    order by gas_fees_sum_6_months desc
    limit 12
  )
  and p2.project_id in (
    select project_id
    from `oso.projects_by_collection_v1` where collection_name = 'op-rpgf3'
  )
  order by p2.display_name
"""

# Submit the query through the BigQuery client and load the whole result set
# into a DataFrame; `df` is the input for the export cells that follow.
result = client.query(query)
df = result.to_dataframe()
# Display the last rows of the result as this cell's output.
df.tail()

Unnamed: 0,Onchain Project,Devtool Project,Devtool URL,Onchain Gas - 6 Months,Onchain Transactions - 6 Months,Devtool Star Count,Devtool Fork Count,Devtool Created Date,Devtool Language,License
119,dHedge,wevm,https://github.com/wevm/viem,10.267768,285481.0,2648,923,2022-07-22 00:54:04+00:00,TypeScript,Other
120,Odos,wevm,https://github.com/wevm/viem,2.951546,376202.0,2648,923,2022-07-22 00:54:04+00:00,TypeScript,Other
121,Synthetix,wevm,https://github.com/wevm/wagmi,31.116411,515495.0,6097,1114,2021-11-23 03:11:46+00:00,TypeScript,MIT License
122,Odos,wevm,https://github.com/wevm/wagmi,2.951546,376202.0,6097,1114,2021-11-23 03:11:46+00:00,TypeScript,MIT License
123,Kwenta,wevm,https://github.com/wevm/wagmi,4.298871,172471.0,6097,1114,2021-11-23 03:11:46+00:00,TypeScript,MIT License


In [3]:
# Write the full query result to CSV (the DataFrame index is written as the
# first column, which is pandas' default).
# The output directory is created first: to_csv does not create missing parent
# directories, so on a fresh checkout without data/pairwise this cell would
# otherwise raise instead of writing the file.
os.makedirs('data/pairwise', exist_ok=True)
df.to_csv('data/pairwise/sample_pairwise_dump.csv')

In [4]:
# --- Category list -----------------------------------------------------------
# Every distinct onchain project name, in the order pandas' unique() reports them.
firstLevelCategoryList = [name for name in df['Onchain Project'].unique()]
with open("data/pairwise/get1stLevelCategoryList.json", "w") as category_file:
    json.dump(firstLevelCategoryList, category_file, indent=2)

# --- Projects per category ---------------------------------------------------
# Map each onchain project name to the list of devtool repository URLs that
# appear alongside it in the query result.
projectsForCategory = {
    onchain_name: list(devtool_urls)
    for onchain_name, devtool_urls in df.groupby('Onchain Project')['Devtool URL']
}
with open("data/pairwise/getProjectsForCategory.json", "w") as projects_file:
    json.dump(projectsForCategory, projects_file, indent=2)

# --- Devtool metadata --------------------------------------------------------
# Columns that describe the onchain side of a pair; removing them leaves only
# per-repository fields, so repeated repositories collapse to identical rows.
onchain_only_columns = [
    'Onchain Project',
    'Onchain Gas - 6 Months',
    'Onchain Transactions - 6 Months',
]
# Shorter field names used in the exported metadata; columns not listed here
# (e.g. 'License') keep their original name.
metadata_column_names = {
    'Devtool Project': 'Project Name',
    'Devtool Star Count': 'Stars',
    'Devtool Fork Count': 'Forks',
    'Devtool Created Date': 'Created Date',
    'Devtool Language': 'Language',
}
# De-duplication must happen before the URL becomes the index, because
# drop_duplicates() compares column values only.
projectMetadata = (
    df
    .drop(columns=onchain_only_columns)
    .rename(columns=metadata_column_names)
    .drop_duplicates()
    .set_index('Devtool URL')
)
# One JSON object per repository, keyed by its URL.
projectMetadata.to_json("data/pairwise/getProjectMetadata.json", orient='index', indent=2)