In [1]:
from dotenv import load_dotenv
import os
import pandas as pd
from pyoso import Client

load_dotenv()
OSO_API_KEY = os.environ['OSO_API_KEY']
client = Client(api_key=OSO_API_KEY)
stringify = lambda arr: "'" + "','".join(arr) + "'"

In [2]:
df_projects = client.to_pandas("""

WITH relevant_metrics AS (
  SELECT
    metric_id,
    metric_name,
    CASE
      WHEN LOWER(metric_name) LIKE '%transactions%' THEN 'transactions'
      WHEN LOWER(metric_name) LIKE '%commits%' THEN 'commits'
      ELSE 'other'
    END AS metric_category
  FROM metrics_v0
  WHERE
    LOWER(metric_name) LIKE '%transactions%'
    OR LOWER(metric_name) LIKE '%commits%'
),
project_metrics_last_year AS (
  SELECT
    a.project_id,
    c.display_name AS project_display_name,
    b.metric_category,
    a.amount
  FROM timeseries_metrics_by_project_v0 AS a
  JOIN relevant_metrics AS b ON a.metric_id = b.metric_id
  JOIN projects_v1 AS c ON a.project_id = c.project_id
  WHERE
    a.sample_date >= current_date - INTERVAL '1' YEAR
    AND c.project_source = 'OSS_DIRECTORY'
    AND b.metric_category IN ('transactions', 'commits') 
),
aggregated_metrics_per_project AS (
  SELECT
    project_id,
    project_display_name,
    SUM(CASE WHEN metric_category = 'transactions' THEN amount ELSE 0 END) AS total_transactions,
    SUM(CASE WHEN metric_category = 'commits' THEN amount ELSE 0 END) AS total_commits
  FROM project_metrics_last_year
  GROUP BY
    project_id,
    project_display_name
),
ranked_by_transactions AS (
  SELECT
    project_id,
    project_display_name,
    total_transactions,
    total_commits,
    RANK() OVER (ORDER BY COALESCE(total_transactions, 0) DESC) AS transaction_rank
  FROM aggregated_metrics_per_project
)
SELECT
  project_id,
  project_display_name,
  ROUND(COALESCE(total_transactions, 0)) AS total_transactions_last_year,
  ROUND(COALESCE(total_commits, 0)) AS total_commits_last_year
FROM ranked_by_transactions
WHERE transaction_rank <= 50
ORDER BY total_commits_last_year DESC
LIMIT 30

""")

In [3]:
PROJECT_IDS = list(df_projects['project_id'].unique())

In [4]:
df_repos = client.to_pandas(f"""
WITH ranked_repos AS (
  SELECT
      project_id,
      artifact_url,
      language,
      updated_at,
      star_count,
      RANK() OVER (
          PARTITION BY project_id
          ORDER BY star_count DESC
      ) AS repo_rank
  FROM repositories_v0
  WHERE language IN ('Solidity', 'TypeScript')
    AND project_id IN ({stringify(PROJECT_IDS)})
)

SELECT *
FROM ranked_repos
WHERE repo_rank <= 3
ORDER BY project_id, repo_rank
""")
df_repos.tail()

Unnamed: 0,project_id,artifact_url,language,updated_at,star_count,repo_rank
97,v9C8yfcPG3QTWruewgVAij9YK5/W6p+8H4kaTIllhjE=,https://github.com/sushiswap/sushiswap-interface,TypeScript,2025-01-20 15:33:12.000 UTC,377,2
98,v9C8yfcPG3QTWruewgVAij9YK5/W6p+8H4kaTIllhjE=,https://github.com/sushiswap/trident,TypeScript,2025-02-04 13:30:22.000 UTC,232,3
99,ziS9zVwL0wejwpRl3exXS/sPkVTxes5OeRkH1buQ4Y4=,https://github.com/perpetual-protocol/perpetua...,TypeScript,2025-04-28 17:28:19.000 UTC,159,1
100,ziS9zVwL0wejwpRl3exXS/sPkVTxes5OeRkH1buQ4Y4=,https://github.com/perpetual-protocol/perp-cur...,TypeScript,2025-04-23 00:43:01.000 UTC,79,2
101,ziS9zVwL0wejwpRl3exXS/sPkVTxes5OeRkH1buQ4Y4=,https://github.com/perpetual-protocol/perp-cur...,TypeScript,2025-04-10 08:57:42.000 UTC,48,3


In [5]:
df_projects.merge(df_repos, on='project_id').to_csv("repo_shortlist.csv")