In [1]:
import json
import math
import time
from pathlib import Path
from urllib.parse import quote

import requests
from dotenv import dotenv_values
from ghapi.all import GhApi

# Base directory for this notebook's files: the .env file is read from it and
# the "output" folder is created beneath it. The path is relative, so it is
# resolved against the kernel's current working directory.
current_script_dir = Path("demo_scripts")

In [2]:
# Load the GitHub token from the local .env file so it is never hard-coded
# in the notebook.
env_config = dotenv_values(current_script_dir / ".env")

# A key that is absent, declared without a value, or declared as `KEY=` all
# yield something unusable (missing / None / empty string). Normalise every
# one of those to None so the failure happens here with a clear message
# instead of later as an opaque HTTP authentication error.
gh_token = (env_config.get("GITHUB_TOKEN") or "").strip() or None
if gh_token is None:
    raise ValueError("GITHUB_TOKEN not found (or empty) in .env file")

In [3]:
# Alternative using ghapi, kept for reference:
# api = GhApi(token=gh_token)
# api.search.code(q='"CAPI=2:"+path:*.core', per_page=20, page=1)

# Query sent to the GitHub code-search endpoint, plus its URL-encoded form.
q = '"CAPI=2:" AND "filesets:"'
q_enc = quote(q)
per_page = 100

# The search API only serves the first 1000 matches of a query; requesting
# results beyond that fails, so paging must be capped regardless of the
# reported total_count.
MAX_SEARCH_RESULTS = 1000
# requests has no default timeout; without one a stalled connection would
# hang the cell forever.
REQUEST_TIMEOUT_S = 30
# Bounds for rate-limit handling: how often to retry a page and the longest
# single pause that is still worth waiting for inside a notebook.
MAX_ATTEMPTS = 5
MAX_RATE_LIMIT_WAIT_S = 120

headers = {"Authorization": f"token {gh_token}"}


def _rate_limit_wait_seconds(response):
    """Return how long to pause before retrying a rate-limited response.

    Returns None when the response is not a rate-limit rejection (or carries
    no usable hint), in which case the caller should not retry.
    """
    if response.status_code not in (403, 429):
        return None
    retry_after = response.headers.get("Retry-After", "")
    if retry_after.isdigit():
        return int(retry_after)
    if response.headers.get("X-RateLimit-Remaining") == "0":
        reset_epoch = response.headers.get("X-RateLimit-Reset", "")
        if reset_epoch.isdigit():
            # +1 s of slack so the retry lands after the window has reset.
            return max(int(reset_epoch) - int(time.time()), 0) + 1
    return None


def _fetch_search_page(page):
    """Fetch one page of code-search results and return the decoded JSON.

    Retries after a pause when the response signals rate limiting; any other
    HTTP error (or an exhausted retry budget) raises requests.HTTPError.
    """
    url = (
        f"https://api.github.com/search/code?q={q_enc}"
        f"&per_page={per_page}&page={page}"
    )
    for attempt in range(1, MAX_ATTEMPTS + 1):
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_S)
        wait_s = _rate_limit_wait_seconds(response)
        if wait_s is None or wait_s > MAX_RATE_LIMIT_WAIT_S or attempt == MAX_ATTEMPTS:
            break
        time.sleep(wait_s)
    # Surface auth / validation / rate-limit failures as an HTTP error rather
    # than as a KeyError on the missing "items" / "total_count" keys.
    response.raise_for_status()
    return response.json()


# The first full page already carries total_count, so no separate probing
# request is needed (code search has a tight per-minute request budget).
first_page = _fetch_search_page(1)
total_count = first_page["total_count"]
pages = math.ceil(min(total_count, MAX_SEARCH_RESULTS) / per_page)

# Collect the distinct "owner/name" identifiers of every repository that has
# at least one matching file.
repos = set()
for i in range(1, pages + 1):
    data = first_page if i == 1 else _fetch_search_page(i)
    repos.update(item["repository"]["full_name"] for item in data["items"])

In [7]:
# Persist the collected repository names as a JSON array.
output_dir = current_script_dir / "output"
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / "github_fusesoc_cores_search.json", "w", encoding="utf-8") as f:
    # A set has no stable iteration order, so sort before dumping: the file
    # is then identical across runs whenever the search results are.
    json.dump(sorted(repos), f, indent=4)