In [1]:
import json
import os

import requests

In [9]:
# GitHub token used for every API call in this notebook. It is read from the
# GITHUB_TOKEN environment variable so the secret is never stored in the
# notebook itself; the value is "" when the variable is unset.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
# Repository whose discussions are exported.
REPO_OWNER = "microbiomedata"
REPO_NAME = "nmdc-schema"
# Path of the JSON file the exported discussions are written to.
OUTPUT_FILE = "nmdc_schema_discussions.json"

In [12]:
def validate_token(token: str, timeout: float = 10.0) -> bool:
    """Check that ``token`` works against GitHub's REST and GraphQL APIs.

    A status line is printed for each check that is performed.

    Args:
        token: GitHub token, sent as a ``Bearer`` credential.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        True only when ``GET /user`` answers 200 and the GraphQL ``viewer``
        query returns a login; False in every other case.

    Raises:
        requests.RequestException: Connection failures and timeouts are not
            caught here and propagate to the caller.
    """
    # An empty token can never authenticate; skip the network round-trips.
    if not token:
        print("❌ No token provided.")
        return False

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.v3+json"
    }

    # Check basic token validity with REST API
    rest_resp = requests.get(
        "https://api.github.com/user", headers=headers, timeout=timeout
    )
    if rest_resp.status_code == 401:
        print("❌ Token is invalid or expired (REST API).")
        return False
    # Any other non-200 answer (403, 5xx, ...) is also a failed check and must
    # not be reported as "access OK".
    if rest_resp.status_code != 200:
        print(f"❌ REST API check failed with HTTP {rest_resp.status_code}.")
        return False

    username = rest_resp.json().get("login", "unknown")
    print(f"✅ REST API access OK. Authenticated as: {username}")

    # Check token works with GraphQL
    graphql_headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.v4+json"
    }
    graphql_resp = requests.post(
        "https://api.github.com/graphql",
        headers=graphql_headers,
        json={"query": "{ viewer { login } }"},
        timeout=timeout,
    )

    if graphql_resp.status_code == 401:
        print("❌ Token is not valid for GraphQL access.")
        return False
    if graphql_resp.status_code != 200:
        print(f"❌ GraphQL check failed with HTTP {graphql_resp.status_code}.")
        return False

    # "data" (or "viewer") may be present but null when the query fails, so
    # `or {}` is used instead of a .get() default, which only covers a
    # missing key.
    payload = graphql_resp.json()
    data = payload.get("data") or {}
    viewer = (data.get("viewer") or {}).get("login")
    if viewer:
        print(f"✅ GraphQL access OK. Viewer login: {viewer}")
        return True

    print("❌ Token failed GraphQL query.")
    return False


In [13]:

# Validate the configured token and print a one-line overall verdict.
print(
    "🎉 Token is valid and ready to use!"
    if validate_token(GITHUB_TOKEN)
    else "⚠️ Please check the token or its permissions."
)

✅ REST API access OK. Authenticated as: turbomam
✅ GraphQL access OK. Viewer login: turbomam
🎉 Token is valid and ready to use!


In [14]:
# Request headers sent with the GraphQL calls: bearer credential built from
# GITHUB_TOKEN plus the Accept media type.
HEADERS = dict(
    Authorization=f"Bearer {GITHUB_TOKEN}",
    Accept="application/vnd.github.v4+json",
)

In [15]:
# Smoke-test query: request a single discussion (number, title, url) from the
# configured repository. Owner and name come from REPO_OWNER / REPO_NAME so
# this cell stays in sync with the config cell; literal GraphQL braces are
# doubled because the string is an f-string.
query = f"""
{{
  repository(owner: "{REPO_OWNER}", name: "{REPO_NAME}") {{
    discussions(first: 1) {{
      nodes {{
        number
        title
        url
      }}
    }}
  }}
}}
"""

In [16]:
# Send the smoke-test query to the GraphQL endpoint. The timeout keeps a
# stalled connection from blocking the kernel indefinitely.
response = requests.post(
    "https://api.github.com/graphql",
    headers=HEADERS,
    json={"query": query},
    timeout=30,
)

In [17]:
# Show the HTTP status code and the decoded JSON body of the smoke-test call.
print(f"Status: {response.status_code}")
print(f"Response: {response.json()}")


Status: 200
Response: {'data': {'repository': {'discussions': {'nodes': [{'number': 2148, 'title': 'do we really need a `chemical_entity_set`?', 'url': 'https://github.com/microbiomedata/nmdc-schema/discussions/2148'}]}}}}


In [18]:
# Same headers as the earlier HEADERS cell, rebuilt from the current
# GITHUB_TOKEN; used by fetch_discussions().
HEADERS = dict(
    Authorization=f"Bearer {GITHUB_TOKEN}",
    Accept="application/vnd.github.v4+json",
)

In [19]:
def fetch_discussions(after_cursor=None, timeout=30):
    """Fetch one page (up to 50) of discussions for REPO_OWNER/REPO_NAME.

    Each discussion node carries number, title, url, body, createdAt, author
    login and its comments. Only the first 100 comments per discussion are
    requested; comments are not paginated here.

    Args:
        after_cursor: ``endCursor`` of the previous page, or None for the
            first page.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        The decoded JSON response body (a dict with a ``"data"`` key).

    Raises:
        requests.HTTPError: The endpoint answered with a 4xx/5xx status.
        RuntimeError: The response body contains a GraphQL ``errors`` entry.
    """
    query = """
    query($owner: String!, $name: String!, $after: String) {
      repository(owner: $owner, name: $name) {
        discussions(first: 50, after: $after) {
          pageInfo {
            hasNextPage
            endCursor
          }
          nodes {
            number
            title
            url
            body
            createdAt
            author {
              login
            }
            comments(first: 100) {
              nodes {
                body
                createdAt
                author {
                  login
                }
              }
            }
          }
        }
      }
    }
    """
    variables = {
        "owner": REPO_OWNER,
        "name": REPO_NAME,
        "after": after_cursor
    }
    response = requests.post(
        "https://api.github.com/graphql",
        json={"query": query, "variables": variables},
        headers=HEADERS,
        timeout=timeout,
    )
    response.raise_for_status()
    payload = response.json()

    # GraphQL failures are reported in the body with HTTP 200, so
    # raise_for_status() does not catch them. Surface them here instead of
    # letting the caller fail later on a missing/None "data" entry.
    errors = payload.get("errors")
    if errors:
        messages = "; ".join(
            str(err.get("message", err)) if isinstance(err, dict) else str(err)
            for err in errors
        )
        raise RuntimeError(f"GitHub GraphQL query failed: {messages}")

    return payload

In [20]:
def get_all_discussions():
    """Collect every discussion node by following the pagination cursor.

    Calls fetch_discussions() repeatedly, starting with no cursor, and feeds
    each page's ``endCursor`` into the next call until ``hasNextPage`` is
    false.

    Returns:
        A list with the ``nodes`` of all pages, in the order received.
    """
    collected = []
    cursor = None

    while True:
        page = fetch_discussions(cursor)["data"]["repository"]["discussions"]
        collected += page["nodes"]

        page_info = page["pageInfo"]
        more_pages = page_info["hasNextPage"]
        cursor = page_info["endCursor"]
        if not more_pages:
            break

    return collected

In [21]:
# Download every discussion page. The progress message uses REPO_NAME so it
# stays correct if the config cell is pointed at another repository.
print(f"Fetching all discussions from {REPO_NAME}...")
all_discussions = get_all_discussions()

Fetching all discussions from nmdc-schema...


In [22]:
# Serialize the collected discussions as indented JSON and write them to
# OUTPUT_FILE, then report how many were saved.
with open(OUTPUT_FILE, "w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(all_discussions, indent=2))
print(f"✅ Saved {len(all_discussions)} discussions to {OUTPUT_FILE}")

✅ Saved 24 discussions to nmdc_schema_discussions.json
