In [None]:
import json
import os
import time

import requests

# --- CONFIG ---
# Personal access token used to authenticate against the GitHub GraphQL API.
# It is read from the GITHUB_TOKEN environment variable when that is set and
# non-empty, so the secret does not have to be written into the notebook; the
# placeholder below is only the fallback.
TOKEN = os.environ.get("GITHUB_TOKEN") or "<insert_your_github_token_here>"
OWNER = "Lightning-AI"            # repository owner (user or organisation)
REPO = "pytorch-lightning"        # repository name
OUTPUT_FILE = "lightning_discussions_latest.json"  # where the filtered dump is written
ONLY_ANSWERED = True     # Set to False to get all
ONLY_CLOSED = False      # Set to True to include only closed
# HTTP headers sent with every GraphQL request.
headers = {"Authorization": f"Bearer {TOKEN}"}

def fetch_discussions(after_cursor=None):
    """Fetch one page of discussions from the GitHub GraphQL API.

    Requests up to 50 discussions, newest first (``CREATED_AT`` descending),
    for the repository named by the module-level ``OWNER`` and ``REPO``
    settings, authenticating with the module-level ``headers``.

    Args:
        after_cursor: The ``endCursor`` of the previously fetched page, or a
            falsy value (``None`` / ``""``) to start from the first page.

    Returns:
        The decoded JSON response body (a dict with a top-level ``"data"``
        key).

    Raises:
        RuntimeError: If the HTTP status is not 200, or if the response body
            carries a GraphQL ``"errors"`` entry.
        requests.RequestException: On network failures, including the
            request timing out.
    """
    # Owner, repo and cursor are passed as GraphQL variables instead of being
    # spliced into the query text, so quotes or backslashes in any of them
    # cannot break (or alter) the query document.
    query = """
    query($owner: String!, $name: String!, $after: String) {
      repository(owner: $owner, name: $name) {
        discussions(first: 50, after: $after, orderBy: {field: CREATED_AT, direction: DESC}) {
          pageInfo {
            hasNextPage
            endCursor
          }
          nodes {
            title
            body
            url
            createdAt
            updatedAt
            closedAt
            isAnswered
            author { login }
            answer {
              body
              author { login }
              createdAt
            }
          }
        }
      }
    }
    """
    variables = {
        "owner": OWNER,
        "name": REPO,
        # A falsy cursor means "first page", which GraphQL expresses as null.
        "after": after_cursor or None,
    }
    r = requests.post(
        "https://api.github.com/graphql",
        json={"query": query, "variables": variables},
        headers=headers,
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    if r.status_code != 200:
        raise RuntimeError(f"GitHub API Error {r.status_code}: {r.text}")

    payload = r.json()
    # GraphQL reports query-level failures (bad credentials scope, unknown
    # repository, rate limiting, ...) inside a 200 response.
    if payload.get("errors"):
        raise RuntimeError(f"GitHub GraphQL Error: {payload['errors']}")
    return payload

def get_all_discussions():
    """Download every discussion of the configured repository.

    Calls ``fetch_discussions`` repeatedly, following ``pageInfo.endCursor``
    until ``pageInfo.hasNextPage`` is false, pausing one second between
    pages and printing a running total after each page that has a successor.

    Returns:
        A list of discussion dicts in the order the API returned them.

    Raises:
        RuntimeError: If a response contains no repository data (for
            example because the API answered with an error payload).
    """
    all_discussions = []
    after_cursor = None
    while True:
        data = fetch_discussions(after_cursor)

        # "data" or "repository" is null when the query failed or the
        # repository is not visible; fail with a readable message instead of
        # a TypeError from subscripting None.
        repository = (data.get("data") or {}).get("repository")
        if repository is None:
            raise RuntimeError(
                f"GitHub GraphQL response has no repository data: {data.get('errors')}"
            )

        repo_data = repository["discussions"]
        # Entries of a GraphQL connection's `nodes` list may be null; skip
        # those so later code can index every element as a dict.
        all_discussions.extend(
            node for node in (repo_data["nodes"] or []) if node is not None
        )

        page_info = repo_data["pageInfo"]
        if not page_info["hasNextPage"]:
            break
        after_cursor = page_info["endCursor"]
        print(f"Fetched {len(all_discussions)} discussions so far...")
        time.sleep(1)  # avoid rate limiting

    return all_discussions

def filter_discussions(discussions):
    """Apply the module-level ONLY_ANSWERED / ONLY_CLOSED filters.

    Each enabled filter is applied as its own pass, in the order answered
    first, closed second. When neither flag is set the input is returned
    unchanged.

    Args:
        discussions: Iterable of discussion dicts carrying the
            ``"isAnswered"`` and ``"closedAt"`` keys.

    Returns:
        The discussions that passed every enabled filter.
    """
    stages = (
        (ONLY_ANSWERED, lambda entry: entry["isAnswered"]),
        (ONLY_CLOSED, lambda entry: entry["closedAt"] is not None),
    )
    kept = discussions
    for enabled, predicate in stages:
        if enabled:
            kept = [entry for entry in kept if predicate(entry)]
    return kept

if __name__ == "__main__":
    # Download everything, then narrow the list according to the
    # ONLY_ANSWERED / ONLY_CLOSED settings before writing it to disk.
    print("Fetching discussions...")
    discussions = get_all_discussions()
    print(f"Total discussions fetched: {len(discussions)}")

    discussions = filter_discussions(discussions)
    print(f"After filtering: {len(discussions)}")

    # Serialise to text and write it out in a single call.
    with open(OUTPUT_FILE, "w") as f:
        f.write(json.dumps(discussions, indent=2))

    print(f"💾 Saved filtered discussions to {OUTPUT_FILE}")




Fetching discussions...
Fetched 50 discussions so far...
Fetched 100 discussions so far...
Fetched 150 discussions so far...
Fetched 200 discussions so far...
Fetched 250 discussions so far...
Fetched 300 discussions so far...
Fetched 350 discussions so far...
Fetched 400 discussions so far...
Fetched 450 discussions so far...
Fetched 500 discussions so far...
Fetched 550 discussions so far...
Fetched 600 discussions so far...
Fetched 650 discussions so far...
Fetched 700 discussions so far...
Fetched 750 discussions so far...
Fetched 800 discussions so far...
Fetched 850 discussions so far...
Fetched 900 discussions so far...
Fetched 950 discussions so far...
Fetched 1000 discussions so far...
Fetched 1050 discussions so far...
Fetched 1100 discussions so far...
Fetched 1150 discussions so far...
Fetched 1200 discussions so far...
Fetched 1250 discussions so far...
Fetched 1300 discussions so far...
Fetched 1350 discussions so far...
Fetched 1400 discussions so far...
Fetched 1450 dis

In [2]:
# Write a second JSON file containing at most the first 50 discussions from
# the saved dump that are both answered and closed. The dump keeps the order
# in which the discussions were fetched.
import json

with open("lightning_discussions_latest.json") as f:
    discussions = json.load(f)

closed_answered = [
    d for d in discussions if d["isAnswered"] and d["closedAt"] is not None
]
del closed_answered[50:]  # keep only the leading 50 matches

with open("lightning_discussions_latest_closed_answered.json", "w") as f:
    f.write(json.dumps(closed_answered, indent=2))