In [25]:
import json
import os
import time
from datetime import datetime, timedelta

import pandas as pd
import requests

In [26]:
# Every endpoint and header value below hangs off this single site origin.
_LW_ORIGIN = "https://www.lesswrong.com"

# GraphQL endpoint that all requests are POSTed to.
url = _LW_ORIGIN + "/graphql"

# Headers sent with each request: a JSON content type plus a browser-style
# User-Agent, Referer and Origin.
# NOTE(review): presumably the browser-style values are there so the site
# accepts scripted requests — confirm whether they are actually required.
headers = {
    "Content-Type": "application/json",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Referer": _LW_ORIGIN + "/",
    "Origin": _LW_ORIGIN,
}

# Query document for the "new" posts view with `meta: false`.
# Variables: $after / $before (Date) bound the window, $limit (Int) caps the
# number of posts. For each post it selects ids, title/slug/URLs, the posting
# date, score/vote/comment counts, the meta and question flags, the HTML body,
# and the author's username, slug and display name.
query = """
query ($after: Date, $before: Date, $limit: Int) {
  posts(input: {
    terms: {
      view: "new",
      limit: $limit,
      meta: false,
      after: $after,
      before: $before
    }
  }) {
    results {
      _id
      title
      slug
      pageUrl
      postedAt
      baseScore
      voteCount
      commentCount
      meta
      question
      url
      htmlBody
      user {
        username
        slug
        displayName
      }
    }
  }
}
"""

In [None]:
# Download posts one calendar month at a time and write each month's result
# list to lw_json/<year>/<year>-<month>.json.

# Crawl window: 1 January 2016 up to the moment this cell runs.
# NOTE(review): datetime.today() is naive local time, yet a "Z" (UTC) suffix is
# appended to the timestamps below. Month boundaries are midnight values, so
# the shift is at most the local UTC offset — confirm that this is acceptable.
start_date = datetime(2016, 1, 1)
end_date = datetime.today()

# Never filled by this cell (each month goes straight to disk); kept so that
# any other code referencing the name still finds a list.
all_results = []

# Maximum number of posts requested for a single month.
POST_LIMIT = 10000
# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the cell forever.
REQUEST_TIMEOUT_S = 120

while start_date < end_date:
    # First day of the following month: day 28 exists in every month, and
    # adding 4 days to it always lands in the next month.
    next_month = (start_date.replace(day=28) + timedelta(days=4)).replace(day=1)
    after = start_date.isoformat() + "Z"
    before = next_month.isoformat() + "Z"

    variables = {"after": after, "before": before, "limit": POST_LIMIT}
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        headers=headers,
        timeout=REQUEST_TIMEOUT_S,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()

    # A GraphQL response may carry "errors" and may set "data" (or "posts") to
    # null, so every level is guarded against None rather than only against a
    # missing key.
    errors = data.get("errors")
    results = ((data.get("data") or {}).get("posts") or {}).get("results") or []
    if errors and not results:
        # Do not write an empty file that would look like a successful month.
        raise RuntimeError(f"GraphQL request for {after} .. {before} failed: {errors}")
    if errors:
        print(f"⚠️ GraphQL reported errors for {after} .. {before}: {errors}")
    if len(results) >= POST_LIMIT:
        print(f"⚠️ {after} .. {before} returned {len(results)} posts (the limit); the month may be truncated")

    # Save JSON for this month, creating the per-year directory on first use.
    filename = f"{start_date.year}-{start_date.month:02}.json"
    filepath = f"lw_json/{start_date.year}/" + filename
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"✅ Saved {len(results)} posts to {filepath}")

    # Small delay between requests to avoid rate limiting.
    time.sleep(1)

    # Move to next month
    start_date = next_month

✅ Saved 122 posts to lw_json/2016/2016-01.json
✅ Saved 105 posts to lw_json/2016/2016-02.json
✅ Saved 101 posts to lw_json/2016/2016-03.json
✅ Saved 107 posts to lw_json/2016/2016-04.json
✅ Saved 79 posts to lw_json/2016/2016-05.json
✅ Saved 104 posts to lw_json/2016/2016-06.json
✅ Saved 93 posts to lw_json/2016/2016-07.json
✅ Saved 87 posts to lw_json/2016/2016-08.json
✅ Saved 114 posts to lw_json/2016/2016-09.json
✅ Saved 107 posts to lw_json/2016/2016-10.json
✅ Saved 134 posts to lw_json/2016/2016-11.json
✅ Saved 163 posts to lw_json/2016/2016-12.json
✅ Saved 182 posts to lw_json/2017/2017-01.json
✅ Saved 147 posts to lw_json/2017/2017-02.json
✅ Saved 169 posts to lw_json/2017/2017-03.json
✅ Saved 117 posts to lw_json/2017/2017-04.json
✅ Saved 145 posts to lw_json/2017/2017-05.json
✅ Saved 121 posts to lw_json/2017/2017-06.json
✅ Saved 80 posts to lw_json/2017/2017-07.json
✅ Saved 71 posts to lw_json/2017/2017-08.json
✅ Saved 126 posts to lw_json/2017/2017-09.json
✅ Saved 226 posts 