In [1]:
import requests
import os
import dotenv

dotenv.load_dotenv()

API_KEY = os.getenv("METACULUS_API_KEY")

In [2]:
import ray
import time
from tqdm import tqdm

# Initialize Ray
ray.init(ignore_reinit_error=True, num_cpus=10)

@ray.remote
def fetch_posts(offset, limit=1000):
    return requests.get(f'https://metaculus.com/api/posts/?limit={limit}&offset={offset}')

results = []
base_offset = 0
while True:
    should_stop=False
    # Start multiple parallel requests with different offsets
    offsets = [i for i in range(base_offset, base_offset + 10000, 500)]  # Start with just one offset, can be expanded
    request_ids = [fetch_posts.remote(offset, limit=500) for offset in offsets]

    # Create progress bar
    pbar = tqdm(total=len(request_ids), desc="Fetching posts")

    # Get results as they complete without blocking
    batch_results = []
    remaining_ids = request_ids.copy()
    while remaining_ids:
        # Check which ones are ready without blocking
        ready_ids, remaining_ids = ray.wait(remaining_ids, timeout=0.1, num_returns=1)
        
        # If any are ready, get them and update the progress bar
        if ready_ids:
            for ready_id in ready_ids:
                batch_results.append(ray.get(ready_id))
                pbar.update(1)

    results.extend(batch_results)

    pbar.close()

    # Check if we've reached the end of the data
    # If the last request doesn't have a 200 status code, break the loop
    # Check for non-200 status codes
    if any(r.status_code != 200 for r in batch_results):
        print(f"Stopping: Received non-200 status code")
        should_stop = True
    
    
    # Also break if we've processed all available data
    if any(r.status_code == 200 and len(r.json().get('results', [])) < 500 for r in batch_results):
        print("Stopping: Received fewer results than requested limit")
        should_stop=True
    
    if should_stop:
        break
    
    # Continue to the next batch
    base_offset += 10000
    time.sleep(1)  # Add a small delay to avoid overwhelming the API

  from .autonotebook import tqdm as notebook_tqdm
2025-03-10 16:27:16,523	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-03-10 16:27:17,472	INFO worker.py:1832 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
Fetching posts: 100%|██████████| 20/20 [00:18<00:00,  1.10it/s]


Stopping: Received fewer results than requested limit


In [3]:
posts = []
for res in results:
    posts.extend(res.json()['results'])


In [20]:
from datetime import datetime as dt

for i, post in enumerate(posts):
    if 'question' not in post:
        print('no question')
        break
    if 'type' not in post['question']:
        print('no type')
        break

non_questions = [post for i, post in enumerate(posts) if 'question' not in post]
questions = [post for i, post in enumerate(posts) if 'question' in post]
binary_qs = [q for q in questions if q['question']['type'] == 'binary']

import datetime
# currently just using UTC time
early_resolve = [q for q in binary_qs if dt.fromisoformat('2023-03-01T00:00:00Z') < dt.fromisoformat(q['scheduled_resolve_time']) < dt.now(datetime.timezone.utc)]
early_resolve = [q for q in early_resolve if q['title'] != '[DELETED QUESTION]']
early_resolve = [q for q in early_resolve if q['status'] == 'resolved']

no question


In [21]:
titles = [q['title'] for q in early_resolve]
with open('early_resolve_titles.txt', 'w') as f:
    # Clear the file first before writing new content
    f.write('')
    f.close()
    
    # Reopen the file for writing
with open('early_resolve_titles.txt', 'w') as f:
    for title in titles:
        f.write(title.strip() + '\n')


In [23]:
import json
with open('metaculus_questions.json', 'w') as f:
    json.dump(early_resolve, f)

In [13]:
from noa_tools import hist

import numpy as np
hist([q['nr_forecasters']+np.random.randn(1)*0.01 for q in early_resolve], info=[q['title'] for q in early_resolve])

In [14]:
len(early_resolve)

3289

In [97]:
datetime.datetime.now(datetime.timezone.utc)

datetime.datetime(2025, 3, 10, 23, 10, 15, 167545, tzinfo=datetime.timezone.utc)

In [87]:
from datetime import datetime
binary_qs[0]['scheduled_close_time']

'2029-01-01T04:59:00Z'

{'id': 35595,
 'title': "Metaculus's First Spanish-Language Contest: Torneo Kiko Llaneras 🔮",
 'url_title': 'Torneo Kiko Llaneras 🔮',
 'slug': 'torneo-kiko-llaneras',
 'author_id': 103275,
 'author_username': 'christian',
 'coauthors': [],
 'created_at': '2025-02-28T19:50:05.197804Z',
 'published_at': '2025-02-28T19:50:46Z',
 'edited_at': '2025-03-08T20:15:59.656634Z',
 'curation_status': 'approved',
 'curation_status_updated_at': '2025-02-28T19:50:46Z',
 'comment_count': 5,
 'status': 'approved',
 'resolved': False,
 'actual_close_time': None,
 'scheduled_close_time': None,
 'scheduled_resolve_time': None,
 'open_time': '2025-02-28T19:50:46.417289Z',
 'nr_forecasters': 0,
 'projects': {'news_category': [{'id': 2421,
    'name': 'Programs News',
    'slug': 'programs',
    'type': 'news_category',
    'default_permission': 'forecaster'}],
  'default_project': {'id': 2421,
   'name': 'Programs News',
   'slug': 'programs',
   'type': 'news_category',
   'default_permission': 'forecaster

In [29]:
len(results[-4].json()['results'])

119

In [None]:
results

In [34]:
results[-4].json()

{'next': 'http://www.metaculus.com/api/posts/?limit=500&offset=10000',
 'previous': 'http://www.metaculus.com/api/posts/?limit=500&offset=9000',
 'results': [{'id': 220,
   'title': 'Zapping Zika #2: active transmission in the U.S. by August 2016?',
   'url_title': '',
   'slug': 'zapping-zika-2-active-transmission-in-the-us-by-august-2016',
   'author_id': 8,
   'author_username': 'Anthony',
   'coauthors': [],
   'created_at': '2016-04-22T17:36:13.037456Z',
   'published_at': '2016-04-23T18:37:02Z',
   'edited_at': '2024-10-07T08:38:03.248858Z',
   'curation_status': 'approved',
   'curation_status_updated_at': '2016-04-23T18:37:02Z',
   'comment_count': 9,
   'status': 'resolved',
   'resolved': True,
   'actual_close_time': '2016-07-01T17:35:02Z',
   'scheduled_close_time': '2016-07-01T17:35:02Z',
   'scheduled_resolve_time': '2016-08-26T13:20:22.103000Z',
   'open_time': '2016-04-23T18:37:02Z',
   'nr_forecasters': 40,
   'projects': {'tag': [{'id': 32600,
      'name': '2016 Lead