In [47]:
# %pip install --upgrade openai  # one-time setup; %pip installs into the running kernel's environment

In [11]:
import os
from pathlib import Path
from openai import OpenAI

# Load API_KEY from the environment, falling back to a local .env file.
def _read_dotenv_value(dotenv_path, name):
    """Return the value assigned to ``name`` in a dotenv-style file, or None.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped. An
    optional leading ``export`` on the key and one pair of matching surrounding
    quotes on the value are removed, so ``export API_KEY="abc"`` yields ``abc``.
    The first line that assigns a non-empty value to ``name`` wins.

    Args:
        dotenv_path: ``pathlib.Path`` of the file to read.
        name: Variable name to look up.

    Returns:
        The cleaned value as a string, or None when the file is missing or
        holds no non-empty assignment for ``name``.
    """
    # is_file() rather than exists(): a directory named .env must not be read.
    if not dotenv_path.is_file():
        return None
    for raw_line in dotenv_path.read_text().splitlines():
        entry = raw_line.strip()
        if entry.startswith('#') or '=' not in entry:
            continue
        key, value = entry.split('=', 1)
        key = key.strip()
        if key.startswith('export '):
            key = key[len('export '):].strip()
        value = value.strip()
        # Drop one pair of matching quotes; sending them along would corrupt the key.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1].strip()
        if key == name and value:
            return value
    return None


env_path = Path('.env')

# Consult .env whenever the environment has no usable key. An exported but empty
# API_KEY counts as "not set", so it no longer hides the value stored in .env.
if not os.environ.get('API_KEY'):
    dotenv_value = _read_dotenv_value(env_path, 'API_KEY')
    if dotenv_value:
        os.environ['API_KEY'] = dotenv_value

API_KEY = os.getenv('API_KEY')
if not API_KEY:
    raise RuntimeError('Set API_KEY in your environment or .env')

# create an OpenAI client using the API key
client = OpenAI(api_key=API_KEY)


In [None]:
# import pymupdf4llm
# md_text = pymupdf4llm.to_markdown("6832_SercePehlevan_2020.pdf")

# import pathlib
# pathlib.Path("output.md").write_bytes(md_text.encode())

In [35]:
# Upload the study PDF so later requests can reference it by file id.
# The context manager closes the local handle as soon as the upload call returns;
# the original bare open() left it open for the lifetime of the kernel.
with open("6832_SercePehlevan_2020.pdf", "rb") as pdf_handle:
    file = client.files.create(
        file=pdf_handle,
        purpose="user_data",
    )

In [44]:
def generate_response_with_chatgpt(prompt, model="gpt-4.1", file_id=None):
    """Send ``prompt`` plus an uploaded file to the Responses API and return the reply text.

    Args:
        prompt: Text placed in the ``input_text`` part of a single user message.
        model: Model name passed to ``client.responses.create``.
        file_id: Id of an already-uploaded file to attach as ``input_file``.
            When None, the id of the notebook-level ``file`` object is looked
            up at call time, which is the original behaviour.

    Returns:
        ``response.output_text`` of the object returned by the API call.
    """
    # Passing file_id explicitly avoids depending on whichever cell last
    # rebound the global `file`.
    if file_id is None:
        file_id = file.id

    response = client.responses.create(
        model=model,
        input=[
            {
                "role": "user",
                "content": [
                    { "type": "input_text", "text": prompt },
                    {
                        "type": "input_file",
                        "file_id": file_id,
                    }
                ]
            }
        ]
    )

    return response.output_text

In [None]:
# Read one signalling-question prompt from disk and send it, together with the
# uploaded PDF, to the model via generate_response_with_chatgpt.
# NOTE(review): this path does not match the prompts/<domain>/question_N.txt
# layout printed by the discovery cell further down — confirm the file exists.
with open("domain_1_q_1_1.txt", "r", encoding="utf-8") as f:
    content = f.read()

# The model's text reply is kept in `answer`; nothing is displayed by this cell.
answer = generate_response_with_chatgpt(content)

In [10]:
import re
from collections import defaultdict
from pathlib import Path

def natural_sort_key(text):
    """Return a sort key that orders embedded numbers numerically.

    ``re.split`` with a capturing group alternates non-digit text (even
    positions) and digit runs (odd positions), so two keys always compare
    str-with-str and int-with-int. This makes '2.10' sort after '2.9', where
    plain string ordering would put it before '2.2'.
    """
    return [
        int(token) if index % 2 else token
        for index, token in enumerate(re.split(r'(\d+)', text))
    ]


# Map domain (variants via folder name) -> question code -> prompt question file path
prompt_question_files = defaultdict(dict)
# Folder names look like "domain_1_randomization": group 1 is the domain number,
# group 2 the optional variant suffix.
pattern = re.compile(r"^domain_?(\d+)(?:_(.+))?$")
for prompt_path in Path('prompts').rglob('question_*.txt'):
    match = pattern.match(prompt_path.parent.name)
    if not match:
        # Not inside a domain folder; ignore the file.
        continue
    domain_id, variant = match.groups()

    # "question_3" -> "3"; combined with the domain number this gives code "<domain>.3".
    stem_parts = prompt_path.stem.split('_')
    qnum = stem_parts[1] if len(stem_parts) > 1 else 'unknown'
    question_code = f"{domain_id}.{qnum}"

    domain_key = f"domain_{domain_id}" if not variant else f"domain_{domain_id}_{variant}"
    prompt_question_files[domain_key][question_code] = str(prompt_path)

# Sort domains and questions in natural order for stable output, and freeze the
# result into plain dicts so later lookups cannot silently create empty domains.
prompt_question_files = {
    domain: {
        code: questions[code]
        for code in sorted(questions, key=natural_sort_key)
    }
    for domain, questions in sorted(
        prompt_question_files.items(), key=lambda item: natural_sort_key(item[0])
    )
}

print('Prompt question files by domain (variants split via folder name):')
for domain, questions in prompt_question_files.items():
    print(domain)
    for code, f in questions.items():
        print(f" - {code}: {f}")


Prompt question files by domain (variants split via folder name):
domain_1_randomization
 - 1.1: prompts/domain_1_randomization/question_1.txt
 - 1.2: prompts/domain_1_randomization/question_2.txt
 - 1.3: prompts/domain_1_randomization/question_3.txt
domain_2_adhering
 - 2.1: prompts/domain_2_adhering/question_1.txt
 - 2.2: prompts/domain_2_adhering/question_2.txt
 - 2.3: prompts/domain_2_adhering/question_3.txt
 - 2.4: prompts/domain_2_adhering/question_4.txt
 - 2.5: prompts/domain_2_adhering/question_5.txt
 - 2.6: prompts/domain_2_adhering/question_6.txt
domain_2_assigment
 - 2.1: prompts/domain_2_assigment/question_1.txt
 - 2.2: prompts/domain_2_assigment/question_2.txt
 - 2.3: prompts/domain_2_assigment/question_3.txt
 - 2.4: prompts/domain_2_assigment/question_4.txt
 - 2.5: prompts/domain_2_assigment/question_5.txt
 - 2.6: prompts/domain_2_assigment/question_6.txt
 - 2.7: prompts/domain_2_assigment/question_7.txt
domain_3_missing_data
 - 3.1: prompts/domain_3_missing_data/question

In [16]:
# Read PDF text, load a domain module, and walk its signalling questions
import importlib
from pathlib import Path
from rob2.common import Response

pdf_path = Path('studies/6832_SercePehlevan_2020.pdf')


# Upload the study PDF; the context manager closes the local handle once the
# upload call returns instead of leaking it for the lifetime of the kernel.
with open(pdf_path, "rb") as pdf_handle:
    file = client.files.create(
        file=pdf_handle,
        purpose="user_data",
    )

for domain_key in prompt_question_files:
    # Answers collected so far for this domain, keyed by question code.
    state = {}
    domain_prompts = prompt_question_files[domain_key]
    domain_module = importlib.import_module(f'rob2.{domain_key}')
    # Use the first attribute whose name starts with 'get_next_question_domain';
    # it is called with the current state and returns the next question code.
    get_next_question = next(
        getattr(domain_module, name)
        for name in dir(domain_module)
        if name.startswith('get_next_question_domain')
    )

    question_code = get_next_question(state)
    while question_code:
        # Guard against a walker that does not advance: with placeholder answers
        # it could otherwise hand back the same question forever.
        if question_code in state:
            print(f"{domain_key}: {question_code} was returned again; stopping this domain.")
            break

        # A missing code must not fall through to Path(''): that is the current
        # directory, which exists and would make read_text() raise.
        prompt_file = domain_prompts.get(question_code)
        prompt_path = Path(prompt_file) if prompt_file else None
        if prompt_path is not None and prompt_path.is_file():
            prompt_text = prompt_path.read_text(encoding='utf-8')
        else:
            prompt_text = None

        print(f"{domain_key} -> {question_code}")
        if prompt_text:
            print(prompt_text)
        else:
            print('Prompt not found for this question code.')

        state[question_code] = Response.NI  # replace with real answer
        # The former unconditional `break` here made this advance unreachable,
        # so only the first question of each domain was ever visited.
        question_code = get_next_question(state)


domain_1_randomization -> 1.1
Domain 1 – Question 1.1
-----------------------

Prompt:
You are evaluating Risk of Bias Domain 1, Question 1.1:
“Was the allocation sequence random?”

Read the provided document.

Your tasks:

1. Identify whether the study used a truly random method to generate the allocation sequence.
   Random methods include:
     - computer-generated random numbers
     - random number tables
     - coin toss or dice rolling
     - shuffled cards or envelopes
     - minimization with a random element
   Non-random or predictable methods include:
     - alternation
     - date of birth
     - admission date
     - medical record number
     - clinician or investigator judgment
     - any systematic or predictable rule

2. Return one of the following:
   Y  = Yes
   PY = Probably Yes
   NI = No Information
   PN = Probably No
   N  = No

3. Provide a short justification explaining why the chosen answer is appropriate.

4. Quote exact sentences or phrases from the docume

ImportError: cannot import name 'DOMAIN2_QUESTIONS' from 'rob2.questions' (/Users/anthony/personal_local/RoB/rob2/questions.py)