# Notebook for loading and submitting questions
***

## Question JSON Format

Each question is a Python dict with the following keys:

- **uuid**: unique identifier
- **ChemIQ**: Boolean indicating whether the question is part of the main ChemIQ benchmark
- **question_category**, **sub_category**: the task type and its variant (`sub_category` may be `None`)  
- **meta_data**: e.g. `smiles`, `smiles_random`, `carbon_count`  
- **prompt**: the question text shown to users  
- **answer**: the expected answer  
- **answer_format**, **answer_range**, **verification_method**  

To submit a question, send its `prompt` and keep track of its `uuid` so that the response can be matched back to the question.  


In [4]:
import json
from collections import Counter
from pathlib import Path

# Load all questions. The file is JSON Lines: one JSON-encoded question per line.
lines = Path('questions/chemiq.jsonl').read_text(encoding='utf-8').splitlines()
# Ignore blank lines so json.loads is never handed an empty string.
data = [json.loads(line) for line in lines if line.strip()]

# Summarise totals: number of questions per (question_category, sub_category).
total = len(data)
counts = Counter(
    (q.get('question_category'), q.get('sub_category')) for q in data
)

print(f"Total questions (n={total}):")
# Some questions have no sub_category (None). Sorting on a key that maps None
# to '' avoids a TypeError from comparing None with str when both occur
# within the same category.
for (category, sub_category), count in sorted(
    counts.items(),
    key=lambda item: tuple(part or '' for part in item[0]),
):
    print(f" - {category!r}, {sub_category!r}: {count}")

# Keep only the questions flagged as part of the main ChemIQ benchmark;
# a missing 'ChemIQ' key is treated as False.
chemiq_questions = [q for q in data if q.get('ChemIQ', False)]

Total questions (n=796):
 - 'atom_mapping', 'random': 92
 - 'atom_mapping', 'semi-canonical': 92
 - 'counting_carbon', None: 50
 - 'counting_ring', None: 48
 - 'nmr_elucidation', 'small': 46
 - 'nmr_elucidation', 'zinc': 30
 - 'reaction', 'synthetic_canonical': 45
 - 'reaction', 'synthetic_random': 45
 - 'sar', 'integer': 20
 - 'sar', 'noise': 20
 - 'shortest_path', 'canonical': 54
 - 'shortest_path', 'random': 54
 - 'smiles_to_iupac', 'zinc_canonical': 100
 - 'smiles_to_iupac', 'zinc_random': 100


## Example question

In [5]:
# Show the first benchmark question: a banner followed by the prompt text,
# then a banner followed by the expected answer.
first_question = chemiq_questions[0]
print(f"{'='*20} PROMPT {'='*20}", first_question["prompt"], sep="\n")
print(f"{'='*20} ANSWER {'='*20}", first_question["answer"], sep="\n")

How many carbon atoms are in the molecule:

S(c1sc(N)nn1)C(F)F

Give your answer as an integer. Do not write any comments.
3


# Running benchmark using OpenAI API
***

## Create batch submission file

In [46]:
# API Batch file: one JSON request per line, one request per ChemIQ question.
# Kept as a plain string because later cells pass it to open() and use it as
# the batch's metadata description.
batch_submission_file = 'batch_submission_files/gpt-4o-2024-11-20-submission.jsonl'

# Make sure the target directory exists so open() does not fail on a fresh checkout.
Path(batch_submission_file).parent.mkdir(parents=True, exist_ok=True)

with open(batch_submission_file, 'w', encoding='utf-8') as f:
    for question in chemiq_questions:
        record = {
            # The question's uuid is used as custom_id so that each result
            # can be matched back to its question.
            "custom_id": question["uuid"],
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o-2024-11-20",
                "messages": [{"role": "user", "content": question["prompt"]}],
            }
        }
        f.write(json.dumps(record) + "\n")

# Report the path that was actually written (the previous message referenced
# an undefined name and raised NameError).
print(f"Successfully wrote batch requests to {batch_submission_file}. Number of questions = {len(chemiq_questions)}")

Successfully wrote batch requests to batch_submission_files/gpt-4o-2024-11-20-submission.jsonl. Number of questions = 796


## Submit batch to OpenAI API

In [47]:
import os
import openai
from openai import OpenAI

# Read the OpenAI API key from the environment (a KeyError is raised if
# OPENAI_API_KEY is not set), store it on the openai module and hand the
# same value to the client used by the cells below.
api_key = os.environ["OPENAI_API_KEY"]
openai.api_key = api_key
client = OpenAI(api_key=api_key)

In [52]:
"""
batch_input_file = client.files.create(
    file=open(batch_submission_file, "rb"),
    purpose="batch"
)

batch_input_file_id = batch_input_file.id
created_batch = client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={
        "description": batch_submission_file,
    }
)
print(created_batch)
# Keep track of the Batch ID if submitting multiple different models
print(f"Batch ID: {created_batch.id}")
"""

Batch(id='batch_6800e7e2a0d4819090d713821e380203', completion_window='24h', created_at=1744889826, endpoint='/v1/chat/completions', input_file_id='file-BJzeAW7RVPFvdzLAjH1pe1', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1744976226, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'batch_submission_files/gpt-4o-2024-11-20-submission.jsonl'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))


## Download results

In [59]:
batch_results_file = "batch_results_files/gpt-4o-2024-11-20-results.jsonl"

# Writing the results to disk is opt-in so that an existing results file is
# not overwritten by accident; set to True to save.
SAVE_RESULTS = False

# `created_batch` is defined by the batch-submission step above, so that step
# must actually have run in this kernel session.
batch_result = client.batches.retrieve(created_batch.id)
print(batch_result)

# Always defined, so later cells can rely on `results` even when the batch
# has not produced an output file yet.
results = []

if batch_result.error_file_id:
    # Surface failures instead of downloading the error file and discarding it.
    error_file_response = client.files.content(batch_result.error_file_id)
    error_lines = [
        line
        for line in error_file_response.content.decode('utf-8').splitlines()
        if line.strip()
    ]
    print(f"Batch error file {batch_result.error_file_id} contains {len(error_lines)} line(s); inspect `error_lines` for details")

if batch_result.output_file_id:
    output_file_response = client.files.content(batch_result.output_file_id)

    # Decode the binary content to a UTF-8 string
    data_str = output_file_response.content.decode('utf-8')

    # Split the decoded string by lines and parse each non-empty line as JSON
    results = [json.loads(line) for line in data_str.splitlines() if line.strip()]
else:
    print(f"No output file available (batch status: {batch_result.status})")

if SAVE_RESULTS and results:
    # Create the target directory if needed, then write one JSON record per line.
    Path(batch_results_file).parent.mkdir(parents=True, exist_ok=True)
    with open(batch_results_file, "w", encoding="utf-8") as f:
        for record in results:
            # dump each dict as a JSON string, followed by newline
            f.write(json.dumps(record, ensure_ascii=False) + "\n")

    print(f"Wrote {len(results)} records to {batch_results_file}")

Batch(id='batch_6800e7e2a0d4819090d713821e380203', completion_window='24h', created_at=1744889826, endpoint='/v1/chat/completions', input_file_id='file-BJzeAW7RVPFvdzLAjH1pe1', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1744890232, error_file_id=None, errors=None, expired_at=None, expires_at=1744976226, failed_at=None, finalizing_at=1744890127, in_progress_at=1744889828, metadata={'description': 'batch_submission_files/gpt-4o-2024-11-20-submission.jsonl'}, output_file_id='file-PgbjLzDgctDmLKJLHcEkkm', request_counts=BatchRequestCounts(completed=796, failed=0, total=796))
