In [2]:
import anthropic
from anthropic.types.message_create_params import MessageCreateParamsNonStreaming
from anthropic.types.messages.batch_create_params import Request

import json
import math
import pandas as pd

import functions.prompts as prompts

# Shared Anthropic SDK client used by every API call in this notebook.
# NOTE(review): no api_key is passed, so credentials presumably come from the
# SDK default (the ANTHROPIC_API_KEY environment variable) — confirm.
client = anthropic.Anthropic()

In [3]:
def req(id, field):
    """Build one Message Batches request asking the model to write a paper.

    Args:
        id: Value stored as the request's ``custom_id``.
        field: Field phrase, article included (e.g. ``"a finance"``), that is
            interpolated into the prompt.

    Returns:
        A ``Request`` whose params enable extended thinking with a
        32000-token budget and cap the response at 64000 tokens.
    """
    prompt = f"Generate {field} paper that could be published in a top-5 economics journal."
    user_turn = {"role": "user", "content": prompt}
    thinking_config = {"type": "enabled", "budget_tokens": 32000}
    message_params = MessageCreateParamsNonStreaming(
        model="claude-3-7-sonnet-20250219",
        thinking=thinking_config,
        max_tokens=64000,
        messages=[user_turn],
    )
    return Request(custom_id=id, params=message_params)
    
def batch():
    """Build the full list of generation requests: five per economics field.

    Each request's custom_id is the first three letters of the second word of
    the field phrase followed by a running index (``fin_0`` ... ``app_4``).

    Returns:
        A flat list of 20 requests (4 fields x 5 papers), grouped by field in
        the order the fields are listed.
    """
    # Typo fix: this list previously contained "microeconimics", which was
    # interpolated verbatim into the prompt sent to the model.
    fields = ['a finance', 'a macroeconomics', 'a microeconomics', 'an applied microeconomics']
    papers_per_field = 5

    requests = []
    for field in fields:
        # Second word of the phrase ("finance", "applied", ...) -> 3-letter tag.
        tag = field.split(" ")[1][:3]
        for i in range(papers_per_field):
            requests.append(req(f"{tag}_{i}", field))
    return requests

In [None]:
# Submit the full set of generation requests as one Message Batch.
# The result gets its own name: binding it to `batch` would overwrite the
# `batch()` helper, so running this cell a second time would fail.
submitted_batch = client.messages.batches.create(requests=batch())
print(submitted_batch)

In [17]:
# Smoke test: submit a batch that contains a single macroeconomics request.
x = client.messages.batches.create(
    requests=[req("test", "a macroeconomics")],
)

In [19]:
# Display the MessageBatch object returned for the single-request submission.
x

MessageBatch(id='msgbatch_01Gv77kcHCMePdYdwdBbiXJj', archived_at=None, cancel_initiated_at=None, created_at=datetime.datetime(2025, 3, 25, 10, 21, 2, 667975, tzinfo=datetime.timezone.utc), ended_at=None, expires_at=datetime.datetime(2025, 3, 26, 10, 21, 2, 667975, tzinfo=datetime.timezone.utc), processing_status='in_progress', request_counts=MessageBatchRequestCounts(canceled=0, errored=0, expired=0, processing=1, succeeded=0), results_url=None, type='message_batch')

In [5]:
import time

# Batch to poll; this is the id shown in the single-request batch output above.
# NOTE(review): the module-level name `id` shadows the builtin `id()` in this kernel.
id = "msgbatch_01Gv77kcHCMePdYdwdBbiXJj"
def wait(id, poll_seconds=5):
    """Block until the given message batch has finished processing.

    The batch is retrieved once per iteration and its request counts are
    printed after every retrieval, so progress is visible while waiting.

    Args:
        id: Identifier of the message batch to poll.
        poll_seconds: Seconds to sleep between polls (default 5).

    Returns:
        None. The function simply returns once the batch reports "ended".
    """
    while True:
        stat = client.messages.batches.retrieve(id)
        print(stat.request_counts)
        # "canceling" is still a transitional state; only "ended" means the
        # batch is final, so keep polling until then.
        if stat.processing_status == "ended":
            return
        # Sleep only when another poll is actually needed.
        time.sleep(poll_seconds)

# Poll the batch selected above; this call blocks the cell until `wait` returns.
wait(id)

MessageBatchRequestCounts(canceled=0, errored=0, expired=0, processing=1, succeeded=0)


KeyboardInterrupt: 

In [18]:
# Load the batch ids to collect, one id per line.
with open("dump/anthropic-batches.txt", 'r') as f:
    # Strip surrounding whitespace and drop blank lines: a trailing newline
    # in the file would otherwise produce an empty batch id.
    batches = [line.strip() for line in f if line.strip()]

In [24]:
def parse_r(r):
    """Turn one batch result entry into an ``{"id", "scores"}`` record.

    The validation model is chosen from the custom id: ``prompts.Top5Model``
    when the id contains "top5", otherwise ``prompts.AnalysisModel``.

    Args:
        r: A batch result entry exposing ``custom_id`` and
            ``result.message.content``.

    Returns:
        ``{"id": <custom_id>, "scores": <validated dict>}`` on success, or the
        same dict with ``"scores": None`` when the reply cannot be read,
        parsed or validated (the error is printed).
    """
    custom_id = r.custom_id
    validator = prompts.Top5Model if "top5" in custom_id else prompts.AnalysisModel

    # Captured outside the try block so the error path can report it without
    # touching `r.result...` again (re-reading it could raise inside `except`).
    raw_text = None
    try:
        raw_text = r.result.message.content[0].text
        # Keep everything up to the first "}" and add the opening brace back.
        # NOTE(review): presumably the request prefilled "{" so the reply lacks
        # it — confirm against the code that created these batches.
        payload = "{" + raw_text.split("}")[0] + "}"
        return {
            "id": custom_id,
            "scores": validator.model_validate(json.loads(payload)).model_dump()
        }
    except Exception as e:
        shown = "{" + raw_text if raw_text is not None else "<no text content>"
        print(f"Error {e} - {shown}")
        return {
            "id": custom_id,
            "scores": None
        }

# Download every batch's results and append one parsed JSON line per succeeded
# request. Entries whose result type is not 'succeeded' are skipped.
# The output file is opened once, in append mode: re-running this cell adds
# the same records again.
with open('eval-output/anthropic-result.jsonl', 'a') as out_file:
    for batch_id in batches:
        if not batch_id:
            # Blank id (e.g. from a trailing newline in the id file): nothing to fetch.
            continue
        for entry in client.messages.batches.results(batch_id):
            if not entry or entry.result.type != 'succeeded':
                continue
            try:
                out_file.write(json.dumps(parse_r(entry)) + "\n")
            except Exception as e:
                # Best effort per record: report the failure and keep going.
                print("ERROR! - " + str(e))

In [25]:
import pandas as pd

# Paper table that receives the model scores.
df = pd.read_csv("papers.csv")

# Read the collected results in one go; the context manager closes the file
# even if the read raises.
with open('eval-output/anthropic-result.jsonl', 'r') as f:
    file_response = f.read()

# Copy every stored score into `df`, one column per (metric, run) pair.
for line in file_response.split("\n"):
    if not line.strip():
        # Blank lines (including the one after a trailing newline) hold no record.
        continue
    record = json.loads(line)

    if record['scores'] is None:
        # Records whose reply could not be parsed are stored with scores=None;
        # validating them would raise and abort the whole merge.
        print(f"Skipping {record['id']}: no parsed scores")
        continue

    # The stored id is split as "<paper id>Z<run number>Q<type>".
    id_parts = record['id'].split("Z")
    paper_id = id_parts[0]
    run_no, kind = id_parts[1].split("Q")

    # Row of the paper this record belongs to (first match on the `id` column).
    idx = df.index[df['id'] == paper_id].tolist()[0]

    if kind == "top5":
        metrics = ['score']
        validator = prompts.Top5Model
    else:
        metrics = ['originality', 'rigor', 'scope', 'impact', 'written_by_ai']
        validator = prompts.AnalysisModel

    # Validate once per record rather than once per metric.
    scores = validator.model_validate(record['scores'])

    for metric in metrics:
        # Run numbers are stored 0-based; column names are 1-based.
        column_name = f"anthropic-{metric}-{int(run_no)+1}"

        if column_name not in df.columns:
            df[column_name] = None

        df.loc[idx, column_name] = getattr(scores, metric)

In [32]:
# Persist the merged table; index=False keeps the row index out of the CSV.
df.to_csv("result_a.csv", index=False)