In [None]:
import asyncio
import os
import json
from copy import deepcopy
from pathlib import Path

import anthropic
from xopen import xopen
from tqdm import tqdm

In [None]:
# Asynchronous Anthropic client shared by every request in this notebook.
# The key is read from the ANTHROPIC_API_KEY environment variable instead of
# being written into the notebook, so the secret is never saved or committed
# with the file. A missing variable raises KeyError here, before any request.
async_client = anthropic.AsyncAnthropic(
    api_key=os.environ["ANTHROPIC_API_KEY"],
)

Reference: https://www.datacamp.com/tutorial/getting-started-with-claude-3-and-the-claude-3-api

### 1. Preparing Your Batch File

In [None]:
def create_user_prompt(input, dataset):
    """Build the user prompt for one example.

    Entries ``instruction_1`` .. ``instruction_6`` are read from ``input`` in
    order; collection stops at the first entry that is missing or whose length
    is below 4. A task description is placed in front of the collected lines.

    Args:
        input: Mapping with the ``instruction_<i>`` entries and, optionally,
            a ``context`` entry.
        dataset: Dataset name. When it contains "math" (case-insensitive) the
            context-free task description is used and ``context`` is ignored.

    Returns:
        The prompt: task description, then (non-math only, when present) a
        ``Context:`` section, then one ``Instruction_<i>. <text>`` line per
        collected instruction.
    """
    numbered_lines = []
    for index in range(1, 7):
        key = f"instruction_{index}"
        usable = key in input and len(input[key]) >= 4
        if not usable:
            # The first missing/too-short instruction ends the sequence.
            break
        numbered_lines.append(f"Instruction_{index}. {input[key]}")
    instruction_block = "\n".join(numbered_lines)

    is_math = "math" in dataset.lower()
    if is_math:
        header = 'In the following, you will receive multiple instructions. Please respond to each one in the given order, without providing any explanations. Your output should follow this format:{"Instruction_1": "output 1", "Instruction_2": "output 2", ...}'
        context_block = ""
    else:
        header = 'In the following, you will receive a context and multiple instructions. Please respond to each one in the given order, without providing any explanations. Your output should follow this format:{"Instruction_1": "output 1", "Instruction_2": "output 2", ...}'
        if "context" in input:
            context_block = "Context:\n" + input["context"] + "\n"
        else:
            context_block = ""
    return f"{header}\n{context_block}{instruction_block}"

In [None]:

model = "claude-3-opus-20240229"
data_file_path = "../sifo_datasets/math.jsonl"
batch_file_dir = "../batch_files"
# Task name = data file name up to its first "." and then up to its first "_"
# (e.g. "math.jsonl" -> "math"); it also selects the prompt template.
task_name = data_file_path.split("/")[-1].split(".")[0].split("_")[0]
batch_file_path = os.path.join(batch_file_dir, f"{task_name}.jsonl")

system_prompt = "You are a helpful assistant."

# requests_params: one request dict per example, in file order.
# input_data / prompt_data: example id -> raw example / generated user prompt,
# kept so the responses can later be merged back with their inputs.
requests_params = []
input_data = {}
prompt_data = {}
with xopen(data_file_path, "r") as fin:
    for line in tqdm(fin):
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline) in the JSONL file.
            continue
        input_example = json.loads(line)
        input_data[input_example["id"]] = input_example
        user_prompt = create_user_prompt(input_example, task_name)
        prompt_data[input_example["id"]] = user_prompt
        params = {
            "id": input_example["id"],
            "model": model,
            "system_prompt": system_prompt,
            "messages": [
                {"role": "user", "content": user_prompt}
            ],
            "max_tokens": 1000
        }
        # Requests are not echoed one by one: that floods the cell output and
        # breaks the tqdm bar. Inspect e.g. requests_params[:3] in its own cell.
        requests_params.append(params)

# The output directory may not exist on a fresh checkout; create it first so
# opening the batch file for writing does not fail.
os.makedirs(batch_file_dir, exist_ok=True)
with xopen(batch_file_path, "w") as f:
    for params in requests_params:
        f.write(json.dumps(params) + "\n")

In [None]:
def chunks(list_, n):
    """Yield consecutive slices of ``list_`` containing at most ``n`` items.

    The last slice is shorter when ``len(list_)`` is not a multiple of ``n``;
    an empty ``list_`` yields nothing.
    """
    starts = range(0, len(list_), n)
    yield from (list_[start:start + n] for start in starts)

In [None]:
max_requests_per_minute = 3   # Free Tier rate limit
# Only the first 47 requests are sent, grouped so that each chunk holds at most
# one minute's worth of requests. The chunks are materialised as a list: a bare
# generator would be exhausted after one pass, so re-running the sending cell
# would silently send nothing and return an empty response list.
chunked_requests_params = list(chunks(requests_params[:47], max_requests_per_minute))

In [None]:
async def send_request(request_params):
    """Send one Messages API request and return the text of its first content block.

    Args:
        request_params: Dict providing "model", "system_prompt", "messages"
            and "max_tokens"; any other keys (e.g. "id") are not sent.

    Returns:
        ``message.content[0].text`` of the API response.
    """
    api_kwargs = {
        "model": request_params["model"],
        # The batch file stores the system prompt under "system_prompt";
        # the API parameter is called "system".
        "system": request_params["system_prompt"],
        "messages": request_params["messages"],
        "max_tokens": request_params["max_tokens"],
    }
    message = await async_client.messages.create(**api_kwargs)
    first_block = message.content[0]
    return first_block.text

In [None]:
async def send_request_batch(pause_seconds=61):
    """Send every chunk of ``chunked_requests_params`` and collect the replies.

    The requests of one chunk are sent concurrently; consecutive chunks are
    separated by a pause so that a per-minute request limit is respected.

    Args:
        pause_seconds: Seconds to wait between two chunks. The default is
            slightly more than one minute, which fits chunks sized to a
            requests-per-minute limit; lower it only if the account's rate
            limit allows more than one chunk per minute.

    Returns:
        List of response texts in request order. If the API reports that it is
        overloaded (HTTP 529), only the responses of the chunks completed
        before the error are returned.

    Raises:
        anthropic.InternalServerError: For server errors other than 529.
    """
    responses = []
    try:
        for chunk_index, chunk in enumerate(chunked_requests_params):
            if chunk_index:
                # Pause between chunks only: nothing to wait for before the
                # first chunk, and no pointless wait after the last one.
                await asyncio.sleep(pause_seconds)
            batch_of_responses = await asyncio.gather(
                *(send_request(params) for params in chunk)
            )
            responses.extend(batch_of_responses)
    except anthropic.InternalServerError as e:
        if e.status_code == 529:  # API server overloaded
            # Best effort: keep what was collected so far instead of failing.
            print("API server overloaded.")
        else:
            raise  # bare raise keeps the original traceback
    return responses

In [None]:
# Plain Python script variant (no event loop is running yet, so start one):
# responses = asyncio.run(send_request_batch())  # for python script
# Notebook variant: top-level await is used directly in the cell.
responses = await send_request_batch()  # for jupyter notebook

In [None]:
# Pair every sent request with its reply (in order), attach the prompt and the
# raw response to a copy of the original example, and save one JSON record per
# line. zip() stops at the shorter sequence, so requests without a reply are
# simply left out.
merge_data = []
output_file_path = Path("claude3_generations", model, f"{model}_{task_name}_first_47_new.jsonl")
output_file_path.parent.mkdir(parents=True, exist_ok=True)

with xopen(output_file_path, "w") as fout:
    for sent_request, reply_text in zip(requests_params[:47], responses):
        example_id = sent_request["id"]
        # Deep copy so the cached input example itself is never modified.
        record = {
            **deepcopy(input_data[example_id]),
            "prompt": prompt_data[example_id],
            "response": reply_text,
        }
        merge_data.append(record)
        fout.write(json.dumps(record) + "\n")