In [None]:
%load_ext autoreload
%autoreload 2
import pandas as pd
print(pd.__version__)
from utils.utils import *
from utils.openai_utils import *


# OpenAI

## Pipeline

In [None]:
# Build and submit one OpenAI batch job per (dataset, model) pair.
# Every artifact for a pair is written under {name}/{dataset}/{prompt_type}/{model}/.
client = OpenAI()  # one client is enough; it is reused for upload and submission
prompt_type = 'Metric'
name = "Final_results"
models = ['o4-mini-2025-04-16']
datasets = ['CSII', 'CIQ']

for dataset in datasets:
    for model in models:

        batch_dir = f"{name}/{dataset}/{prompt_type}/{model}/"
        os.makedirs(batch_dir, exist_ok=True)

        # Load data
        individual_data = load_data_metrics_for_prompt(dataset)

        # Load prompts for pairs
        prompts, info_dict = compile_prompt_pairs(batch_dir, individual_data)

        # For reversed order uncomment
        #prompts, info_dict = compile_prompt_pairs_reversed(batch_dir, individual_data)

        # batch_json is used below as the path of the request file to upload.
        batch_json = make_batch_json_openai(prompts, batch_dir, model)

        # Upload the request file; the context manager closes the handle even
        # if the upload raises.
        with open(batch_json, "rb") as batch_fh:
            batch_input_file = client.files.create(
                file=batch_fh,
                purpose="batch"
            )
        # save file metadata
        save_input_file_openai(batch_input_file, batch_dir, batch_json)

        # Send to the server
        batch_input_file_id = batch_input_file.id
        batch_sent = client.batches.create(
            input_file_id=batch_input_file_id,
            endpoint="/v1/responses",
            completion_window="24h",
            metadata={
                # Human-readable tag: model, dataset and submission hour.
                "description": f"{model}_{dataset}_{datetime.now().strftime('%Y_%m_%d_%Hh')}"
            }
        )
        # save batch metadata
        save_batch_file_openai(batch_sent, batch_dir)



## Check on batch

In [None]:
# Settings shared by the status-check and read cells below.
client = OpenAI()
prompt_type = 'Metric'          # third path component of every batch directory
name = "Final_results_reversed"  # root directory of the run being inspected
models = [
    'o4-mini-2025-04-16',
    'gpt-3.5-turbo-0125',
    'gpt-4.1-2025-04-14',
]

In [None]:

# Refresh the stored status of every submitted batch.
datasets = ['MDI', 'CIQ', 'CSII']

# Same visiting order as a dataset-outer / model-inner nested loop.
for dataset, model in ((d, m) for d in datasets for m in models):
    batch_dir = f"{name}/{dataset}/{prompt_type}/{model}/"

    # Look up the batch id saved at submission time and ask the API for
    # the current state of that batch.
    batch_metadata = load_batch_file_openai(batch_dir)
    batch = client.batches.retrieve(batch_metadata['batch_id'])
    print(batch)

    # Persist the refreshed batch object next to the original metadata.
    save_batch_file_openai(batch, batch_dir)


## Read and Save output

In [None]:
# Display the batch metadata left over from the status-check loop above
# (the value from its last iteration, assuming that cell ran just before this one).
batch_metadata

In [None]:

# Download the finished OpenAI batch output for every (model, dataset) pair,
# keep the raw JSONL lines on disk, and pickle a {custom_id: answer_text} dict.
# Uses client, name, prompt_type, models and datasets from the cells above.

# Answers considered well-formed; anything else is reported for manual review.
EXPECTED_ANSWERS = ['A', ' B ','B', 'A.', 'B.', 'A ','A  ', 'B ', 'A    ', 'B    ', '**A**', '**B**', 'Patient A', 'Patient B']


def _first_message_text(response_body):
    """Return the text of the first "message" item in a response body.

    The message item is located by its "type" rather than by a fixed index,
    because other item types can precede it in the "output" list.

    Raises:
        ValueError: if the "output" list contains no "message" item.
    """
    for item in response_body["output"]:
        if item.get("type") == "message":
            return item["content"][0]["text"]
    raise ValueError("no 'message' item in response output")


for model in models:
    print('######')
    print(model)

    for dataset in datasets:
        print(dataset)
        try:
            batch_dir = f"{name}/{dataset}/{prompt_type}/{model}/"

            batch_metadata = load_batch_file_openai(batch_dir)
            file_response = client.files.content(batch_metadata['output_file_id'])

            content_str = file_response.read().decode("utf-8")
            content_lines = content_str.strip().split("\n")

            ## Save the raw lines; written as UTF-8 to match how they were decoded.
            with open(f"{batch_dir}/output_content_lines.txt", "w", encoding="utf-8") as f:
                for line in content_lines:
                    f.write(line + "\n")

            outputs = {}
            for line in content_lines:
                data = json.loads(line)
                try:
                    # Extract afresh for every line so that a response without
                    # a message is reported instead of silently reusing the
                    # previous line's answer.
                    output_text = _first_message_text(data["response"]["body"])

                    outputs[data['custom_id']] = output_text
                    if output_text not in EXPECTED_ANSWERS:
                        print(f"Unexpected output: {output_text} {len(output_text)}")

                except Exception as e:
                    print(data)
                    print(f"Error parsing line: {e}")
            print(len(outputs))
            # Saved as outputs
            with open(f"{batch_dir}/outputs.p", "wb") as f:
                pickle.dump(outputs, f)
        except Exception as e:
            # Best effort: report the failure and continue with the next pair.
            print(e)

# Anthropic


#### Test

In [None]:
from utils.anthropic_utils import *

In [None]:
# Anthropic setup: helper functions, API client, and the models / datasets to run.
from utils.anthropic_utils import *

client = anthropic.Anthropic()
models = [
    'claude-3-7-sonnet-20250219',
    'claude-3-haiku-20240307',
]
datasets = [
    'MDI',
    'CIQ',
    'CSII',
]


In [None]:
# Build and submit one Anthropic batch per (dataset, model) pair and save the
# request payload plus a small metadata record for each.
# `name` and `prompt_type` are not set in this cell; they come from earlier cells.
models = ['claude-3-7-sonnet-20250219','claude-3-haiku-20240307']
datasets = ['CSII']

for dataset in datasets:
    # Iterate over the models explicitly so that `model` is never a stale
    # value left behind by an earlier cell.
    for model in models:
        batch_dir = f"{name}/{dataset}/{prompt_type}/{model}/"
        os.makedirs(batch_dir, exist_ok=True)

        # Load data
        individual_data = load_data_metrics_for_prompt(dataset)

        # Load prompts for pairs
        prompts, info_dict = compile_prompt_pairs(batch_dir, individual_data)
        #*******
        # For reversed order uncomment
        #prompts, info_dict = compile_prompt_pairs_reversed(batch_dir, individual_data)

        # Despite its name, batch_json is the batch object: its id,
        # processing_status and created_at attributes are read below.
        batch_json, requests = make_batch_json_anthropic(prompts, batch_dir, model)

        save_json(
            data=requests,
            directory=batch_dir,
            custom_ending="requests.jsonl"
        )

        # Minimal record needed later to look the batch up again.
        simple_data = {
            'batch_id': batch_json.id,
            'status': batch_json.processing_status,
            'created_at': batch_json.created_at.isoformat() if batch_json.created_at else None,
            'saved_at': datetime.now().isoformat()
        }
        save_json(
            data=simple_data,
            directory=batch_dir,
            custom_ending="batch_sent.jsonl"
        )


### Read

In [None]:
# Fetch Anthropic batch results for every (model, dataset) pair, archive each
# raw result as one JSON line, and pickle a {custom_id: answer_text} dict.
# `name`, `prompt_type`, `client` and `datasets` come from earlier cells.
models = ['claude-3-7-sonnet-20250219']

# Answers considered well-formed; anything else is reported for manual review.
expected_answers = ['A', 'B', 'A.', 'B.', 'A ', 'B ', 'A    ', 'B    ', '**A**', '**B**', 'Patient A', 'Patient B']

for model in models:
    for dataset in datasets:

        batch_dir = f"{name}/{dataset}/{prompt_type}/{model}/"

        outputs = {}

        try:
            batch_id = load_json(batch_dir, "batch_sent.jsonl")['batch_id']
            results = client.messages.batches.results(batch_id)
            with open(f"{batch_dir}/output_content_lines.txt", "w") as out_file:

                for result in results:
                    # Archive the raw entry first so that entries which fail
                    # to parse are still kept on disk.
                    out_file.write(json.dumps(result.model_dump()) + "\n")

                    # Only succeeded entries carry a message; report the
                    # others and keep going instead of aborting the batch.
                    if result.result.type != "succeeded":
                        print(f"Skipping {result.custom_id}: result type is {result.result.type}")
                        continue

                    try:
                        output_text = result.result.message.content[0].text
                    except Exception as e:
                        print(f"Error parsing line: {e}")
                        continue

                    outputs[result.custom_id] = output_text
                    if output_text not in expected_answers:
                        print(f"Unexpected output: {output_text}")
        except Exception as e:
            # Covers failures while loading the metadata, fetching the
            # results, or writing the archive file.
            print(f"Error retrieving results for {batch_dir}: {e}")
        print(len(outputs))
        # Saved as outputs (possibly partial if an error was reported above)
        with open(f"{batch_dir}/outputs.p", "wb") as f:
            pickle.dump(outputs, f)