## imports

In [1]:
import pandas as pd
import numpy as np
import json
import os
import asyncio

from typing import List, Dict
from openai import OpenAI

from prompts import prompts
from pydantic_classes import FinalAnswer
import pickle


## initiates

In [2]:
open_client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

In [3]:
sampled_df = pd.read_json('sampled_board.jsonl', lines=True, orient='records', encoding='utf-8')

question_1 = sampled_df.loc[234,'translated_text']
print(question_1)

Question:
A 30-year-old woman with complaints of severe pleuritic chest pain has presented to the emergency department 3 days ago. The pain improves when sitting and worsens when lying down. What are the expected changes in the ECG?

Options:
Option 1: Downsloping ST segment in leads aVR and V1

Option 2: Q wave in limb leads

Option 3: Horizontal ST segment in leads V4-V6

Option 4: Negative T wave along with elevated ST segment in leads V4-V6


In [4]:
sampled_df_300 = sampled_df[:300]

In [5]:
tempratures = [0.3,0.7,1]
models = ["accounts/fireworks/models/llama-v3p1-8b-instruct",'gpt-4o-2024-08-06',"accounts/fireworks/models/deepseek-v3","accounts/fireworks/models/llama-v3p3-70b-instruct","o3-mini-2025-01-31"]
experiment_configs = []
for model in models:
    for temp in tempratures:
        for prompt_name, prompt in prompts.items():

            config = {
                'model': model,
                'temperature': temp,
                'prompt': prompt_name
            }
            experiment_configs.append(config)

print(len(experiment_configs))

270


## functions

In [6]:

def get_experiment_config(config_index):
    conf = experiment_configs[config_index]
    print(f"Model: {conf.get('model')}, Temperature: {conf.get('temperature')}, Prompt: {conf.get('prompt')}")
    model = conf.get('model')
    temperature = conf.get('temperature')  
    prompt_name = conf.get('prompt')
    final_name = f"M_{model}_T_{temperature}_P_{prompt_name}"
    # print(final_name)
    return model, temperature, prompt_name, final_name

model, temperature, prompt_name, final_name = get_experiment_config(265)

Model: o3-mini-2025-01-31, Temperature: 1, Prompt: few_shot_qual


In [None]:
def creat_tasks(model_name,model_temprature,model_prompt):
    # Creating an array of json tasks
    tasks = []

    for index, row in sampled_df_300.iterrows():
        
        question_in_function = row['translated_text']
        
        task = {
            "custom_id": f"task-{index}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                # This is what you would have in your Chat Completions API call
                "model": model_name,
                "temperature": model_temprature,
                "response_format": {"type": "json_schema", "json_schema": {"name": "medical_question_responsse", "schema": {'properties': {'justification': {'description': 'every steps that stated in prompt which isnt final answer or confidence score',
    'title': 'Justification',
    'type': 'string'},
    'final_answer': {'description': 'The numerical final answer value. it should be 1 or 2 or 3 or 4',
    'title': 'Final Answer',
    'type': 'integer'},
    'confidence_score': {'description': 'A score indicating the confidence level of the final answer. from 1 to 10',
    'title': 'Confidence Score',
    'type': 'integer'}},
    'required': ['justification', 'final_answer', 'confidence_score'],
    'title': 'FinalAnswer',
    'type': 'object',
    'additionalProperties': False}, "strict": True}},
                "messages": [
                    {
                        "role": "user",
                        "content": prompts[model_prompt].format(medical_mcq=question_in_function)
                    }
                ]
            }
        }
        
        tasks.append(task)
    return tasks

In [8]:
def save_raw_jsonl(data, filename):
    with open(filename, 'w') as file:
        for obj in data:
            file.write(json.dumps(obj) + '\n')


In [9]:


def retrieve_the_batches(dictio):
    for key, value in dictio.items():
        print(f"retrieving ")
        batch_job_id = value['batch_job_id']
        retrievd_batch_job = open_client.batches.retrieve(batch_job_id)
        
        if dictio.get(key).get('retrieved') == False or dictio.get(key).get('retrieved') == None:
            print(f"retrieving {key}")
            if retrievd_batch_job.status == 'completed':
                print("completed")
                result_file_id = retrievd_batch_job.output_file_id
                result = open_client.files.content(result_file_id).content
                result_file_name = f"saved/{key}.jsonl"
                with open(result_file_name, 'wb') as file:
                    file.write(result)
                print(f"Saved {key} to {result_file_name}")
                dictio[key]['retrieved'] = True
            else:
                print("not completed")
                if f"raw_jsonl/{key}.jsonl" not in didnt_raw_json_path:
                    didnt_raw_json_path.append(f"raw_jsonl/{key}.jsonl")
                if f"{key}" not in didnt_raw_just_path:
                    didnt_raw_just_path.append(f"{key}")
                dictio[key]['retrieved'] = False
        print("_________________________________________________________")

        


        

In [10]:
def dummy_retrieve_the_batches(dictio):
    for key, value in dictio.items():
        
        if dictio.get(key).get('retrieved') == False or dictio.get(key).get('retrieved') == None:
            print("not completed")

                
        print("_________________________________________________________")

In [11]:


def retrieve_the_batches_o3(dictio):
    for key, value in dictio.items():
        if "_T_1_P_" in key:
            print(f"retrieving ")
            batch_job_id = value['batch_job_id']
            retrievd_batch_job = open_client.batches.retrieve(batch_job_id)
            
            if dictio.get(key).get('retrieved') == False or dictio.get(key).get('retrieved') == None:
                print(f"retrieving {key}")
                if retrievd_batch_job.status == 'completed':
                    print("completed")
                    result_file_id = retrievd_batch_job.output_file_id
                    result = open_client.files.content(result_file_id).content
                    result_file_name = f"saved_o3/{key}.jsonl"
                    with open(result_file_name, 'wb') as file:
                        file.write(result)
                    print(f"Saved {key} to {result_file_name}")
                    dictio[key]['retrieved'] = True
                else:
                    print("not completed")

                    dictio[key]['retrieved'] = False
            print("_________________________________________________________")

        


        

In [12]:
def save_dict_temp(dictio,i):
    with open(f"didnt_dict/dict_iter_{i}.pkl", 'wb') as file:
        pickle.dump(dictio, file)

# with open('data.pkl', 'rb') as file:
#     loaded_dict = pickle.load(file)

In [13]:

def retrieve_the_didnt(dictio):
    for key, value in dictio.items():
        print(f"retrieving ")
        batch_job_id = value['batch_job_id']
        retrievd_batch_job = open_client.batches.retrieve(batch_job_id)
        
        if dictio.get(key).get('retrieved') == False or dictio.get(key).get('retrieved') == None:
            print(f"retrieving {key}")
            if retrievd_batch_job.status == 'completed':
                print("completed")
                result_file_id = retrievd_batch_job.output_file_id
                result = open_client.files.content(result_file_id).content
                result_file_name = f"saved_didnt/{key}.jsonl"
                with open(result_file_name, 'wb') as file:
                    file.write(result)
                print(f"Saved {key} to {result_file_name}")
                dictio[key]['retrieved'] = True
            else:
                print("not completed")
                # if f"raw_jsonl/{key}.jsonl" not in didnt_raw_json_path:
                #     didnt_raw_json_path.append(f"raw_jsonl/{key}.jsonl")
                # if f"{key}" not in didnt_raw_just_path:
                #     didnt_raw_just_path.append(f"{key}")
                dictio[key]['retrieved'] = False
        print("_________________________________________________________")

## pipeline

In [14]:
didnt_batches_dict = {}

In [16]:
## load from a pkl file
with open(r'didnt_dict\dict_iter_107.pkl', 'rb') as file:
    didnt_batches_dict = pickle.load(file)

In [17]:
didnt_batches_dict

{'M_gpt-4o-2024-08-06_T_0.7_P_roleplay_10': {'raw_file_name': 'M_gpt-4o-2024-08-06_T_0.7_P_roleplay_10',
  'batch_file': FileObject(id='file-Xi1SduVPMVRoa7mie6Co5t', bytes=659678, created_at=1738609436, filename='M_gpt-4o-2024-08-06_T_0.7_P_roleplay_10.jsonl', object='file', purpose='batch', status='processed', status_details=None),
  'batch_job': Batch(id='batch_67a1131d38a48190bef2eab1db750076', completion_window='24h', created_at=1738609437, endpoint='/v1/chat/completions', input_file_id='file-Xi1SduVPMVRoa7mie6Co5t', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1738695837, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0)),
  'batch_job_id': 'batch_67a1131d38a48190bef2eab1db750076'},
 'M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_100': {'raw_file_name': 'M_gpt-4o-2024-08

In [18]:
retrieve_the_didnt(didnt_batches_dict)

retrieving 
retrieving M_gpt-4o-2024-08-06_T_0.7_P_roleplay_10
completed
Saved M_gpt-4o-2024-08-06_T_0.7_P_roleplay_10 to saved_didnt/M_gpt-4o-2024-08-06_T_0.7_P_roleplay_10.jsonl
_________________________________________________________
retrieving 
retrieving M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_100
completed
Saved M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_100 to saved_didnt/M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_100.jsonl
_________________________________________________________
retrieving 
retrieving M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_qual
completed
Saved M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_qual to saved_didnt/M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_qual.jsonl
_________________________________________________________
retrieving 
retrieving M_gpt-4o-2024-08-06_T_1_P_cot_fewshot_10
completed
Saved M_gpt-4o-2024-08-06_T_1_P_cot_fewshot_10 to saved_didnt/M_gpt-4o-2024-08-06_T_1_P_cot_fewshot_10.jsonl
_________________________________________________________
retrieving 
retrie

In [11]:
batch_dict = {} 

In [52]:
batch_dict_o3_new = {}

In [88]:
didnt_batches_dict = {}

In [93]:
for mamad in range(77,108):
    model, temperature, prompt_name, final_name = get_experiment_config(mamad)
    if final_name in didnt_raw_just_path:
        print("DOING")
        tasks = creat_tasks(model,temperature,prompt_name)
        final_file_name = fr"raw_jsonl\{final_name}.jsonl"
        save_raw_jsonl(tasks,final_file_name)
        batch_file = open_client.files.create(file=open(final_file_name, "rb"),purpose="batch")
        batch_job = open_client.batches.create(input_file_id=batch_file.id,endpoint="/v1/chat/completions",completion_window="24h")
        batch_job_id = batch_job.id
        
        didnt_batches_dict[final_name] = {
            'raw_file_name': final_name,
            'batch_file': batch_file,
            'batch_job': batch_job,
            'batch_job_id': batch_job_id
        }
        save_dict_temp(didnt_batches_dict,mamad)


Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: emotional_10
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: cot_100
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: few_shot_100
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: cot_fewshot_100
DOING
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: roleplay_100
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: zeroshot_100
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: emotional_100
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: cot_qual
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: few_shot_qual
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: cot_fewshot_qual
DOING
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: roleplay_qual
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: zeroshot_qual
Model: gpt-4o-2024-08-06, Temperature: 0.7, Prompt: emotional_qual
Model: gpt-4o-2024-08-06, Temperature: 1, Prompt: cot_10
Model: gpt-4o-2024-08-06, Temperature: 1, Prompt: few_shot_10
Model: gpt-4o-2024-0

In [23]:
batch_dict['M_gpt-4o-2024-08-06_T_0.3_P_few_shot_qual']['batch_job_id']

'batch_67a0e5b4ee048190a1e554878316b9c5'

In [94]:
batch_dict_o3_new

{'M_o3-mini-2025-01-31_T_0.3_P_cot_10': {'raw_file_name': 'M_o3-mini-2025-01-31_T_0.3_P_cot_10',
  'batch_file': FileObject(id='file-Hwy1RWDW4SsnfmDhZ7kcsY', bytes=778778, created_at=1738606280, filename='M_o3-mini-2025-01-31_T_0.3_P_cot_10.jsonl', object='file', purpose='batch', status='processed', status_details=None),
  'batch_job': Batch(id='batch_67a106c952fc819083719273cb136745', completion_window='24h', created_at=1738606281, endpoint='/v1/chat/completions', input_file_id='file-Hwy1RWDW4SsnfmDhZ7kcsY', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1738692681, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0)),
  'batch_job_id': 'batch_67a106c952fc819083719273cb136745'},
 'M_o3-mini-2025-01-31_T_0.3_P_few_shot_10': {'raw_file_name': 'M_o3-mini-2025-01-31_T_0.3_P_fe

In [24]:
retrievd_batch_job_3 = open_client.batches.retrieve(batch_dict['M_gpt-4o-2024-08-06_T_0.3_P_few_shot_qual']['batch_job_id'])


Batch(id='batch_67a0e5b4ee048190a1e554878316b9c5', completion_window='24h', created_at=1738597813, endpoint='/v1/chat/completions', input_file_id='file-RfpmoE2B7rc2EuBzztcg6d', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1738600038, error_file_id=None, errors=None, expired_at=None, expires_at=1738684213, failed_at=None, finalizing_at=1738600019, in_progress_at=1738597814, metadata=None, output_file_id='file-ExxJDYaycgztewgiGTuvyd', request_counts=BatchRequestCounts(completed=300, failed=0, total=300))

In [25]:
retrievd_batch_job_3.status

'completed'

In [29]:
def remove_retrieved_key(dictionary):
    for value in dictionary.values():
        if isinstance(value, dict) and 'retrieved' in value:
            del value['retrieved']
            print(value)
    return dictionary

new_batch_dict = remove_retrieved_key(batch_dict)    

{'raw_file_name': 'M_gpt-4o-2024-08-06_T_0.3_P_few_shot_qual', 'batch_file': FileObject(id='file-RfpmoE2B7rc2EuBzztcg6d', bytes=966278, created_at=1738597811, filename='M_gpt-4o-2024-08-06_T_0.3_P_few_shot_qual.jsonl', object='file', purpose='batch', status='processed', status_details=None), 'batch_job': Batch(id='batch_67a0e5b4ee048190a1e554878316b9c5', completion_window='24h', created_at=1738597813, endpoint='/v1/chat/completions', input_file_id='file-RfpmoE2B7rc2EuBzztcg6d', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1738684213, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0)), 'batch_job_id': 'batch_67a0e5b4ee048190a1e554878316b9c5'}
{'raw_file_name': 'M_gpt-4o-2024-08-06_T_0.3_P_cot_fewshot_qual', 'batch_file': FileObject(id='file-M598DGoTo2A4Tt3cSfzQ1T', bytes

In [31]:
len(new_batch_dict)

41

In [86]:
didnt_raw_json_path = []
didnt_raw_just_path = []

In [87]:
retrieve_the_batches(new_batch_dict)



retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
retrieving M_gpt-4o-2024-08-06_T_0.7_P_roleplay_10
not completed
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
_________________________________________________________
retrieving 
retrieving M_

In [89]:
didnt_raw_just_path

['M_gpt-4o-2024-08-06_T_0.7_P_roleplay_10',
 'M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_100',
 'M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_qual',
 'M_gpt-4o-2024-08-06_T_1_P_cot_fewshot_10',
 'M_gpt-4o-2024-08-06_T_1_P_zeroshot_10',
 'M_gpt-4o-2024-08-06_T_1_P_emotional_100',
 'M_gpt-4o-2024-08-06_T_1_P_zeroshot_qual',
 'M_gpt-4o-2024-08-06_T_1_P_emotional_qual']

In [90]:
didnt_raw_json_path

['raw_jsonl/M_gpt-4o-2024-08-06_T_0.7_P_roleplay_10.jsonl',
 'raw_jsonl/M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_100.jsonl',
 'raw_jsonl/M_gpt-4o-2024-08-06_T_0.7_P_cot_fewshot_qual.jsonl',
 'raw_jsonl/M_gpt-4o-2024-08-06_T_1_P_cot_fewshot_10.jsonl',
 'raw_jsonl/M_gpt-4o-2024-08-06_T_1_P_zeroshot_10.jsonl',
 'raw_jsonl/M_gpt-4o-2024-08-06_T_1_P_emotional_100.jsonl',
 'raw_jsonl/M_gpt-4o-2024-08-06_T_1_P_zeroshot_qual.jsonl',
 'raw_jsonl/M_gpt-4o-2024-08-06_T_1_P_emotional_qual.jsonl']

In [68]:
batch_dict_o3

{'M_o3-mini-2025-01-31_T_0.3_P_cot_10': {'raw_file_name': 'M_o3-mini-2025-01-31_T_0.3_P_cot_10',
  'batch_file': FileObject(id='file-RXAQHkdkKGHUppT7JT1Mvj', bytes=789878, created_at=1738598485, filename='M_o3-mini-2025-01-31_T_0.3_P_cot_10.jsonl', object='file', purpose='batch', status='processed', status_details=None),
  'batch_job': Batch(id='batch_67a0e85655988190bd7f91b929fbe3fd', completion_window='24h', created_at=1738598486, endpoint='/v1/chat/completions', input_file_id='file-RXAQHkdkKGHUppT7JT1Mvj', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1738684886, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0)),
  'batch_job_id': 'batch_67a0e85655988190bd7f91b929fbe3fd'},
 'M_o3-mini-2025-01-31_T_0.3_P_few_shot_10': {'raw_file_name': 'M_o3-mini-2025-01-31_T_0.3_P_fe

In [96]:
batch_dict_o3_new_very = {}
for key,value in batch_dict_o3_new.items():
    if "_T_1_P_" in key:
        batch_dict_o3_new_very[key] = value

In [98]:
len(batch_dict_o3_new_very)

18

In [101]:
batch_dict_o3_new_very

{'M_o3-mini-2025-01-31_T_1_P_cot_10': {'raw_file_name': 'M_o3-mini-2025-01-31_T_1_P_cot_10',
  'batch_file': FileObject(id='file-VzX3NBKiU29jAurEb8mRSk', bytes=778178, created_at=1738606655, filename='M_o3-mini-2025-01-31_T_1_P_cot_10.jsonl', object='file', purpose='batch', status='processed', status_details=None),
  'batch_job': Batch(id='batch_67a10840b9c88190a4988e7d4306cbf0', completion_window='24h', created_at=1738606656, endpoint='/v1/chat/completions', input_file_id='file-VzX3NBKiU29jAurEb8mRSk', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1738693056, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0)),
  'batch_job_id': 'batch_67a10840b9c88190a4988e7d4306cbf0'},
 'M_o3-mini-2025-01-31_T_1_P_few_shot_10': {'raw_file_name': 'M_o3-mini-2025-01-31_T_1_P_few_shot_10'

In [103]:
retrieve_the_batches_o3(batch_dict_o3_new_very)

retrieving 
retrieving M_o3-mini-2025-01-31_T_1_P_cot_10
completed
Saved M_o3-mini-2025-01-31_T_1_P_cot_10 to saved_o3/M_o3-mini-2025-01-31_T_1_P_cot_10.jsonl
_________________________________________________________
retrieving 
retrieving M_o3-mini-2025-01-31_T_1_P_few_shot_10
completed
Saved M_o3-mini-2025-01-31_T_1_P_few_shot_10 to saved_o3/M_o3-mini-2025-01-31_T_1_P_few_shot_10.jsonl
_________________________________________________________
retrieving 
retrieving M_o3-mini-2025-01-31_T_1_P_cot_fewshot_10
completed
Saved M_o3-mini-2025-01-31_T_1_P_cot_fewshot_10 to saved_o3/M_o3-mini-2025-01-31_T_1_P_cot_fewshot_10.jsonl
_________________________________________________________
retrieving 
retrieving M_o3-mini-2025-01-31_T_1_P_roleplay_10
completed
Saved M_o3-mini-2025-01-31_T_1_P_roleplay_10 to saved_o3/M_o3-mini-2025-01-31_T_1_P_roleplay_10.jsonl
_________________________________________________________
retrieving 
retrieving M_o3-mini-2025-01-31_T_1_P_zeroshot_10
completed
Saved 

## tests

In [6]:
from openai.lib._pydantic import to_strict_json_schema
to_strict_json_schema(FinalAnswer)

{'properties': {'justification': {'description': 'every steps that stated in prompt which isnt final answer or confidence score',
   'title': 'Justification',
   'type': 'string'},
  'final_answer': {'description': 'The numerical final answer value. it should be 1 or 2 or 3 or 4',
   'title': 'Final Answer',
   'type': 'integer'},
  'confidence_score': {'description': 'A score indicating the confidence level of the final answer. from 1 to 10',
   'title': 'Confidence Score',
   'type': 'integer'}},
 'required': ['justification', 'final_answer', 'confidence_score'],
 'title': 'FinalAnswer',
 'type': 'object',
 'additionalProperties': False}

In [56]:
response_format= {"type": "json_schema", "json_schema": {"name": "math_response", "schema": {'properties': {'justification': {'description': 'every steps that stated in prompt which isnt final answer or confidence score',
   'title': 'Justification',
   'type': 'string'},
  'final_answer': {'description': 'The numerical final answer value. it should be 1 or 2 or 3 or 4',
   'title': 'Final Answer',
   'type': 'integer'},
  'confidence_score': {'description': 'A score indicating the confidence level of the final answer. from 1 to 10',
   'title': 'Confidence Score',
   'type': 'integer'}},
 'required': ['justification', 'final_answer', 'confidence_score'],
 'title': 'FinalAnswer',
 'type': 'object',
 'additionalProperties': False}, "strict": True}}

In [61]:
def get_categories():
    response = open_client.chat.completions.create(
    model="o3-mini",
    # This is to enable JSON mode, making sure responses are valid json objects
    response_format=response_format,
    messages=[
        {
            "role": "user",
            "content": prompts["cot_10"].format(medical_mcq=question_1)
        }
    ],
    )

    return response.choices[0].message.content

In [62]:
res = get_categories()

In [63]:
res

'{\n  "justification": "The patient’s presentation with chest pain that improves when sitting and worsens when lying down is characteristic of acute pericarditis. The typical electrocardiographic changes in acute pericarditis include diffuse ST segment elevation (often with a concave upward morphology) along with PR segment depressions, and reciprocal ST depressions in lead aVR (and sometimes V1). Option 1, which describes a downsloping ST segment in leads aVR and V1, best corresponds to these reciprocal changes. The other options describe findings that are more characteristic of myocardial infarction (such as Q waves) or do not reflect the typical ECG progression seen in acute pericarditis.",\n  "final_answer": 1,\n  "confidence_score": 9\n}'

In [32]:
# Creating an array of json tasks

tasks = []

for index, row in sampled_df_300.iterrows():
    
    question = row['translated_text']
    
    task = {
        "custom_id": f"task-{index}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            # This is what you would have in your Chat Completions API call
            "model": "gpt-4o-2024-08-06",
            "temperature": 0.3,
            "response_format": {"type": "json_schema", "json_schema": {"name": "math_response", "schema": {'properties': {'justification': {'description': 'every steps that stated in prompt which isnt final answer or confidence score',
   'title': 'Justification',
   'type': 'string'},
  'final_answer': {'description': 'The numerical final answer value. it should be 1 or 2 or 3 or 4',
   'title': 'Final Answer',
   'type': 'integer'},
  'confidence_score': {'description': 'A score indicating the confidence level of the final answer. from 1 to 10',
   'title': 'Confidence Score',
   'type': 'integer'}},
 'required': ['justification', 'final_answer', 'confidence_score'],
 'title': 'FinalAnswer',
 'type': 'object',
 'additionalProperties': False}, "strict": True}},
            "messages": [
                {
                    "role": "user",
                    "content": prompts["few_shot_qual"].format(medical_mcq=question_1)
                }
            ],
            "logprobs":True,
            "top_logprobs":1,
        }
    }
    
    tasks.append(task)

In [33]:
# Creating the file

file_name = f"{final_name}.jsonl"

with open(file_name, 'w') as file:
    for obj in tasks:
        file.write(json.dumps(obj) + '\n')

In [34]:
batch_file = open_client.files.create(
  file=open(file_name, "rb"),
  purpose="batch"
)

In [35]:
print(batch_file)

FileObject(id='file-JYAHGstZ2dwVLYeoGGAPvH', bytes=955090, created_at=1738592715, filename='M_gpt-4o-2024-08-06_T_0.3_P_few_shot_qual.jsonl', object='file', purpose='batch', status='processed', status_details=None)


In [36]:
batch_job = open_client.batches.create(
  input_file_id=batch_file.id,
  endpoint="/v1/chat/completions",
  completion_window="24h"
)

In [40]:
batch_job = open_client.batches.retrieve(batch_job.id)
print(batch_job)

Batch(id='batch_67a0d1f1482081908f87250daaa82199', completion_window='24h', created_at=1738592753, endpoint='/v1/chat/completions', input_file_id='file-JYAHGstZ2dwVLYeoGGAPvH', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1738592959, error_file_id=None, errors=None, expired_at=None, expires_at=1738679153, failed_at=None, finalizing_at=1738592932, in_progress_at=1738592755, metadata=None, output_file_id='file-HfGXVAEqDhEu17p291SXFm', request_counts=BatchRequestCounts(completed=300, failed=0, total=300))


In [41]:
result_file_id = batch_job.output_file_id
result = open_client.files.content(result_file_id).content
result_file_name = f"saved/{final_name}.jsonl"

with open(result_file_name, 'wb') as file:
    file.write(result)

In [None]:
"response_format"= {"type": "json_schema", "json_schema": {"name": "math_response", "schema": {"type": "object", "properties": {"steps": {"type": "array", "items": {"type": "object", "properties": {"explanation": {"type": "string"}, "output": {"type": "string"}}, "required": ["explanation", "output"], "additionalProperties": false}}, "final_answer": {"type": "string"}}, "required": ["steps", "final_answer"], "additionalProperties": false}, "strict": true}}

In [None]:
tasks = []

for index, row in df.iterrows():
    
    description = row['Overview']
    
    task = {
        "custom_id": f"task-{index}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            # This is what you would have in your Chat Completions API call
            "model": "gpt-4o-mini",
            "temperature": 0.1,
            "response_format": { 
                "type": "json_object"
            },
            "messages": [
                {
                    "role": "system",
                    "content": categorize_system_prompt
                },
                {
                    "role": "user",
                    "content": description
                }
            ],
        }
    }
    
    tasks.append(task)