In [None]:
import openai
import json
import math
from openai.lib._parsing._completions import type_to_response_format_param
import pandas as pd

import functions.prompts as prompts

# Shared OpenAI client used by the upload, batch-creation, retrieval and
# download cells below. No API key is passed here, so it is presumably picked
# up from the environment -- TODO confirm.
client = openai.Client()

In [None]:
# Load the list of papers. Later cells read the "id" column (used for the
# input text file name and the request custom_id) and the "name" column
# (used as the paper title in the prompt).
df = pd.read_csv("papers.csv")
df.head()

In [None]:
def wrap(body, id):
    """Wrap a request body as one line item of a batch JSONL file.

    Args:
        body: Request payload stored under the "body" key.
        id: Value stored as the item's "custom_id".

    Returns:
        A dict with the keys "custom_id", "method", "url" and "body" (in that
        order) describing a POST to "/v1/chat/completions".
    """
    request = dict(custom_id=id, method="POST", url="/v1/chat/completions")
    request["body"] = body
    return request
    
def gen_body(text, top5=True):
    """Build the chat-completions request body for one paper.

    Args:
        text: Content of the user message.
        top5: When truthy, use ``prompts.top5()`` as the system prompt and
            ``prompts.Top5Model`` as the response schema; otherwise use
            ``prompts.analysis()`` and ``prompts.AnalysisModel``.

    Returns:
        A dict with the keys "model", "messages" and "response_format".
    """
    if top5:
        system_prompt = prompts.top5()
        schema_model = prompts.Top5Model
    else:
        system_prompt = prompts.analysis()
        schema_model = prompts.AnalysisModel

    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": text}

    # NOTE: type_to_response_format_param is imported from an underscore-
    # prefixed (private) openai module path, so it may move between releases.
    return {
        "model": "gpt-4o-mini",
        "messages": [system_message, user_message],
        "response_format": type_to_response_format_param(schema_model),
    }


In [None]:
import numpy as np

# Number of JSONL batch files the requests are spread over; the upload cell
# loops over range(divider) to send them.
divider = 1

# Batch files that have already been (re)started during this run. The first
# write to each file uses mode 'w', so re-running this cell regenerates the
# files instead of appending a second copy of every request (which would
# duplicate every custom_id).
started_files = set()

for index, row in df.iterrows():
    paper_id = row["id"]

    # The paper's text is expected at output/<id>.txt.
    with open(f'output/{paper_id}.txt', 'r') as paper_file:
        text = paper_file.read()

    paper = f"PAPER TITLE: {row['name']}\n\nPAPER TEXT: {text}"

    # Three repetitions of each prompt per paper. The custom_id encodes
    # "<paper id>|<repetition>+<prompt kind>", which the result-merging cell
    # splits apart again.
    requests = [
        *[wrap(gen_body(paper), f"{paper_id}|{i}+top5") for i in range(3)],
        *[wrap(gen_body(paper, top5=False), f"{paper_id}|{i}+analysis") for i in range(3)],
    ]

    # Contiguous runs of rows go to the same file: rows are split into
    # `divider` roughly equal chunks by index.
    file_index = math.floor(index / (len(df) / divider))

    mode = 'a' if file_index in started_files else 'w'
    started_files.add(file_index)

    with open(f"dump/openai-batch-jsonl/batch-{file_index}.jsonl", mode) as batch_file:
        # One JSON document per line, each line newline-terminated.
        batch_file.write("\n".join(json.dumps(request) for request in requests) + "\n")

In [None]:
# Upload every generated JSONL file; the returned file objects are needed to
# create the batches in the next cell.
batch_input_files = []

for i in range(divider):
    print(f"Sending batch {i}")
    # Read from the directory the generation cell writes to
    # (dump/openai-batch-jsonl/), and close the handle once the upload call
    # has returned.
    with open(f"dump/openai-batch-jsonl/batch-{i}.jsonl", "rb") as batch_file:
        batch_input_file = client.files.create(
            file=batch_file,
            purpose="batch"
        )
    print(f"{i} {batch_input_file.id}")
    batch_input_files.append(batch_input_file)

print(batch_input_files)

In [None]:
# KINDA DANGEROUS
# Every run of this cell calls client.batches.create once per uploaded file,
# i.e. it submits a fresh set of batch jobs each time it is executed.
batch_objects = []
for set_no, uploaded in enumerate(batch_input_files):
    created = client.batches.create(
        input_file_id=uploaded.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"description": f"[AI REVIEWER] Set {set_no}"},
    )
    print(f"{set_no} {created.id}")
    batch_objects.append(created)

In [None]:
# Persist the batch IDs, one per line with no trailing newline, so they can be
# reloaded in a later session.
with open("dump/openai-batch/openai-batches.txt", 'w') as ids_file:
    ids_file.write("\n".join(batch.id for batch in batch_objects))

In [None]:
# Reload the saved batch IDs (one per line). Blank lines and surrounding
# whitespace are dropped so that an empty file, a trailing newline or "\r\n"
# line endings do not produce bogus IDs for the retrieval cell.
batches = []
with open("dump/openai-batch/openai-batches.txt", 'r') as f:
    batches = [line.strip() for line in f.read().split("\n") if line.strip()]

In [None]:
# Look up every batch, print its current state, and collect the
# output_file_id it reports.
file_ids = []
for batch_id in batches:
    batch_info = client.batches.retrieve(batch_id)
    print(batch_info)
    file_ids.append(batch_info.output_file_id)

In [None]:
# Show the output file IDs collected from the retrieved batches.
file_ids

In [None]:
# Append the content of every batch output file to one combined JSONL file.
# The file is opened in append mode, so running this cell again adds the same
# records a second time.
with open("dump/eval-output/openai-output.jsonl", 'a') as output_file:
    for file_id in file_ids:
        if file_id is None:
            # No output file id was reported for this batch (presumably it
            # has not finished yet -- TODO confirm); nothing to download.
            print("Skipping a batch without an output file id")
            continue

        content = client.files.content(file_id).text
        output_file.write(content)
        # Keep records from consecutive files on separate lines even when a
        # payload does not end with a newline.
        if content and not content.endswith("\n"):
            output_file.write("\n")

In [None]:
import pandas as pd
import json

# Start from a fresh copy of the paper list; one "openai-<metric>-<run>"
# column is added per metric and repetition.
df = pd.read_csv("papers.csv")

with open("dump/eval-output/openai-output.jsonl", "r") as results_file:
    file_response = results_file.read()

# Map each paper id, as text, to the label of its first row. custom_id values
# are strings, so comparing them against a numeric "id" column would never
# match; converting to str makes the lookup work for both cases.
row_by_id = {}
for label, paper_id in df['id'].items():
    row_by_id.setdefault(str(paper_id), label)

for line in file_response.split("\n"):
    # Skip blank lines instead of blindly dropping the last element, so the
    # final record is kept even when the file has no trailing newline.
    if not line.strip():
        continue

    record = json.loads(line)

    # custom_id has the form "<paper id>|<repetition>+<prompt kind>". Split on
    # the last "|" so a "|" inside the paper id does not break parsing.
    id, _, run_tag = record['custom_id'].rpartition("|")
    no, typ = run_tag.split("+")

    if id not in row_by_id:
        raise KeyError(f"custom_id {record['custom_id']!r} does not match any id in papers.csv")
    idx = row_by_id[id]

    content = record['response']['body']['choices'][0]['message']['content']

    if typ == "top5":
        metrics = ['score']
        validateModel = prompts.Top5Model
    else:
        metrics = ['originality', 'rigor', 'scope', 'impact', 'written_by_ai']
        validateModel = prompts.AnalysisModel

    # Parse and validate the response once per record, not once per metric.
    parsed = validateModel.model_validate_json(content)

    for metric in metrics:
        # Repetitions are numbered from 0 in custom_id but from 1 in the
        # column names.
        column_name = f"openai-{metric}-{int(no)+1}"

        if column_name not in df.columns:
            df[column_name] = None

        df.loc[idx, column_name] = getattr(parsed, metric)

In [None]:
# Display the paper table including the "openai-*" result columns added by the
# merging cell.
df

In [None]:
# Write the merged results to result_o.csv, leaving out the DataFrame index.
df.to_csv("result_o.csv", index=False)