# Process 300 papers using GPT-4o and GPT-4o-mini

In [1]:
import os
import json
from keys import (
    DEPLOYMENT_GPT_4o_MINI,
    DEPLOYMENT_GPT_4o,
    API_VERSION,
    ENDPOINT,
    KEY1
)

from llm_client import LLMClient

### Helper Functions

In [2]:
def get_300_sampled_papers():
    """Return the entry names found in the 300-paper sample directory.

    Lists ``review-5k-dataset/300`` (relative to the current working
    directory) and leaves out the macOS ``.DS_Store`` metadata entry.
    The order is whatever ``os.listdir`` yields.
    """
    sample_dir = os.path.join("review-5k-dataset", "300")
    return [entry for entry in os.listdir(sample_dir) if entry != ".DS_Store"]

def load_already_processed_papers(deployment):
    """Return the response file names already saved for ``deployment``.

    Lists ``LLM_Responses/<deployment>`` (relative to the current working
    directory) and leaves out the macOS ``.DS_Store`` metadata entry.

    Args:
        deployment: Name of the sub-directory of ``LLM_Responses`` to list.

    Returns:
        A list of entry names, in ``os.listdir`` order. The list is empty
        when the directory does not exist yet.
    """
    processed_path = os.path.join("LLM_Responses", deployment)
    try:
        entries = os.listdir(processed_path)
    except FileNotFoundError:
        # No output folder yet means nothing has been processed for this
        # deployment; treat it as an empty result rather than an error.
        return []
    return [entry for entry in entries if entry != ".DS_Store"]

def load_paper(paper_json):
    """Parse one paper file from the 300-paper sample directory.

    Args:
        paper_json: File name inside ``review-5k-dataset/300``.

    Returns:
        The object decoded from the file's UTF-8 JSON content.
    """
    sample_dir = os.path.join("review-5k-dataset", "300")
    with open(os.path.join(sample_dir, paper_json), "r", encoding="utf-8") as handle:
        return json.load(handle)

def save_llm_response(response_file, deployment, llm_response):
    """Write an LLM response to ``LLM_Responses/<deployment>/<response_file>``.

    The target directory is created when it does not exist, so the first
    save for a new deployment does not fail. An existing file with the same
    name is overwritten.

    Args:
        response_file: Name of the output file.
        deployment: Name of the sub-directory of ``LLM_Responses`` to write to.
        llm_response: Text to write, encoded as UTF-8.
    """
    txt_path = os.path.join("LLM_Responses", deployment, response_file)
    # Ensure the per-deployment output folder exists before opening the file.
    os.makedirs(os.path.dirname(txt_path), exist_ok=True)
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(llm_response)

# Run Code

In [3]:
# Deployment used for the LLM requests below; it also names the output
# folder under LLM_Responses. DEPLOYMENT_GPT_4o_MINI is the other imported option.
DEPLOYMENT = DEPLOYMENT_GPT_4o

In [4]:
# Papers to process. The commented line selects every file in the sample
# directory; the explicit list restricts the run to a single paper.
# sampled_300 = get_300_sampled_papers()
sampled_300 = [
    "a7eIuzEh2R.json",
]
# Response files listed here are skipped. The commented line loads the ones
# already saved for DEPLOYMENT; the empty list reprocesses everything above.
# already_processed = load_already_processed_papers(DEPLOYMENT)
already_processed = []

for paper in sampled_300:
    paper_dict = load_paper(paper)
    paper_id = paper_dict["id"]
    paper_txt = f"{paper_id}.txt"
    # Skip before building the prompt so no work is done for finished papers.
    if paper_txt in already_processed:
        continue

    # Build a new list rather than calling messages.remove() while iterating:
    # removing during iteration skips the element after each removed one,
    # which could leave an assistant or system message in the prompt.
    messages = []
    for msg in paper_dict["messages"]:
        role = msg["role"]
        if role == "assistant":
            # Drop the assistant turns stored with the paper.
            continue
        if role == "system":
            # System instructions are sent with the "user" role instead.
            msg = {**msg, "role": "user"}
        messages.append(msg)
    messages.append({
        "role": "user",
        "content": "You are required to generate **4** reviews for this paper, following the instructions given above. Make sure that all the reviews are separated by a visible indicator."
    })

    # NOTE(review): a fresh client is created per paper, as before; it could
    # be hoisted out of the loop if LLMClient keeps no per-request state.
    llm_c = LLMClient(
        api_key=KEY1,
        api_version=API_VERSION,
        endpoint=ENDPOINT,
        deployment=DEPLOYMENT
    )
    llm_resp = llm_c.get_llm_response(messages=messages)
    if llm_resp:
        save_llm_response(paper_txt, DEPLOYMENT, llm_resp)
    else:
        print(f"[-] Failed to process: {paper_txt}")