In [6]:
# importing necessities
import json
from openai import OpenAI, OpenAIError
from dotenv import load_dotenv
import os

In [12]:
# loading API key
# load_dotenv() runs before the client is constructed; presumably it pulls the
# OpenAI credentials from a local .env file into the environment -- TODO confirm.
load_dotenv()
# Module-level client shared by call_compare_api().
client = OpenAI()

def chunk_text(text, max_words=1500):
    """Split *text* into consecutive chunks of at most *max_words* words.

    Words are whatever ``str.split()`` yields, so runs of whitespace are
    collapsed and each chunk is re-joined with single spaces. Text with no
    words produces an empty list.
    """
    tokens = text.split()
    chunks = []
    for start in range(0, len(tokens), max_words):
        window = tokens[start:start + max_words]
        chunks.append(" ".join(window))
    return chunks

def parse_response(response_text):
    """Turn the model's reply into a dict with ``rating``/``explanation`` keys.

    Args:
        response_text: Raw reply text, expected to be a JSON object.

    Returns:
        The decoded JSON object when the reply is a valid JSON object.
        Otherwise ``{"rating": None, "explanation": <raw reply>}`` so callers
        can always use ``.get()`` on the result. A ``None`` reply yields an
        empty explanation.
    """
    try:
        parsed = json.loads(response_text)
    except json.JSONDecodeError:
        # Not JSON at all: keep the raw text so nothing is lost.
        return {"rating": None, "explanation": response_text}
    except TypeError:
        # json.loads rejects anything that is not str/bytes (notably None);
        # fall back to the same "no rating" shape instead of crashing.
        fallback = "" if response_text is None else str(response_text)
        return {"rating": None, "explanation": fallback}

    if not isinstance(parsed, dict):
        # Valid JSON but not an object (e.g. "4" or "[1, 2]"); downstream code
        # calls .get() on the result, so normalise to the fallback shape.
        return {"rating": None, "explanation": response_text}
    return parsed

def aggregate_results(results):
    """Combine per-chunk comparison results into one summary dict.

    Args:
        results: Iterable of dicts, each optionally holding ``"rating"`` and
            ``"explanation"`` keys.

    Returns:
        ``{"average_rating": <mean or None>, "combined_explanation": <str>}``.
        The mean covers only ratings that are numbers or numeric strings; it
        is ``None`` when no usable rating exists. Explanations are joined
        with ``"\\n---\\n"`` in input order.
    """
    ratings = []
    explanations = []
    for result in results:
        rating = result.get("rating")
        if isinstance(rating, str):
            # The rating can arrive as a quoted number ("4"); accept it
            # rather than letting sum() fail on mixed int/str values.
            try:
                rating = float(rating)
            except ValueError:
                rating = None
        # bool is a subclass of int, but true/false is not a rating.
        if isinstance(rating, (int, float)) and not isinstance(rating, bool):
            ratings.append(rating)

        explanation = result.get("explanation", "")
        # str.join() requires strings; a null or non-text explanation must
        # not abort the whole aggregation.
        explanations.append("" if explanation is None else str(explanation))

    avg_rating = sum(ratings) / len(ratings) if ratings else None
    combined_explanation = "\n---\n".join(explanations)
    return {"average_rating": avg_rating, "combined_explanation": combined_explanation}

def call_compare_api(abstract, summary_chunk):
    """Ask the chat model to rate a summary (or summary chunk) against an abstract.

    Sends one user message to ``gpt-3.5-turbo`` with ``temperature=0`` and a
    300-token reply limit, and returns the text content of the first choice.
    The prompt asks for a JSON object with ``rating`` and ``explanation``.
    """
    user_message = f"""
You are an expert in psychology research. Compare the following abstract and summary:

Abstract:
{abstract}

Summary:
{summary_chunk}

Please respond ONLY with JSON in this format:
{{"rating": <number from 1 to 5>, "explanation": "<text>"}}
"""
    conversation = [{"role": "user", "content": user_message}]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=300,
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def compare_abstract_summary(abstract, summary):
    """Compare an abstract with its summary, chunking the summary if it is too long.

    The whole summary is tried first. If that attempt raises an error whose
    message contains ``context_length_exceeded``, the summary is split into
    1500-word chunks, each chunk is compared separately, and the aggregated
    result is returned. Any other error is printed and returned as a result
    with ``rating`` set to ``None`` and the error text as the explanation.
    """
    try:
        return parse_response(call_compare_api(abstract, summary))
    except Exception as e:
        # Guard clause: anything other than a context-length failure is
        # reported and handed back as a "no rating" result.
        if "context_length_exceeded" not in str(e):
            print(f"Error in compare_abstract_summary: {e}")
            return {"rating": None, "explanation": str(e)}

        print("Context length exceeded. Chunking summary...")
        pieces = chunk_text(summary, max_words=1500)
        total = len(pieces)
        per_chunk = []
        for position, piece in enumerate(pieces, 1):
            print(f"Comparing chunk {position} of {total}...")
            raw_reply = call_compare_api(abstract, piece)
            per_chunk.append(parse_response(raw_reply))
        return aggregate_results(per_chunk)

# loading JSON file with abstracts and summaries
INPUT_JSON_PATH = "new_data/summaries.json"
OUTPUT_JSON_PATH = "new_data/comparison_results.json"

# Open with an explicit UTF-8 encoding so the result does not depend on the
# platform's default text encoding.
try:
    with open(INPUT_JSON_PATH, "r", encoding="utf-8") as f:
        papers = json.load(f)
except FileNotFoundError:
    print(f"Input file {INPUT_JSON_PATH} not found.")
    papers = []

# Maps each paper's filename to its comparison result.
comparison_results = {}

try:
    for paper in papers:
        filename = paper.get("filename", "unknown")
        abstract = paper.get("abstract", "")
        summary = paper.get("summary", "")
        print(f"Comparing abstract and summary for {filename}...")
        try:
            result = compare_abstract_summary(abstract, summary)
            comparison_results[filename] = result
        except Exception as e:
            print(f"Error comparing {filename}: {e}")
            # Record the failure in the same shape compare_abstract_summary
            # uses for errors, so the paper is not silently missing from the
            # output file.
            comparison_results[filename] = {"rating": None, "explanation": str(e)}

finally:
    # saves results in case of error along the way
    with open(OUTPUT_JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(comparison_results, f, indent=2)
    print(f"Comparison completed and results saved to '{OUTPUT_JSON_PATH}'.")


Comparing abstract and summary for new18.pdf...
Comparing abstract and summary for new24.pdf...
Comparing abstract and summary for psychpaper14.pdf...
Comparing abstract and summary for psychpaper15.pdf...
Comparing abstract and summary for psychpaper01.pdf...
Comparing abstract and summary for new25.pdf...
Comparing abstract and summary for new19.pdf...
Comparing abstract and summary for psychpaper17.pdf...
Comparing abstract and summary for psychpaper03.pdf...
Comparing abstract and summary for psychpaper02.pdf...
Comparing abstract and summary for psychpaper16.pdf...
Comparing abstract and summary for new22.pdf...
Comparing abstract and summary for psychpaper12.pdf...
Comparing abstract and summary for psychpaper07.pdf...
Comparing abstract and summary for psychpaper13.pdf...
Comparing abstract and summary for new23.pdf...
Comparing abstract and summary for new21.pdf...
Comparing abstract and summary for new09.pdf...
Comparing abstract and summary for psychpaper05.pdf...
Comparing a