In [19]:
pip install openai

Note: you may need to restart the kernel to use updated packages.


In [35]:
import os
import json
import re
from collections import Counter
from dotenv import load_dotenv
import openai

# Load environment variables (including OPENAI_API_KEY) from a local .env file:
load_dotenv()

True

In [37]:
# Hand the key from the environment to the OpenAI client; the value is None when the variable is unset.
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [39]:
def extract_data(data_split_path):
    """
    Collect papers and their review comments from one PeerRead-style data split.

    Every `<data_split_path>/reviews/<paper_id>.json` file is paired with
    `<data_split_path>/parsed_pdfs/<paper_id>.pdf.json`. Papers that have no
    parsed PDF, or no non-empty review comment, are skipped.

    Args:
        data_split_path: Directory containing the `reviews` and `parsed_pdfs`
            sub-directories.

    Returns:
        A list of dicts with the keys 'title', 'abstract', 'paper_content'
        (title, abstract and section texts joined by spaces) and 'reviews'
        (list of stripped comment strings). The list is empty when either
        sub-directory is missing.
    """
    reviews_path = os.path.join(data_split_path, 'reviews')
    parsed_pdfs_path = os.path.join(data_split_path, 'parsed_pdfs')
    data_list = []

    if not os.path.exists(reviews_path) or not os.path.exists(parsed_pdfs_path):
        print("Reviews path or parsed PDFs path does not exist.")
        return data_list

    # os.listdir returns entries in arbitrary order; sort so the papers picked
    # as few-shot examples are the same on every run.
    for review_filename in sorted(os.listdir(reviews_path)):
        if not review_filename.endswith('.json'):
            continue

        # The file name (without extension) corresponds to the paper ID.
        paper_id = os.path.splitext(review_filename)[0]
        parsed_pdf_file = os.path.join(parsed_pdfs_path, f"{paper_id}.pdf.json")
        if not os.path.exists(parsed_pdf_file):
            # Without the parsed PDF there is no paper text to show; skip it.
            continue

        review_file_path = os.path.join(reviews_path, review_filename)
        with open(review_file_path, 'r', encoding='utf-8') as f:
            paper_data = json.load(f)
        with open(parsed_pdf_file, 'r', encoding='utf-8') as f_pdf:
            parsed_pdf = json.load(f_pdf)

        # JSON `null` values come back as None and bypass `.get(key, default)`,
        # so fall back with `or` instead of relying on the default argument.
        metadata = parsed_pdf.get('metadata') or {}
        title = metadata.get('title') or 'No title'
        abstract_text = metadata.get('abstractText') or ''

        # Prefer `pdf_parse.body_text` (entries keyed 'section'/'text'); otherwise
        # fall back to `metadata.sections` (entries keyed 'heading'/'text').
        # NOTE(review): the fallback layout is assumed to be what PeerRead's
        # parsed PDFs use - confirm against the data on disk.
        sections = (
            (parsed_pdf.get('pdf_parse') or {}).get('body_text')
            or metadata.get('sections')
            or []
        )
        section_texts = ' '.join(
            f"{section.get('section') or section.get('heading') or ''}: "
            f"{section.get('text') or ''}"
            for section in sections
            if isinstance(section, dict)
        )
        full_body = f"{title} {abstract_text} {section_texts}".strip()

        # Keep only reviews that carry a non-empty textual comment.
        reviews = paper_data.get('reviews', [])
        review_texts = []
        if isinstance(reviews, list):
            for review in reviews:
                if not isinstance(review, dict):
                    continue
                comments = review.get('comments')
                if isinstance(comments, str) and comments.strip():
                    review_texts.append(comments.strip())

        if review_texts:
            data_list.append({
                'title': title,
                'abstract': abstract_text,
                'paper_content': full_body,
                'reviews': review_texts
            })

    return data_list




In [41]:
def create_prompt(data_list, num_examples=3, max_content_chars=500):
    """
    Build a few-shot prompt asking the model to review one more paper.

    The first `num_examples` entries of `data_list` are rendered as worked
    examples (paper details followed by the paper's first review); the entry at
    index `num_examples` is rendered last with an open "Feedback:" line for the
    model to complete.

    Args:
        data_list: List of dicts with the keys 'title', 'abstract',
            'paper_content' and 'reviews'.
        num_examples: Number of worked examples placed before the target paper.
        max_content_chars: Number of leading characters of each paper's content
            to include in the prompt.

    Returns:
        The prompt as a single string.

    Raises:
        ValueError: If `num_examples` is negative.
        IndexError: If `data_list` has fewer than `num_examples + 1` entries.
    """
    if num_examples < 0:
        raise ValueError("num_examples must be non-negative.")
    if len(data_list) <= num_examples:
        # Same exception type the unguarded index used to raise, but with a
        # message that says what is missing.
        raise IndexError(
            f"Need at least {num_examples + 1} papers ({num_examples} examples plus "
            f"one target), but only {len(data_list)} were provided."
        )

    def describe(number, paper):
        # Shared layout for example papers and the target paper.
        return (
            f"Paper {number}:\n"
            f"Title: {paper['title']}\n"
            f"Abstract: {paper['abstract']}\n"
            f"Paper Content: {paper['paper_content'][:max_content_chars]}...\n"  # Truncate for brevity
        )

    parts = []
    for number, data in enumerate(data_list[:num_examples], start=1):
        review = data['reviews'][0] if data['reviews'] else "No review available."
        parts.append(f"{describe(number, data)}Feedback: {review}\n\n")

    # The target paper ends with an open "Feedback:" for the model to continue.
    parts.append(f"{describe(num_examples + 1, data_list[num_examples])}Feedback:")

    return "".join(parts)

In [43]:

def main():
    """
    Build a few-shot prompt from the ACL 2017 training split and print the
    feedback the completion model generates for the fourth paper.

    Prints a message and returns early when fewer than four usable papers are
    found; any exception raised by the API call is caught and printed.
    """
    # Using the acl_2017 split first.
    papers = extract_data('PeerRead/data/acl_2017/train')

    # Three worked examples plus one target paper are required.
    if len(papers) >= 4:
        few_shot_prompt = create_prompt(papers, num_examples=3)
    else:
        print("Not enough data to create a prompt.")
        return

    # The engine can be changed here; see
    # https://stackoverflow.com/questions/77789886/openai-api-error-the-model-text-davinci-003-has-been-deprecated
    request_options = dict(
        engine='gpt-3.5-turbo-instruct',
        prompt=few_shot_prompt,
        max_tokens=150,
        temperature=0.7,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    try:
        completion = openai.Completion.create(**request_options)
        # The first choice holds the generated review text.
        feedback_text = completion.choices[0].text.strip()
        print("Generated Feedback:\n", feedback_text)
    except Exception as err:
        print("An error occurred while calling the OpenAI API:", err)


if __name__ == "__main__":
    main()  # Run the end-to-end example.


An error occurred while calling the OpenAI API: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
