# Systematically reviewing the literature using the PRISMA method and LLMs

In this notebook, I experimentally test how LLMs may help filter out some papers when systematically reviewing the literature with the PRISMA method.

#### Utilities

In [1]:
import getpass
import html
import os
import re
from typing import Optional

import requests
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import HumanMessage


# Pre-compiled patterns used to scrape an arXiv abstract page.
# Each one captures the text of interest in group 1.

# Contents of the page's <title> element (single line only: no DOTALL here).
title_pattern = re.compile(r'<title>(.*?)</title>')

# Body of the abstract blockquote, without the "Abstract:" descriptor span.
# DOTALL lets the capture run across line breaks.
abstract_pattern = re.compile(
    r'<blockquote class="abstract mathjax">\s*'
    r'<span class="descriptor">Abstract:</span>\s*'
    r'(.*?)\s*</blockquote>',
    flags=re.DOTALL,
)

# Table cell that follows the "Keywords:" label cell.
keywords_pattern = re.compile(
    r'<td class="tablecell keywords">Keywords:</td>\s*'
    r'<td class="tablecell">(.*?)</td>',
    flags=re.DOTALL,
)

def fetch_arxiv_data(arxiv_id: str, timeout: float = 30.0):
    """Fetch the title, abstract, and keywords of an arXiv paper.

    The paper's abstract page is downloaded from export.arxiv.org and scraped
    with the module-level ``title_pattern``, ``abstract_pattern`` and
    ``keywords_pattern`` regexes.

    Args:
        arxiv_id: The arXiv identifier, e.g. ``"2401.04088"``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with the keys ``"title"``, ``"abstract"`` and ``"keywords"``.
        A field whose pattern does not match the page is returned as ``""``
        (note: arXiv doesn't always provide keywords).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # Without a timeout a stalled connection would hang the notebook forever.
    res = requests.get(f"https://export.arxiv.org/abs/{arxiv_id}", timeout=timeout)
    # Fail loudly on an unknown ID or server error instead of scraping the
    # error page and sending it to the LLM as if it were a paper.
    res.raise_for_status()
    html_content = res.text

    def _extract(pattern):
        """Return the cleaned first capture group of ``pattern``, or ""."""
        match = pattern.search(html_content)
        if match is None:
            return ""
        # Decode HTML entities (&amp;, &#x27;, ...) so callers get plain text.
        return html.unescape(match.group(1)).strip()

    return {
        "title": _extract(title_pattern),
        "abstract": _extract(abstract_pattern),
        "keywords": _extract(keywords_pattern),
    }

In [2]:
# Disabled example: the code is wrapped in a string literal, so running this
# cell does not call fetch_arxiv_data; the cell just echoes the string back.
"""
# Example usage
arxiv_id = "2401.04088"
paper_data = fetch_arxiv_data(arxiv_id)
paper_data
"""

'\n# Example usage\narxiv_id = "2401.04088"\npaper_data = fetch_arxiv_data(arxiv_id)\npaper_data\n'

In [3]:
def ask_inclusion_criteria_with_langchain(llm: ChatOpenAI, title: str, abstract: str, keywords: str, questions: list):
    """Ask an LLM each inclusion/exclusion question about one paper.

    Every question is sent twice: once asking for a bare Yes/No, and once
    asking for Yes/No plus a short justification.

    Args:
        llm: Chat model used to answer the questions.
        title: Paper title.
        abstract: Paper abstract.
        keywords: Paper keywords (may be an empty string).
        questions: Yes/No questions, asked in order.

    Returns:
        A dict with two sub-dicts keyed ``"Q1"``, ``"Q2"``, ... in question order:
        ``"yes_no_answers"`` maps each key to a bool (True only when the model
        answered Yes) and ``"extended_answers"`` maps each key to the model's
        explained answer as text.
    """
    paper_text = f"Title: {title}\nAbstract: {abstract}\nKeywords: {keywords}"

    # The question and the paper text are template *variables*, not pre-formatted
    # into the template string: abstracts often contain braces (e.g. LaTeX), which
    # the template parser would otherwise mistake for placeholders and fail on.
    # The templates do not depend on the question, so they are built once.
    simple_prompt = ChatPromptTemplate.from_template(
        "Based on the following paper details, answer Yes or No to the question: {question}\n\nPaper details:\n{paper_text}"
    )
    extended_prompt = ChatPromptTemplate.from_template(
        "Based on the following paper details, answer Yes or No to the question: {question}. Also, briefly explain your reasoning behind the answer.\n\nPaper details:\n{paper_text}"
    )

    # First stand-alone "yes" or "no" in the reply. Whole-word matching avoids
    # false positives from words that merely contain "yes" (e.g. "eyes").
    verdict_pattern = re.compile(r"\b(yes|no)\b", re.IGNORECASE)

    results = {"yes_no_answers": {}, "extended_answers": {}}

    for i, question in enumerate(questions, 1):
        variables = {"question": question, "paper_text": paper_text}

        # Get the concise Yes/No answer; anything that is not a clear "yes"
        # (including an unparseable reply) counts as False.
        simple_response = llm.invoke(simple_prompt.format_messages(**variables))
        verdict = verdict_pattern.search(simple_response.content)
        results["yes_no_answers"][f"Q{i}"] = (
            verdict is not None and verdict.group(1).lower() == "yes"
        )

        # Get the extended answer with reasoning
        extended_response = llm.invoke(extended_prompt.format_messages(**variables))
        results["extended_answers"][f"Q{i}"] = extended_response.content.strip()

    return results


In [4]:
# Disabled example: the code is wrapped in a string literal, so running this
# cell does not call the LLM; the cell just echoes the string back.
# NOTE(review): the example is out of date with the function defined above,
# which also requires `llm` and `questions` and returns a single dict with the
# keys "yes_no_answers" and "extended_answers" (not a pair of values).
"""
# Example usage
paper_data = fetch_arxiv_data("2401.04088")
inclusion_criteria_answers, extended_answers = ask_inclusion_criteria_with_langchain(
    title=paper_data['title'],
    abstract=paper_data['abstract'],
    keywords=paper_data['keywords']
)

inclusion_criteria_answers, extended_answers
"""

'\n# Example usage\npaper_data = fetch_arxiv_data("2401.04088")\ninclusion_criteria_answers, extended_answers = ask_inclusion_criteria_with_langchain(\n    title=paper_data[\'title\'],\n    abstract=paper_data[\'abstract\'],\n    keywords=paper_data[\'keywords\']\n)\n\ninclusion_criteria_answers, extended_answers\n'

In [5]:
def determine_inclusion(answers: dict, threshold: Optional[int] = None):
    """Decide whether a paper should be included, given its Yes/No answers.

    Args:
        answers: Mapping from question key to a bool (True means "Yes").
        threshold: Minimum number of "Yes" answers required for inclusion.
            Defaults to ``len(answers)``, i.e. every question must be
            answered "Yes".

    Returns:
        True if the number of "Yes" answers reaches the threshold. With the
        default threshold an empty ``answers`` dict yields True.
    """
    if threshold is None:
        threshold = len(answers)
    # bools sum as 0/1, so this counts the "Yes" answers
    positive_answers = sum(answers.values())
    return positive_answers >= threshold

In [6]:
# Disabled example: the code is wrapped in a string literal, so running this
# cell executes nothing; the cell just echoes the string back.
# NOTE(review): determine_inclusion sums the dict's values, so it must be given
# the "yes_no_answers" mapping (as the Main cell does), not the whole result.
"""
# Example usage
should_include = determine_inclusion(inclusion_criteria_answers)
print("Should the paper be included?", should_include)
"""

'\n# Example usage\nshould_include = determine_inclusion(inclusion_criteria_answers)\nprint("Should the paper be included?", should_include)\n'

### Init

In [7]:
# arXiv identifiers of the candidate papers to screen
arxiv_ids = [
    "2406.13470",
    "2201.00927",
    "2401.04088",
]

# Yes/No screening questions, asked about every paper in this order
questions = [
    "Is the paper written in English?",
    "Does the paper describe an empirical study?",
    "Does the study include human participants?",
    "Does the study include participants with (possibly suspected) autism?",
    "Is the goal of the study to detect autism?",
    "Does the study include AI tools?",
    "Does the study include behavioral data?",
]

# Never paste the key into the notebook. A key already exported in the
# environment is kept as-is (assigning "" here would overwrite it); only if
# it is missing do we prompt for it, without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

### Main

In [8]:
# Chat model that answers every screening question
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # Adjust the model as needed

# Per-paper accumulators: the include/exclude verdicts and the raw LLM answers
should_include_list = []
detailed_answers_list = []

for arxiv_id in arxiv_ids:
    # Scrape title, abstract, and keywords from the paper's arXiv page
    paper_data = fetch_arxiv_data(arxiv_id)

    # One Yes/No answer and one explained answer per question
    inclusion_criteria_answers = ask_inclusion_criteria_with_langchain(
        llm=llm,
        title=paper_data["title"],
        abstract=paper_data["abstract"],
        keywords=paper_data["keywords"],
        questions=questions,
    )
    yes_no_answers = inclusion_criteria_answers["yes_no_answers"]
    extended_answers = inclusion_criteria_answers["extended_answers"]

    # Turn the Yes/No answers into a single include/exclude verdict
    should_include = determine_inclusion(yes_no_answers)
    should_include_list.append(
        dict(arxiv_id=arxiv_id, should_include=should_include)
    )

    # Keep the full answers so the verdict can be inspected afterwards
    detailed_answers_list.append(
        dict(
            arxiv_id=arxiv_id,
            yes_no_answers=yes_no_answers,
            extended_answers=extended_answers,
        )
    )

    # Report this paper's outcome right away
    print(f"Should the paper {arxiv_id} be included? {should_include}")
    print("Yes/No Answers:", yes_no_answers)
    print("Extended Answers:", extended_answers)

  llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # Adjust the model as needed
  simple_response = llm([HumanMessage(content=simple_prompt.format())])


Should the paper 2406.13470 be included? True
Yes/No Answers: {'Q1': True, 'Q2': True, 'Q3': True, 'Q4': True, 'Q5': True, 'Q6': True, 'Q7': True}
Extended Answers: {'Q1': "Yes.\n\nReasoning: The paper's title and abstract are written in English, as evidenced by the use of English vocabulary and grammar throughout the text. The terminology and structure are consistent with academic writing in English, indicating that the paper is indeed written in that language.", 'Q2': 'Yes.\n\nReasoning: The paper describes an empirical study as it involves the evaluation of a proposed automatic speech classification algorithm using a dataset of recorded speeches from both autistic and non-autistic subjects. The mention of testing the classification algorithms on actual collected data indicates that the research is based on empirical evidence gathered from real-world observations or experiments.', 'Q3': 'Yes.\n\nReasoning: The study involves a dataset composed of recorded speeches from both autistic 

#### Results

In [9]:
# Dump both result lists, each under its own heading, one record per line
for heading, records in (
    ("\nFinal Inclusion Decisions:", should_include_list),
    ("\nDetailed Responses for Each Paper:", detailed_answers_list),
):
    print(heading)
    for result in records:
        print(result)



Final Inclusion Decisions:
{'arxiv_id': '2406.13470', 'should_include': True}
{'arxiv_id': '2201.00927', 'should_include': True}
{'arxiv_id': '2401.04088', 'should_include': False}

Detailed Responses for Each Paper:
{'arxiv_id': '2406.13470', 'yes_no_answers': {'Q1': True, 'Q2': True, 'Q3': True, 'Q4': True, 'Q5': True, 'Q6': True, 'Q7': True}, 'extended_answers': {'Q1': "Yes.\n\nReasoning: The paper's title and abstract are written in English, as evidenced by the use of English vocabulary and grammar throughout the text. The terminology and structure are consistent with academic writing in English, indicating that the paper is indeed written in that language.", 'Q2': 'Yes.\n\nReasoning: The paper describes an empirical study as it involves the evaluation of a proposed automatic speech classification algorithm using a dataset of recorded speeches from both autistic and non-autistic subjects. The mention of testing the classification algorithms on actual collected data indicates that 