# LLM agents for literature review
Get information about scientific papers and their citations, and check whether they are relevant to primary sclerosing cholangitis (PSC).

## Import libraries and prepare the API credential

In [1]:
from autogen import ConversableAgent, register_function, GroupChatManager, GroupChat
from autogen import config_list_from_json

# Read the OpenAI API key from the credential file one directory up.
# Surrounding whitespace is stripped because a trailing newline left in the
# file would otherwise become part of the key.
with open('../openai.credential', 'r') as credential_file:
    key = credential_file.read().strip()
MODEL = 'gpt-4o'

# Keep only the configuration entries for MODEL, then inject the key into the
# first one. Fail with an explicit message (instead of a bare IndexError) when
# the filter matches nothing.
config_list = config_list_from_json(env_or_file="../CONFIG_LIST", filter_dict={"model": MODEL})
if not config_list:
    raise ValueError(f"No configuration entry for model {MODEL!r} found in ../CONFIG_LIST")
config_list[0]["api_key"] = key

## Nested tool agents to retrieve paper info

### Python functions 

A set of functions to get the citations and abstract of a paper, search for a paper by its title, and convert between DOIs and PubMed IDs.


In [None]:
from tools.citation_tools import *

### Tool executor (just to wrap up the function)

In [7]:

# Executor side of the library tools: no LLM and no code execution, it only
# runs the functions registered on it. A message counts as a termination
# message when its content is non-empty and contains "terminate"
# (case-insensitive).
lib_tool_executor = ConversableAgent(
    name="lib_tool_executor",
    is_termination_msg=lambda msg: msg["content"] and "terminate" in msg["content"].lower(),
    code_execution_config=False,
    llm_config=False,
)


### Tool driver (caller of the function)

In [16]:
# LLM-backed caller of the library tools. It decides which tool to invoke and
# is instructed to answer 'TERMINATE' as soon as a (sub)task is done.
# The system message is built from adjacent string literals; each piece must
# carry its own sentence separator, otherwise the sentences are fused
# (previously "...their DOIsOnce you received...").
lib_tool_driver = ConversableAgent(
    "lib_tool_driver",
    system_message=(
        "You are a helpful assistant to retrieve information of scientific papers, using some external tools search papers from their titles, get their citations, titles, abstracts, etc. The papers are in most cases identified by their DOIs. "
        "Once you received the result you needed from the tools, or you believe the result from the tools satisfy the task you were assigned, say 'TERMINATE' immediately. Do not try to repeat, summarize or analyze the results. Also, try to do one task each time. Once a task or subtask assigned to you was done, say 'TERMINATE', rather than jumpting into the next task immediately"
    ),
    llm_config={"config_list": config_list},
    code_execution_config=False,
)

### Register the tool to both caller and executor


In [None]:
# Library tools as (function, description) pairs, registered in this order.
# Every tool is proposed by lib_tool_driver (caller) and run by
# lib_tool_executor (executor).
_library_tools = (
    (
        get_title_abstract,
        "Get the title and abstract of a paper. The input is the PMID (pubmed id) of this paper",
    ),
    # Currently disabled:
    # (
    #     get_pmid_from_doi,
    #     "Get the PMID (pubmed id) of a paper from its DOI as input",
    # ),
    # (
    #     get_doi_from_pmid,
    #     "Get the DOI of a paper from its PMID (pubmed id) as input",
    # ),
    (
        get_pmid_from_title,
        "Search a paper from its title. The input is the title, and the output is the PMID of this paper",
    ),
    (
        get_citations,
        "Get the citations of a paper. The input is the DOI of the source paper, and the output is a list of DOIs of the papers that the source paper has cited",
    ),
    (
        get_doi_from_title,
        "Search a paper from its title. The input is the title and the putput is the DOI of the found paper",
    ),
    (
        get_title_abstract_from_doi,
        "Get the title and abstract of a paper. The input is the DOI of this paper",
    ),
)

for _tool, _tool_description in _library_tools:
    register_function(
        _tool,
        caller=lib_tool_driver,
        executor=lib_tool_executor,
        description=_tool_description,
    )


### Make the nested agent for tools. 
Now lib_tool_executor is the only one exposed to the group

In [18]:
from tools.tools import second_last_msg

# One nested chat: lib_tool_executor talks to lib_tool_driver for at most
# 4 turns, and the nested chat is summarized by `second_last_msg`
# (imported from tools.tools).
nested_chats = [
    dict(recipient=lib_tool_driver, max_turns=4, summary_method=second_last_msg),
]

# The trigger matches every sender except lib_tool_driver itself.
lib_tool_executor.register_nested_chats(
    nested_chats,
    trigger=lambda sender: sender not in (lib_tool_driver,),
)

## Agent to read the abstract and check whether the paper is about PSC

In [19]:
# Prompt for the reviewer agent: it judges from an abstract whether a paper is
# PSC research and answers in a fixed three-line format.
chatbot_system_message = (
    "You are a helpful assistant. "
    "You will receive the abstract of a scientific paper, and you judge if this paper is about a research for the PSC (Primary Sclerosing Cholangitis) disease. "
    "Please answer with three lines: "
    "1) 'My review for paper <it's DOI>:'; "
    "2) simply 'YES' or 'No'; "
    "3) If it is, please describe briefly what the research is about (in on sentence), "
    "If not, please give a short explanation why you don't think it is."
)

# LLM-only agent; code execution is switched off.
psc_checker = ConversableAgent(
    name="psc_checker",
    llm_config={"config_list": config_list},
    system_message=chatbot_system_message,
    code_execution_config=False,
)

## Note taking agents
Tools to save key information into a knowledge graph, and query the graph for saved information.

In [20]:
from tools.neo4j import query_neo4j

# Executor side of the knowledge-graph tool: no LLM and no code execution.
# A message counts as a termination message when its content is non-empty and
# contains "terminate" (case-insensitive).
graph_tool_executor = ConversableAgent(
    name="graph_tool_executor",
    is_termination_msg=lambda msg: msg["content"] and "terminate" in msg["content"].lower(),
    code_execution_config=False,
    llm_config=False,
)

# LLM-backed note-taker that drives the Neo4j tool: it saves papers it is told
# about and answers questions by querying the graph.
# The system message is built from adjacent string literals, which Python joins
# with no separator. Each piece therefore ends with its own punctuation and a
# space (previously the text read "...you savedWhen you receive...").
graph_tool_driver = ConversableAgent(
    "graph_tool_driver",
    system_message=(
        "You are taking notes about scientific paper mentiond by others. "
        "Each paper is identified by DOI, and may have title and abstract. "
        "You have a tool helping you saving paper information to a Neo4j graph, and to query informaiton you saved. "
        "When you receive a paper, with DOI, title or abstract, save them. "
        "Once it is succesfully save, say TERMINATE to avoid the tool executing unnecessary tasks. "
        "When you were asked about papers you saved, query the Neo4j graph via the tool. "
        "Once you received the satisfied results from the query, summarize them in concise text, "
        "followed by 'TERMINATE' to stop the tool from querying more."
    ),
    llm_config={"config_list": config_list},
    code_execution_config=False,
)

# Expose the Cypher tool: graph_tool_driver proposes the calls and
# graph_tool_executor runs them. The description doubles as the schema
# documentation the LLM sees, so its sentences must stay separated. Adjacent
# string literals are joined with no separator, which previously fused the
# schema lines ("...and abstractA Review has properties...").
# NOTE: the property name `reviewdBy` is left exactly as written, because it
# names a graph property.
register_function(
    query_neo4j,
    caller=graph_tool_driver,
    executor=graph_tool_executor,
    description=(
        "Query or modify the neo4j graph database. The input is a cypher query, and the output is a list of records returned from the query. The graph has the following schema: There are only two labels for nodes: 'Paper' and 'Review'. "
        "A Paper has properties: doi (its id), title, and abstract. "
        "A Review has properties: isPsc (YES, NO), reviewdBy, and comment. "
        "Two labels for relations: 'Cites' (a paper cites another paper), and 'ReviewOf' (a Review is a review of a Paper). "
        "Each node or relation should have one and only one label. "
        "Note that The property existence syntax `... exists(variable.property)` is no longer supported by new Cypher. Please use `variable.property IS NOT NULL` instead. "
        "Remember that DOI is the id of Paper. When you search or match a paper, try to use DOI as long as it is available. "
        "When you are asked to create many nodes (e.g., to create many papers using their DOIs), try to use the batch and UNWIND keyword in Cypher to do creation/merging in one Cypher query, instead of doing them one by one"
    ),
)

# One nested chat: graph_tool_executor talks to graph_tool_driver for at most
# 4 turns, and the nested chat is summarized with the "last_msg" method.
nested_chats = [
    dict(recipient=graph_tool_driver, max_turns=4, summary_method="last_msg"),
]

# The trigger matches every sender except graph_tool_driver itself.
graph_tool_executor.register_nested_chats(
    nested_chats,
    trigger=lambda sender: sender not in (graph_tool_driver,),
)

## Create the group of agents to conduct the task

### A moderator agent to plan and guide the group chat

In [21]:
# Planner for the group: splits a task into sub-tasks for the partner agents.
# human_input_mode is "ALWAYS", so a human is asked for input on the
# moderator's turns.
# The system message is built from adjacent string literals; each piece ends
# with a space so the sentences are not fused (previously
# "...the whole group.When you get a task...").
moderator = ConversableAgent(
    "moderator",
    system_message=(
        "You are an AI-based moderator that makes plans for the whole group. "
        "When you get a task, break down it into sub tasks, and each can be performed by one of your 'partner agents'. "
        "You will get an introduction about what each of your partner agents can do."
    ),
    llm_config={"config_list": config_list},
    code_execution_config=False,  # Turn off code execution for this agent.
    human_input_mode="ALWAYS",
)


### Create the group chat and assign it to a group manager

In [22]:
# Self-introductions of the four agents, assigned in this order.
_agent_descriptions = (
    (lib_tool_executor, "I have a couple of tools to fetch paper informaiton from PubMed. I can search the DOI or PubMed Id from title, get the abstract of a paper, and find the citations of the paper"),
    (psc_checker, "I can read a paragraph of English text (such as the abstract of a paper), and try to tell if the paper is about the research on the PSC disease"),
    (moderator, "I am the moderator, and I break down a task into subtasks. I should always be the one to speak first, to make plan for sub-tasks"),
    (graph_tool_executor, "I am the note-taker. I record the papers, citations and reviews mentioned by other speakers in a database, and help you query from what I noted."),
)
for _agent, _agent_description in _agent_descriptions:
    _agent.description = _agent_description


# The group: at most 20 rounds, introductions are sent, and repeating the
# same speaker is not allowed.
group_chat = GroupChat(
    agents=[lib_tool_executor, psc_checker, graph_tool_executor, moderator],
    messages=[],
    max_round=20,
    allow_repeat_speaker=False,
    send_introductions=True,
)

# LLM-backed manager for the group chat.
group_chat_manager = GroupChatManager(
    groupchat=group_chat,
    llm_config={"config_list": config_list},
    system_message="You are an assistent to manage who speak next. ",
    # Disabled extension of the system message:
    # "Remember that once a paper is mentioned, or its abstracts or citations, or the review of a paper, is posted in the graph chat, you should ask the note-taker to speak next, so that it records the paper in the database",
)

## Initialize the knowledge graph with one sample paper

In [None]:
# Disabled cell: the call below is wrapped in a string literal, so it is not
# executed. It would seed the graph with paper 10.1126/scitranslmed.abb3107 and
# a human review of it.
# NOTE(review): `initialize` is imported from tools.tools only in a later cell;
# import it first before re-enabling this call.
'''initialize(group_chat_manager, lib_tool_executor,
            message="""Let's start with paper 10.1126/scitranslmed.abb3107. Please get its title and abstract. After that, save the following review to the existing paper with DOI 10.1126/scitranslmed.abb3107: Xiaojun has reviewed the paper (DOI: 10.1126/scitranslmed.abb3107) and judges that is a paper about PSC, and she commented that it is one of the primary work in this field. For this paper, no need to ask an additional opinion on whether it is about PSC or not.""")'''

## Expand a YES paper with its references

In [None]:
# Disabled cell: the call below is wrapped in a string literal, so it is not
# executed. It would ask the group to pick one paper reviewed as YES, fetch its
# citations, and add them to the graph as Paper nodes with 'Cites' relations.
# NOTE(review): `initialize` is imported from tools.tools only in a later cell;
# import it first before re-enabling this call.
'''initialize(group_chat_manager, lib_tool_executor,
            message="""Your task is as follows: 
            1. Please query from the notes (the knowledge graph) to find one paper which has been Reviewed as YES for PSC research, and get its DOI.
            2. If you find such as paper, retrieve all its citations, as a list of DOIs.
            3. For each DOI in the citation list, create a Paper node in the knowledge graph, with only the property of DOI, and create a 'Cites' relation from the original paper (matched by its DOI) to the newly created paper. 
            Since there will normally be many citations, 
            please use a batch of data and the UNWIND keyword to create/merge these nodes in one cypher query.""")'''

## Review a paper

## The "main" entry point of function calls.

In [None]:
from tools.tools import initialize

# expand_references()

# Instruction for one review pass. It is the same on every pass, so it is
# built once. Line breaks and indentation are spelled out explicitly so the
# text sent to the agents is unambiguous.
review_task = (
    "Your task is as follows: \n"
    "            1. Query your note (the graph) to find one paper which has not been reviewed. Done for the graph tool.\n"
    "            2. Use another tool to retrieve the title and abstract of this paper, and save the details into the graph.\n"
    "            3. Ask the psc_checker to see if it is about psc\n"
    "            4. Save the Review about this paper, with YES/NO and comment, and record that this review is done by generative AI"
)

# Run two review passes; each pass starts a new call to `initialize`.
for _ in range(2):
    initialize(group_chat_manager, lib_tool_executor, message=review_task)