# LLM agents for literature review
Retrieve information about scientific papers and their citations, and check whether they are relevant to primary sclerosing cholangitis (PSC).

## Import libraries and prepare the API credential

In [4]:
from autogen import ConversableAgent, register_function, GroupChatManager, GroupChat
from autogen import config_list_from_json

# Read the OpenAI API key from a local credential file.
# The key is stripped because editors usually leave a trailing newline in the
# file, and a key that still carries whitespace typically fails authentication.
with open('../openai.credential', 'r', encoding='utf-8') as file:
    key = file.read().strip()
MODEL = 'gpt-4o-mini'

# Keep only the configuration entries whose "model" matches MODEL.
config_list = config_list_from_json(env_or_file="../CONFIG_LIST", filter_dict={"model": MODEL})
if not config_list:
    # Fail with an actionable message instead of a bare IndexError below.
    raise ValueError(f"No entry for model {MODEL!r} found in ../CONFIG_LIST")
# The key is injected into the first matching entry only.
config_list[0]["api_key"] = key

## A nested tool agent to retrieve paper info

### Python functions 

A set of functions to get the citations and abstract of a paper, search for a paper by its title, and convert between DOIs and PubMed IDs.


In [5]:
from tools.citation_tools import *

### Tool driver (caller of the function)

In [6]:
# LLM-backed agent dedicated to paper lookups through its registered tools.
# Code execution is switched off for this agent.
lib_tool = ConversableAgent(
    name="lib_tool",
    llm_config={"config_list": config_list},
    code_execution_config=False,
    system_message=(
        "You use your tools to search only for papers and get their informations. The papers are in most cases identified by their DOIs. Do not try to repeat, summarize or analyze."
    ),
)

### Register the tools with both the caller and the executor


In [7]:
# Paper-lookup helpers exposed as tools, paired with the description that is
# attached to each tool at registration. Order matches the original
# registration order.
_LIB_TOOL_FUNCTIONS = (
    (
        get_title_abstract,
        "Get the title and abstract of a paper. The input is the PMID (pubmed id) of this paper",
    ),
    (
        get_pmid_from_doi,
        "Get the PMID (pubmed id) of a paper from its DOI as input",
    ),
    (
        get_doi_from_pmid,
        "Get the DOI of a paper from its PMID (pubmed id) as input",
    ),
    (
        get_pmid_from_title,
        "Search a paper from its title. The input is the title, and the output is the PMID of this paper",
    ),
    (
        get_citations,
        "Get the citations of a paper. The input is the DOI of the source paper, and the output is a list of DOIs of the papers that the source paper has cited",
    ),
    (
        get_doi_from_title,
        # Fixed typo "putput" -> "output" so the tool contract reads correctly.
        "Search a paper from its title. The input is the title and the output is the DOI of the found paper",
    ),
    (
        get_title_abstract_from_doi,
        "Get the title and abstract of a paper. The input is the DOI of this paper",
    ),
)

# `lib_tool` is registered as both caller and executor for every tool, so a
# single loop replaces seven copies of the same call.
for _tool_function, _tool_description in _LIB_TOOL_FUNCTIONS:
    register_function(
        _tool_function,
        caller=lib_tool,
        executor=lib_tool,
        description=_tool_description,
    )


## Agent to read the abstract and check whether the paper is about PSC

In [19]:
# System prompt for the PSC reviewer agent.
# Fixes relative to the previous wording: the prompt asked for "three lines"
# while listing four items, and item 1 had unbalanced quotes around the DOI
# placeholder.
# NOTE(review): the final rule treats any medical/biological paper about
# illnesses as "about PSC", which is much broader than the first sentence.
# Confirm this is intended.
chatbot_system_message = """You will receive the abstract of a scientific paper, and you judge if this paper is  a research about PSC (Primary Sclerosing Cholangitis) disease.
Please answer with four lines:
1) 'My review for paper <its DOI>:';
2) simply 'YES' or 'NO';
3) If the answer is positive, describe briefly what the research is about (one sentence).
   If not, give a short explanation why you don't think it is.
4) 5 keywords that describe the main topics

A paper is about PSC if it talks about medical, biological or something in between regarding illnesses.
"""

# Reviewer agent: an LLM driven by `chatbot_system_message`, with code
# execution turned off. The description is the agent's self-introduction.
psc_checker = ConversableAgent(
    name="psc_checker",
    description="I can read a paragraph of English text (such as the abstract of a paper), and try to tell if the paper is about the research on the PSC disease",
    system_message=chatbot_system_message,
    code_execution_config=False,
    llm_config={"config_list": config_list},
)

## Note taking agents
Tools to save key information into a knowledge graph, and query the graph for saved information.

In [15]:
from tools.CypherExecutor import CypherCodeExecutor

# Executor-only agent: it has no LLM and runs code through CypherCodeExecutor.
graph_tool_executor = ConversableAgent(
    name="graph_tool_executor",
    description="I can run Cypher code if it is passed in Cypher format. I cannot directly generate queries.",
    code_execution_config={"executor": CypherCodeExecutor()},
    llm_config=False,
)

# LLM agent that writes the Cypher queries; it does not execute code itself.
# The system message is built from adjacent string literals, so each sentence
# must end with a space. Previously they were fused ("database.You can use...").
graph_tool_driver = ConversableAgent(
    "graph_tool_driver",
    system_message = "You MUST generate Cypher queries in Cypher format to save and query scientific papers and their review in the database. "
        "You can use the provided schema to understand how the database is organized. "
        "Do not repeat or summarize tasks and results.",
    llm_config = {"config_list": config_list},
    code_execution_config = False,
    description = "I save and query papers, citations and reviews in a database."
)

## Create the group of agents to conduct the task

### A moderator agent to plan and guide the group chat

In [16]:
# Planner agent: splits a task into sub-tasks for the other agents and replies
# with 'TERMINATE' once everything is done.
# The system message is built from adjacent string literals, so each sentence
# must end with a space. Previously they were fused ("group.When you get...").
moderator = ConversableAgent(
    "moderator",
    system_message = "You plan jobs for the whole group. "
    "When you get a task, break down it into sub tasks, and each can be performed by one of your 'partner agents'. "
    "Description of partner agent is provided so you can decide which best fits for a certain sub task. "
    "If all the sub-tasks are completed you can reply with 'TERMINATE'",
    llm_config = {"config_list": config_list},
    description = "I am the moderator, and I break down a task into subtasks."
)


### Create the group chat and assign it to a group manager

In [20]:
from tools.tools import _reset_agents, retrieve_content

# TODO: change this to be based on what the moderator says
def state_transition(last_speaker, groupchat):
    """Choose the next speaker from the previous speaker and the last message.

    Rules, checked in order:

    1. The last message text contains "cypher" -> ``graph_tool_executor``.
    2. ``ret`` spoke last -> ``moderator``.
    3. ``moderator`` spoke last -> ``lib_tool``.
    4. ``lib_tool`` spoke last -> ``lib_tool`` again while the last message
       carries a "tool_calls" entry, otherwise ``psc_checker``.
    5. ``psc_checker`` spoke last -> ``graph_tool_driver``.
    6. Anyone else -> ``moderator``.

    Args:
        last_speaker: The agent that produced the most recent message.
        groupchat: Object exposing ``messages``, a list of message dicts.

    Returns:
        The agent that should speak next.
    """
    # An empty history or a message whose content is None/missing must not
    # raise; both are treated as "no text".
    last_message = groupchat.messages[-1] if groupchat.messages else {}
    text = last_message.get("content") or ""

    if "cypher" in text:
        return graph_tool_executor
    if last_speaker is ret:
        return moderator
    if last_speaker is moderator:
        return lib_tool
    if last_speaker is lib_tool:
        # A pending tool call has to be handled by lib_tool itself.
        return lib_tool if "tool_calls" in last_message else psc_checker
    if last_speaker is psc_checker:
        return graph_tool_driver
    return moderator


# Agent built by retrieve_content() (imported from tools.tools); it is the one
# that opens the chat. NOTE(review): presumably a retrieval agent, since it
# shows up as "retriever" in the chat log. Confirm in tools.tools.
ret = retrieve_content()

# Group chat over all six agents. The next speaker is chosen by
# `state_transition` rather than by the built-in selection.
group_chat = GroupChat(
    agents=[ret, lib_tool, psc_checker, graph_tool_driver, graph_tool_executor, moderator],
    messages=[],
    speaker_selection_method=state_transition,
    allow_repeat_speaker=False,
    send_introductions=True,
    max_round=10,
)

def _is_terminate_message(message):
    """Return True when the tail of the message text contains 'TERMINATE'.

    Only the last 14 characters are inspected (case-insensitively). Messages
    whose content is missing, None or not a string (as tool-call messages can
    be) are never termination messages; previously they raised.
    """
    content = message.get("content")
    if not isinstance(content, str):
        return False
    return "TERMINATE" in content[-14:].upper()


group_chat_manager = GroupChatManager(
    groupchat = group_chat,
    system_message = "You are an assistent to manage who speak next. ",
        # "Remember that once a paper is mentioned, or its abstracts or citations, or the review of a paper, is posted in the graph chat, you should ask the note-taker to speak next, so that it records the paper in the database",
    llm_config = {"config_list": config_list},
    is_termination_msg = _is_terminate_message
)

## Initialize the knowledge graph with one sample paper

In [21]:
# Seed task: fetch the title and abstract of one paper, then store a given
# review for it.
seed_task = """Let's start with paper 10.1126/scitranslmed.abb3107. Please get its title and abstract. After that, save the following review to the existing paper with DOI 10.1126/scitranslmed.abb3107: Xiaojun has reviewed the paper (DOI: 10.1126/scitranslmed.abb3107) and judges that is a paper about PSC, and she commented that it is one of the primary work in this field. For this paper, no need to ask an additional opinion on whether it is about PSC or not."""

# Reset the agents (helper from tools.tools) before `ret` opens the
# conversation with the group chat manager.
_reset_agents(group_chat)
ret.initiate_chat(
    group_chat_manager,
    message=ret.message_generator,
    problem=seed_task,
)

Trying to create collection.


Number of requested results 20 is greater than number of elements in index 3, updating n_results = 3


doc_ids:  [['doc_0', 'doc_1', 'doc_2']]
[32mAdding content of doc doc_0 to context.[0m
[32mAdding content of doc doc_1 to context.[0m
[32mAdding content of doc doc_2 to context.[0m
[33mretriever[0m (to chat_manager):

DATABASE SCHEMA
[
    {
        "nodes": [
            {
                "name": "Paper",
                "indexes": [],
                "constraints": [],
                "properties": [
                    "doi",
                    "title",
                    "abstract"
                ]
            },
            {
                "name": "Review",
                "indexes": [],
                "constraints": [],
                "properties": [
                    "content",
                    "reviewer"
                ]
            },
            {
                "name": "Reviewer",
                "indexes": [],
                "constraints": [],
                "properties": [
                    "name",
                    "judgement",
                

[runtime logging] log_function_use: autogen logger is None


[33mlib_tool[0m (to chat_manager):

[33mlib_tool[0m (to chat_manager):

[32m***** Response from calling tool (call_1bAI0AVRzFmdAnbI5Z65p2R1) *****[0m
Title: A biliary immune landscape map of primary sclerosing cholangitis reveals a dominant network of neutrophils and tissue-resident T cells. 
Abstract: The human biliary system, a mucosal barrier tissue connecting the liver and intestine, is an organ often affected by serious inflammatory and malignant diseases. Although these diseases are linked to immunological processes, the biliary system represents an unexplored immunological niche. By combining endoscopy-guided sampling of the biliary tree with a high-dimensional analysis approach, comprehensive mapping of the human biliary immunological landscape in patients with primary sclerosing cholangitis (PSC), a severe biliary inflammatory disease, was conducted. Major differences in immune cell composition in bile ducts compared to blood were revealed. Furthermore, biliary inflammat

[]

[33mgraph_tool_executor[0m (to chat_manager):

exitcode: 0 (execution succeeded)
Code output: 

--------------------------------------------------------------------------------
[32m
Next speaker: moderator
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mmoderator[0m (to chat_manager):

All subtasks have been successfully completed:

1. The title and abstract of the paper with DOI 10.1126/scitranslmed.abb3107 were retrieved.
2. The review by Xiaojun was successfully saved in the database.

**Title:** A biliary immune landscape map of primary sclerosing cholangitis reveals a dominant network of neutrophils and tissue-resident T cells.

**Abstract:** The human biliary system, a mucosal barrier tissue connecting the liver and intestine, is an organ often affected by serious inflammatory and malignant diseases. Although these diseases are linked to immunological processes, the biliary system represents an unexplored immunological niche. By combining endoscopy-guided sampling of the bil

ChatResult(chat_id=None, chat_history=[{'content': 'DATABASE SCHEMA\n[\n    {\n        "nodes": [\n            {\n                "name": "Paper",\n                "indexes": [],\n                "constraints": [],\n                "properties": [\n                    "doi",\n                    "title",\n                    "abstract"\n                ]\n            },\n            {\n                "name": "Review",\n                "indexes": [],\n                "constraints": [],\n                "properties": [\n                    "content",\n                    "reviewer"\n                ]\n            },\n            {\n                "name": "Reviewer",\n                "indexes": [],\n                "constraints": [],\n                "properties": [\n                    "name",\n                    "judgement",\n                    "comment"\n                ]\n            }\n        ],\n        "relationships": [\n            {\n                "name": "REVIEWED",\n   