# LLM agents for literature review
Get information about scientific papers and their citations, and check whether they are relevant to primary sclerosing cholangitis (PSC).

## Import libraries and prepare the API credential

In [1]:
from autogen import ConversableAgent, register_function, GroupChatManager, GroupChat
from autogen import config_list_from_json

# Read the OpenAI API key from a credential file kept outside the notebook folder.
# Surrounding whitespace is stripped: text files usually end with a newline, and
# without the strip that newline would become part of the key sent to the API.
with open('../openai.credential', 'r', encoding='utf-8') as file:
    key = file.read().strip()
MODEL = 'gpt-4o'

# Load the configurations from ../CONFIG_LIST and keep only the entries for MODEL.
config_list = config_list_from_json(env_or_file="../CONFIG_LIST", filter_dict={"model": MODEL})
if not config_list:
    # Fail with an explicit message instead of an IndexError on the next line.
    raise ValueError(f"No configuration for model {MODEL!r} found in ../CONFIG_LIST")
# The key is injected here so that it does not have to be stored in CONFIG_LIST.
config_list[0]["api_key"] = key

## A nested tool agent to retrieve paper info

### Python functions 

A set of functions to get the citations and abstract of a paper, search for a paper by its title, and convert between DOIs and PubMed IDs.


In [2]:
from tools.citation_tools import *

### Tool driver (caller of the function)

In [3]:
# LLM-backed agent that drives the paper-lookup tools. Its own code execution is
# switched off; it only acts through the functions registered on it further down.
lib_tool = ConversableAgent(
    name="lib_tool",
    code_execution_config=False,
    llm_config={"config_list": config_list},
    system_message=(
        "You use your tools to search on PubMed for papers and get their informations. The papers are in most cases identified by their DOIs. Do not try to repeat, summarize or analyze."
    ),
)

### Register the tool to both caller and executor


In [4]:
# Table of (function, description) pairs exposed to the LLM as tools.
# The description is what the model reads when deciding which tool to call,
# so each one states the expected input and the returned output.
_LIB_TOOL_FUNCTIONS = [
    (
        get_title_abstract,
        "Get the title and abstract of a paper. The input is the PMID (pubmed id) of this paper",
    ),
    (
        get_pmid_from_doi,
        "Get the PMID (pubmed id) of a paper from its DOI as input",
    ),
    (
        get_doi_from_pmid,
        "Get the DOI of a paper from its PMID (pubmed id) as input",
    ),
    (
        get_pmid_from_title,
        "Search a paper from its title. The input is the title, and the output is the PMID of this paper",
    ),
    (
        get_citations,
        "Get the citations of a paper. The input is the DOI of the source paper, and the output is a list of DOIs of the papers that the source paper has cited",
    ),
    (
        get_doi_from_title,
        "Search a paper from its title. The input is the title and the output is the DOI of the found paper",
    ),
    (
        get_title_abstract_from_doi,
        "Get the title and abstract of a paper. The input is the DOI of this paper",
    ),
]

# lib_tool is both the caller (it proposes the tool call) and the executor
# (it runs the function), so every tool is registered with the same agent twice.
for _tool, _description in _LIB_TOOL_FUNCTIONS:
    register_function(
        _tool,
        caller=lib_tool,
        executor=lib_tool,
        description=_description,
    )


## Agent to read the abstract and check whether the paper is about PSC

In [5]:
# Prompt for the reviewing agent. The answer format lists four items, so the
# prompt asks for four (it previously said "three lines"), and the quoting of
# item 1 is balanced so the expected header line is unambiguous.
# NOTE(review): the last sentence defines "about PSC" as any medical/biological
# text regarding illnesses, which is far broader than PSC itself. It is kept
# unchanged here; confirm whether this permissive criterion is intended.
chatbot_system_message = """You will receive the abstract of a scientific paper, and you have to judge if this paper is a research about PSC (Primary Sclerosing Cholangitis) disease. 
Please answer with four items: 
1) 'My review for paper <its DOI>:'; 
2) 'YES' or 'NO' for the judgement; 
3) If the answer is positive, describe briefly what the research is about (one sentence). 
   If not, give a short explanation why you don't think it is.
4) 5 keywords that describe the main topics

A paper is about PSC if it talks about medical, biological or something in between regarding illnesses.
"""

# LLM-only agent: it reads the abstract posted in the chat and replies with the
# review in the format above. The description is the short self-introduction
# given to the constructor separately from the system prompt.
psc_checker = ConversableAgent(
    "psc_checker",
    system_message = chatbot_system_message,
    llm_config = {"config_list": config_list},
    code_execution_config=False,  # Turn off code execution for this agent.
    description = "I can read a paragraph of English text (such as the abstract of a paper), and try to tell if the paper is about the research on the PSC disease"
)

## Note taking agents
Tools to save key information into a knowledge graph, and query the graph for saved information.

In [6]:
from tools.CypherExecutor import CypherCodeExecutor

# Agent without an LLM: it only runs the code blocks it receives through the
# CypherCodeExecutor and reports the result back to the chat.
query_executor = ConversableAgent(
    name="query_executor",
    description="I can run Cypher code. I don't generate it.",
    code_execution_config={"executor": CypherCodeExecutor()},
    llm_config=False,  # No LLM behind this agent.
)

# LLM-backed agent that writes Cypher queries but does not run them itself
# (code execution is disabled). Its queries are emitted in ```cypher fenced
# blocks, as instructed by the system message.
# NOTE(review): "the provided schema" presumably refers to the DATABASE SCHEMA
# section of the retriever's opening message seen in the run output; confirm.
graph_tool_driver = ConversableAgent(
    "graph_tool_driver",
    system_message = """ 
        Use the provided schema to understand how the database is organized.
        Use ```cypher <query>``` to generate Cypher queries to store papers' information or to retrieve already stored ones.
        Do not repeat or summarize tasks and results.""",
    llm_config = {"config_list": config_list},
    code_execution_config = False,
    description = "I save and query papers, citations and reviews in a database."
)

## Create the group of agents to conduct the task

### A moderator agent to plan and guide the group chat

In [7]:
# Planning agent. The system message is built from adjacent string literals,
# which Python joins with no separator, so each piece ends with an explicit
# space to keep the sentences from running together in the prompt.
moderator = ConversableAgent(
    "moderator",
    system_message = "You plan jobs for the whole group. "
    "Break down the task in sub tasks, each can be performed by one of your 'partner agents'. "
    "If all the sub-tasks are completed you can reply with 'TERMINATE'",
    llm_config = {"config_list": config_list},
    description = "I am the moderator, and I break down a task into subtasks."
)


### Create the group chat and assign it to a group manager

In [8]:
from tools.tools import _reset_agents, retrieve_content

#TODO: change this to be based on what moderator say
def state_transition(last_speaker, groupchat):
    """Choose the next speaker from the previous speaker and the latest message.

    Rules, checked in this order:
      1. the latest message contains "cypher"      -> query_executor
      2. the retriever (`ret`) spoke last           -> moderator
      3. the moderator spoke last                   -> lib_tool
      4. lib_tool spoke last                        -> lib_tool again if the
         message carries "tool_calls", otherwise psc_checker
      5. psc_checker spoke last                     -> graph_tool_driver
      6. anyone else                                -> moderator

    Args:
        last_speaker: the agent that produced the latest message.
        groupchat: object exposing `messages`, a list of message dicts.

    Returns:
        One of the module-level agents named above.

    NOTE(review): the GroupChat in this notebook is created with
    speaker_selection_method="auto", so this function is not wired in by the
    code shown here.
    """
    # Tolerate an empty history and messages whose "content" is None or absent,
    # instead of raising IndexError / TypeError on the membership test.
    last_message = groupchat.messages[-1] if groupchat.messages else {}
    content = last_message.get("content") or ""

    if "cypher" in content:
        return query_executor
    if last_speaker is ret:
        return moderator
    if last_speaker is moderator:
        return lib_tool
    if last_speaker is lib_tool:
        # A pending tool call must be executed by lib_tool itself before the
        # result can be handed to the reviewer.
        if "tool_calls" in last_message:
            return lib_tool
        return psc_checker
    if last_speaker is psc_checker:
        return graph_tool_driver
    return moderator


# Agent returned by the project helper; it opens each chat in the cells below.
ret = retrieve_content()

# All participants share one conversation. The next speaker is selected with
# the "auto" method, and the chat is capped at 10 rounds.
group_chat = GroupChat(
    agents=[
        moderator,
        ret,
        lib_tool,
        psc_checker,
        graph_tool_driver,
        query_executor,
    ],
    messages=[],
    max_round=10,
    speaker_selection_method="auto",
    send_introductions=True,
)

# Manager that routes messages between the agents of group_chat.
# Termination check: the chat ends when "TERMINATE" (any letter case) appears
# in the last 14 characters of a message. A message whose "content" is None or
# missing is treated as empty text instead of raising.
group_chat_manager = GroupChatManager(
    groupchat = group_chat,
    system_message = "You manage who speak next. ",
        # Disabled extension of the prompt, kept for reference:
        # "Remember that once a paper is mentioned, or its abstracts or citations, or the review of a paper, is posted in the graph chat, you should ask the note-taker to speak next, so that it records the paper in the database",
    llm_config = {"config_list": config_list},
    is_termination_msg = lambda m: "TERMINATE" in (m.get("content") or "")[-14:].upper()
)

## Initialize the knowledge graph with one sample paper

In [9]:
# Reset the agents via the project helper before starting a fresh run.
_reset_agents(group_chat)

# First task: fetch and store one seed paper, then attach a human review to it.
# The task text is handed to the retriever's message generator as `problem`.
ret.initiate_chat(
    group_chat_manager,
    message=ret.message_generator,
    problem="""Let's start with paper 10.1126/scitranslmed.abb3107. Please get its title and abstract and save it. After that, save the following review to the existing paper with DOI 10.1126/scitranslmed.abb3107: Xiaojun has reviewed the paper (DOI: 10.1126/scitranslmed.abb3107) and judges that is a paper about PSC, and she commented that it is one of the primary work in this field. For this paper, no need to ask an additional opinion on whether it is about PSC or not.""",
)

Trying to create collection.


  from tqdm.autonotebook import tqdm, trange
Number of requested results 20 is greater than number of elements in index 1, updating n_results = 1


doc_ids:  [['doc_0']]
[32mAdding content of doc doc_0 to context.[0m
[33mretriever[0m (to chat_manager):

DATABASE SCHEMA
[
    {
        "nodes": [],
        "relationships": []
    }
]


TASK
Let's start with paper 10.1126/scitranslmed.abb3107. Please get its title and abstract and save it. After that, save the following review to the existing paper with DOI 10.1126/scitranslmed.abb3107: Xiaojun has reviewed the paper (DOI: 10.1126/scitranslmed.abb3107) and judges that is a paper about PSC, and she commented that it is one of the primary work in this field. For this paper, no need to ask an additional opinion on whether it is about PSC or not.

--------------------------------------------------------------------------------
[32m
Next speaker: lib_tool
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mlib_tool[0m (to chat_manager):

[32m***** Suggested tool call (call_oyjpROL9WcJC6DCVUwYPibtI): get_pmid_from_doi *****[0m
Arguments: 
{"doi":"10.1126/scitranslmed.abb3107"}
[32m*

[runtime logging] log_function_use: autogen logger is None


[33mlib_tool[0m (to chat_manager):

[33mlib_tool[0m (to chat_manager):

[32m***** Response from calling tool (call_oyjpROL9WcJC6DCVUwYPibtI) *****[0m
34162753
[32m**********************************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: lib_tool
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mlib_tool[0m (to chat_manager):

[32m***** Suggested tool call (call_lEzIdeGaKMVnt7rLGfifx1d8): get_title_abstract *****[0m
Arguments: 
{"pmid":"34162753"}
[32m***********************************************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: lib_tool
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[35m
>>>>>>>> EXECUTING FUNCTION get_title_abstract...[0m


[runtime logging] log_function_use: autogen logger is None


[33mlib_tool[0m (to chat_manager):

[33mlib_tool[0m (to chat_manager):

[32m***** Response from calling tool (call_lEzIdeGaKMVnt7rLGfifx1d8) *****[0m
Title: A biliary immune landscape map of primary sclerosing cholangitis reveals a dominant network of neutrophils and tissue-resident T cells. 
Abstract: The human biliary system, a mucosal barrier tissue connecting the liver and intestine, is an organ often affected by serious inflammatory and malignant diseases. Although these diseases are linked to immunological processes, the biliary system represents an unexplored immunological niche. By combining endoscopy-guided sampling of the biliary tree with a high-dimensional analysis approach, comprehensive mapping of the human biliary immunological landscape in patients with primary sclerosing cholangitis (PSC), a severe biliary inflammatory disease, was conducted. Major differences in immune cell composition in bile ducts compared to blood were revealed. Furthermore, biliary inflammat

[]

[]

[33mquery_executor[0m (to chat_manager):

exitcode: 0 (execution succeeded)
Code output: 

--------------------------------------------------------------------------------
[32m
Next speaker: moderator
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mmoderator[0m (to chat_manager):

TERMINATE

--------------------------------------------------------------------------------


ChatResult(chat_id=None, chat_history=[{'content': 'DATABASE SCHEMA\n[\n    {\n        "nodes": [],\n        "relationships": []\n    }\n]\n\n\nTASK\nLet\'s start with paper 10.1126/scitranslmed.abb3107. Please get its title and abstract and save it. After that, save the following review to the existing paper with DOI 10.1126/scitranslmed.abb3107: Xiaojun has reviewed the paper (DOI: 10.1126/scitranslmed.abb3107) and judges that is a paper about PSC, and she commented that it is one of the primary work in this field. For this paper, no need to ask an additional opinion on whether it is about PSC or not.', 'role': 'assistant'}, {'content': "Hello everyone. We have assembled a great team today to answer questions and solve tasks. In attendance are:\n\nmoderator: I am the moderator, and I break down a task into subtasks.\nretriever: Assistant who has extra content retrieval power.\nlib_tool: You use your tools to search on PubMed for papers and get their informations. The papers are in mo

In [11]:
# Reset the agents via the project helper before starting a fresh run.
_reset_agents(group_chat)
# Second task: look up a paper already reviewed as PSC in the graph, retrieve
# the DOIs of its citations, and store each as a Paper node linked by 'Cites'.
# The task text is passed verbatim (including its indentation) as `problem`.
ret.initiate_chat(group_chat_manager,
           message=ret.message_generator,
           problem="""The tasks are as follows: 
            1. Find one already reviewed paper as PSC on the graph, and get its DOI.
            2. If you find such a paper, retrieve its citations' DOIs (tools can be used). 
            3. Save each DOI in the citation list with a Paper node and a 'Cites' relation from the original paper (matched by its DOI) to the newly created one. """)

Number of requested results 20 is greater than number of elements in index 1, updating n_results = 1


doc_ids:  [['doc_0']]
[32mAdding content of doc doc_0 to context.[0m
[33mretriever[0m (to chat_manager):

DATABASE SCHEMA
[
    {
        "nodes": [
            {
                "name": "Paper",
                "indexes": [],
                "constraints": [],
                "properties": [
                    "doi",
                    "title",
                    "abstract"
                ]
            },
            {
                "name": "Review",
                "indexes": [],
                "constraints": [],
                "properties": [
                    "comment",
                    "author",
                    "isAboutPSC"
                ]
            }
        ],
        "relationships": [
            {
                "name": "REVIEWS",
                "node1": "Review",
                "node2": "Paper",
                "properties": [
                    null
                ]
            }
        ]
    }
]


TASK
The tasks are as follows: 
            1

[32m
Next speaker: graph_tool_driver
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mgraph_tool_driver[0m (to chat_manager):

```cypher
MATCH (r:Review)-[:REVIEWS]->(p:Paper)
WHERE r.isAboutPSC = true
RETURN p.doi
LIMIT 1
```

--------------------------------------------------------------------------------
[32m
Next speaker: query_executor
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> EXECUTING CODE BLOCK (inferred language is cypher)...[0m


[{'p.doi': '10.1126/scitranslmed.abb3107'}]

[33mquery_executor[0m (to chat_manager):

exitcode: 0 (execution succeeded)
Code output: [{'p.doi': '10.1126/scitranslmed.abb3107'}]

--------------------------------------------------------------------------------
[32m
Next speaker: moderator
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mmoderator[0m (to chat_manager):

- [Step 2] lib_tool: Retrieve the citations' DOIs for the paper with DOI '10.1126/scitranslmed.abb3107'.

--------------------------------------------------------------------------------
[32m
Next speaker: lib_tool
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mlib_tool[0m (to chat_manager):

[32m***** Suggested tool call (call_g69qbhdG6MfC2UfYGkwY3Rl8): get_citations *****[0m
Arguments: 
{"doi":"10.1126/scitranslmed.abb3107"}
[32m******************************************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: lib_tool
[0m
[31m
>>>>>>>> USING AUTO REP

[runtime logging] log_function_use: autogen logger is None


[33mlib_tool[0m (to chat_manager):

[33mlib_tool[0m (to chat_manager):

[32m***** Response from calling tool (call_g69qbhdG6MfC2UfYGkwY3Rl8) *****[0m
10.1016/j.immuni.2012.09.020, 10.1016/j.immuni.2016.07.007, 10.1016/j.immuni.2016.11.005, 10.1016/j.immuni.2016.08.015, 10.1038/nbt.3942, 10.1016/j.immuni.2013.04.004, 10.4049/jimmunol.1200402, 10.4049/jimmunol.176.4.2079, 10.1016/j.celrep.2017.08.078, 10.1016/j.jhep.2017.07.022, 10.1038/nri.2016.34, 10.1016/j.cell.2015.03.031, 10.1126/science.aac9593, Unknown DOI, 10.1016/j.cell.2016.02.048, 10.1016/j.cub.2017.05.073, Unknown DOI, 10.1038/s41385-018-0016-5, 10.1038/ni.2744, 10.1126/sciimmunol.aas9673, 10.4049/jimmunol.1500515, 10.1038/nri3567, 10.1016/S0140-6736(13)60096-3, 10.1056/NEJMra1506330, 10.4049/jimmunol.157.7.3183, 10.1111/j.1440-1827.2006.01913.x, 10.1016/S0168-8278(03)00481-1, 10.1084/jem.20162115, 10.1016/j.celrep.2019.12.050, 10.4049/jimmunol.1701704, 10.1053/j.gastro.2014.04.003, 10.1038/ni.3589, 10.1038/nature21379,

[]

[33mquery_executor[0m (to chat_manager):

exitcode: 0 (execution succeeded)
Code output: 

--------------------------------------------------------------------------------
[32m
Next speaker: moderator
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mmoderator[0m (to chat_manager):

TERMINATE

--------------------------------------------------------------------------------


ChatResult(chat_id=None, chat_history=[{'content': 'DATABASE SCHEMA\n[\n    {\n        "nodes": [\n            {\n                "name": "Paper",\n                "indexes": [],\n                "constraints": [],\n                "properties": [\n                    "doi",\n                    "title",\n                    "abstract"\n                ]\n            },\n            {\n                "name": "Review",\n                "indexes": [],\n                "constraints": [],\n                "properties": [\n                    "comment",\n                    "author",\n                    "isAboutPSC"\n                ]\n            }\n        ],\n        "relationships": [\n            {\n                "name": "REVIEWS",\n                "node1": "Review",\n                "node2": "Paper",\n                "properties": [\n                    null\n                ]\n            }\n        ]\n    }\n]\n\n\nTASK\nThe tasks are as follows: \n            1. Find one alread