# Using tools to get information about scientific papers from PubMed and Crossref




## Import libraries and prepare the API credential

In [1]:
pip install Biopython

Note: you may need to restart the kernel to use updated packages.


In [2]:

import requests
from Bio import Entrez
from typing import List

from autogen import ConversableAgent, register_function, GroupChatManager, GroupChat
from autogen.coding import LocalCommandLineCodeExecutor

# Read the OpenAI API key from a local file so it is not hard-coded in the notebook.
# Strip surrounding whitespace: a trailing newline added by an editor would
# otherwise become part of the key string.
with open('openai.credential', 'r') as file:
    key = file.read().strip()
# Model name used in the llm_config of the LLM-backed agents.
MODEL = 'gpt-4o'

## A Python function to get citations. 
The input is a DOI of the source paper, and the output is a list of DOIs of the papers that the source paper has cited.


In [3]:
def get_citations(doi:str) -> str:
    """
    Fetches the DOIs of the papers cited by a source paper, using the Crossref REST API.

    Parameters:
    doi (str): The DOI of the source paper.

    Returns:
    str: Comma-separated DOIs, one entry per item of the "reference" list in
         the Crossref response. A reference without a DOI is reported as
         'Unknown DOI', so every reference keeps its position in the list.
         An empty string is returned (after printing the error) if the
         request fails or the status code is not 200.
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    # Percent-encode characters that are not URL-safe (e.g. '#', '?', '<', '>'),
    # but keep '/' because it separates the DOI prefix from the suffix.
    encoded_doi = quote(doi.strip(), safe="/")
    url = f"https://api.crossref.org/works/{encoded_doi}"

    try:
        # Without a timeout a stalled connection would block the caller indefinitely.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        print(f"Error: {e}")
        return ""

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return ""

    data = response.json()
    references = data.get("message", {}).get("reference", [])
    return ", ".join(ref.get('DOI', 'Unknown DOI') for ref in references)
    
# Quick manual check of get_citations with a sample DOI; as the last expression
# of the cell, the returned string is displayed as the cell output.
get_citations("10.1126/scitranslmed.abb3107")

'10.1016/j.immuni.2012.09.020, 10.1016/j.immuni.2016.07.007, 10.1016/j.immuni.2016.11.005, 10.1016/j.immuni.2016.08.015, 10.1038/nbt.3942, 10.1016/j.immuni.2013.04.004, 10.4049/jimmunol.1200402, 10.4049/jimmunol.176.4.2079, 10.1016/j.celrep.2017.08.078, 10.1016/j.jhep.2017.07.022, 10.1038/nri.2016.34, 10.1016/j.cell.2015.03.031, 10.1126/science.aac9593, Unknown DOI, 10.1016/j.cell.2016.02.048, 10.1016/j.cub.2017.05.073, Unknown DOI, 10.1038/s41385-018-0016-5, 10.1038/ni.2744, 10.1126/sciimmunol.aas9673, 10.4049/jimmunol.1500515, 10.1038/nri3567, 10.1016/S0140-6736(13)60096-3, 10.1056/NEJMra1506330, 10.4049/jimmunol.157.7.3183, 10.1111/j.1440-1827.2006.01913.x, 10.1016/S0168-8278(03)00481-1, 10.1084/jem.20162115, 10.1016/j.celrep.2019.12.050, 10.4049/jimmunol.1701704, 10.1053/j.gastro.2014.04.003, 10.1038/ni.3589, 10.1038/nature21379, 10.1126/sciimmunol.abb4432, 10.1016/j.immuni.2016.08.011, 10.1016/j.it.2018.06.003, 10.1146/annurev-immunol-032713-120145, 10.1007/s00281-009-0167-2, 10.1

## A Python function to get the title and abstract from a PMID.
Input: PMID (PubMed ID) as a string. Output: Text of the title and abstract

In [4]:
def get_title_abstract(pmid: str) -> str:
    """
    Fetches the title and abstract of a paper given its PMID.

    Parameters:
    pmid (str): The PubMed ID of the paper.

    Returns:
    str: "Title: <title>" and "Abstract: <abstract>" on two lines, or a
         "No article found for PMID: ..." message when the response does not
         contain an article record.
    """
    # Set your email (required by NCBI)
    Entrez.email = 'hui.song@sintef.no' 
    handle = Entrez.efetch(db='pubmed', id=pmid, retmode='xml')
    records = Entrez.read(handle)
    handle.close()
    
    # Parse the XML to extract title and abstract. The response may hold no
    # 'PubmedArticle' entry (e.g. for an unknown PMID), so guard the lookup
    # instead of failing with IndexError/KeyError.
    try:
        article = records['PubmedArticle'][0]['MedlineCitation']['Article']
    except (IndexError, KeyError):
        return 'No article found for PMID: {}'.format(pmid)
    
    # Extract the title
    title = article.get('ArticleTitle', 'No title available.')
    
    # Extract the abstract; an article without one yields an empty abstract text
    abstract_paragraphs = article.get('Abstract', {}).get('AbstractText', [])
    
    # The abstract may come as a list of paragraphs or as a single element
    if isinstance(abstract_paragraphs, list):
        abstract = ' '.join(str(para) for para in abstract_paragraphs)
    else:
        abstract = str(abstract_paragraphs)
    
    return "Title: {} \nAbstract: {}".format(title, abstract)
    

In [5]:

def get_pmid_from_doi(doi: str)->str:
    """
    Searches PubMed for a paper by DOI and returns the PMID.

    Parameters:
    doi (str): The DOI of the paper.

    Returns:
    str: The first PMID in the search result, or a
         'No PMID found for DOI: ...' message if the result has no IDs.
    """
    # Set your email (required by NCBI)
    Entrez.email = 'hui.song@sintef.no'

    # Search term: the quoted DOI followed by the [DOI] tag
    query = f'"{doi}" [DOI]'
    search_handle = Entrez.esearch(db='pubmed', term=query)
    search_results = Entrez.read(search_handle)
    search_handle.close()

    matches = search_results.get('IdList', [])
    if not matches:
        return f'No PMID found for DOI: {doi}'
    return matches[0]
    

def get_pmid_from_title(title:str)->str:
    """
    Looks up a paper on PubMed using its title as the search term.

    Parameters:
    title (str): The title of the paper.

    Returns:
    str: The first PMID in the search result, or an empty string
         (after printing a notice) if the search returned no IDs.
    """
    # Set your email (required by NCBI)
    Entrez.email = "hui.song@sintef.no"  # Replace with your email address

    # Run the search with the title as the query term
    handle = Entrez.esearch(db="pubmed", term=title, retmode="xml")
    record = Entrez.read(handle)
    handle.close()

    pmids = record.get("IdList", [])
    if pmids:
        # Take the first hit (assuming it's the most relevant)
        return pmids[0]

    print("No articles found with the given title.")
    return ""

def get_doi_from_pmid(pmid:str)->str:
    """
    Looks up the DOI of a paper on PubMed given its PMID.

    Parameters:
    pmid (str): The PubMed ID of the paper.

    Returns:
    str: The DOI of the paper if found; otherwise an empty string
         (a short notice is printed in that case).
    """
    # Set your email (required by NCBI)
    Entrez.email = "hui.song@sintef.no"  # Replace with your email address

    # Fetch the article record
    handle = Entrez.efetch(db="pubmed", id=pmid, retmode="xml")
    records = Entrez.read(handle)
    handle.close()

    try:
        # The article IDs live under PubmedData/ArticleIdList of the first article
        id_list = records['PubmedArticle'][0]['PubmedData']['ArticleIdList']
        # Pick the first entry whose IdType attribute is 'doi', if any
        doi = next(
            (str(item) for item in id_list if item.attributes.get('IdType') == 'doi'),
            None,
        )
    except (IndexError, KeyError) as e:
        print(f"Error retrieving DOI: {e}")
        return ""

    if doi is None:
        print("DOI not found in the article data.")
        return ""
    return doi

## Agent 1: tool executor (just to wrap up the function)

In [6]:

def _says_terminate(msg):
    """Return a truthy value when the message content is non-empty and contains 'terminate' (any case)."""
    return (msg["content"]) and ("terminate" in msg["content"].lower())


# Agent without an LLM and without code execution; it is registered below as
# the executor of the tool functions.
lib_tool_executor = ConversableAgent(
    name="lib_tool_executor",
    is_termination_msg=_says_terminate,
    code_execution_config=False,
    llm_config=False,  # Turn off LLM for this agent.
)


### Agent 2: Tool driver (caller of the function)

In [7]:
# LLM-backed agent; it is registered below as the caller of the tool functions.
lib_tool_driver = ConversableAgent(
    "lib_tool_driver",
    # Adjacent string literals are joined with no separator, so the first
    # piece ends with a space to keep the two sentences apart.
    system_message = "You are a helpful assistant to help users understand scientific publication. You may decide to use some external tools to search papers, with their citations, titles, abstracts, if necessary. "
    "Once you received the result you needed, or you believe the result from the tools satisfy the task, say 'TERMINATE' - no need to summarize or analyze the results",
    llm_config = {"config_list": [{"model": MODEL, "api_key": key}]},
    code_execution_config=False
)

### Register the tool to both caller and executor

*Question:* Why do we need tool agents to use a tool?

In [8]:
# (function, description) pairs for every tool, in registration order.
_TOOLS = [
    (
        get_title_abstract,
        "Get the title and abstract of a paper. The input is the PMID (pubmed id) of this paper",
    ),
    (
        get_pmid_from_doi,
        "Get the PMID (pubmed id) of a paper from its DOI as input",
    ),
    (
        get_doi_from_pmid,
        "Get the DOI of a paper from its PMID (pubmed id) as input",
    ),
    (
        get_pmid_from_title,
        "Search a paper from its title. The input is the title, and the output is the PMID of this paper",
    ),
    (
        get_citations,
        "Get the citations of a paper. The input is the DOI of the source paper, and the output is a list of DOIs of the papers that the source paper has cited",
    ),
]

# Every tool is registered with the driver as caller and the executor as executor.
for tool, tool_description in _TOOLS:
    register_function(
        tool,
        caller=lib_tool_driver,
        executor=lib_tool_executor,
        description=tool_description,
    )



## Make the nested agent for tools

In [9]:
def second_last_msg(sender: ConversableAgent, recipient: ConversableAgent, summary_args: dict):
    """
    Summary method: returns the content of the second-to-last message.

    Parameters:
    sender (ConversableAgent): The agent whose message history is read.
    recipient (ConversableAgent): The chat partner that keys the history in sender.chat_messages.
    summary_args (dict): Not used here; part of the summary-method signature.

    Returns:
    The "content" field of the second-to-last message exchanged with recipient.
    """
    # NOTE(review): presumably the last message is the driver's 'TERMINATE'
    # and the one before it carries the tool result - confirm.
    history = sender.chat_messages[recipient]
    penultimate = history[-2]
    return penultimate["content"]

# One nested chat: talk to the driver for at most 4 turns and summarize the
# outcome with second_last_msg.
nested_chats = [
    dict(
        recipient=lib_tool_driver,
        max_turns=4,
        summary_method=second_last_msg,
    )
]


def _not_from_driver(sender):
    """Trigger condition: true for any sender other than the tool driver."""
    return sender not in [lib_tool_driver]


lib_tool_executor.register_nested_chats(
    nested_chats,
    trigger=_not_from_driver,
)

## Agent to read the abstract and check if the paper is about PSC or not.

In [10]:
# Prompt for the checker: answer YES/No on the first line, then a brief justification.
chatbot_system_message = "You are a helpful assistant. You will receive the abstract of a scientific paper, and you judge if this paper is about a research for the PSC (Primary Sclerosing Cholangitis) disease. Please answer with simply a 'YES' or 'No' as the first line. If it is, please describe briefly what the research is about (in on sentence), If not, please give a short explanation why you don't think it is"

# LLM-backed agent that judges an abstract; it does not execute code.
psc_checker = ConversableAgent(
    name="psc_checker",
    code_execution_config=False,  # Turn off code execution for this agent.
    llm_config={"config_list": [{"model": MODEL, "api_key": key}]},
    system_message=chatbot_system_message,
)

In [11]:
# LLM-backed planning agent; human_input_mode "ALWAYS" asks for human input on its turns.
architect = ConversableAgent(
    "architect",
    # Adjacent string literals are joined with no separator, so every piece
    # except the last ends with a space to keep the sentences apart.
    system_message = "You are an AI-based architect that make plans for the whole group. "
    "When you get a task, break down it into sub tasks, and each can be performed by one of your 'partner agents'. "
    "You will get an introduction about what each of your partner agents can do.",
    llm_config = {"config_list": [{"model": MODEL, "api_key": key}]},
    code_execution_config=False,  # Turn off code execution for this agent.
    human_input_mode = "ALWAYS"
)


## Now we make the group

In [12]:
# Self-descriptions of the three group members.
# NOTE(review): with send_introductions=True these are presumably shared with
# the group at the start of the chat - confirm against the AutoGen docs.
for agent, blurb in (
    (
        lib_tool_executor,
        "I have a couple of tools to fetch paper informaiton from PubMed. I can search the DOI or PubMed Id from title, get the abstract of a paper, and find the citations of the paper",
    ),
    (
        psc_checker,
        "I can read a paragraph of English text (such as the abstract of a paper), and try to tell if the paper is about the research on the PSC disease",
    ),
    (
        architect,
        "I am the architect, and I break down a task into subtasks. I should always be the one to speak first, to make plan for sub-tasks",
    ),
):
    agent.description = blurb


# The group starts with an empty history, runs for at most 20 rounds and does
# not allow the same agent to speak twice in a row.
group_chat = GroupChat(
    agents=[lib_tool_executor, psc_checker, architect],
    messages=[],
    max_round=20,
    allow_repeat_speaker=False,
    send_introductions=True,
)

# The manager uses the same LLM configuration as the other LLM-backed agents.
group_chat_manager = GroupChatManager(
    llm_config={"config_list": [{"model": MODEL, "api_key": key}]},
    groupchat=group_chat,
)



In [13]:
# Start the group chat with the task description as the first message.
# Do not pass `max_round` here: `initiate_chat` does not define that parameter
# (its own limit is `max_turns`), so it would only be absorbed as an extra
# keyword argument without limiting anything. The number of rounds is set by
# GroupChat(max_round=...).
chat_result = lib_tool_executor.initiate_chat(
    group_chat_manager,
    message = "I have paper titled 'A biliary immune landscape map of primary sclerosing cholangitis reveals a dominant network of neutrophils and tissue-resident T cells'. Can you get all its citations, as a list DOIs. This ends the first sub-task, and then the 'architect' should plan for the next sub-task: take the 10th citation (DOI), get its abstract and check if it is about PSC. Repeat this for the 11th and 23rd citation."
)

[33mlib_tool_executor[0m (to chat_manager):

I have paper titled 'A biliary immune landscape map of primary sclerosing cholangitis reveals a dominant network of neutrophils and tissue-resident T cells'. Can you get all its citations, as a list DOIs. This ends the first sub-task, and then the 'architect' should plan for the next sub-task: take the 10th citation (DOI), get its abstract and check if it is about PSC. Repeat this for the 11th and 23rd citation.

--------------------------------------------------------------------------------
[32m
Next speaker: architect
[0m
[31m
>>>>>>>> NO HUMAN INPUT RECEIVED.[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33marchitect[0m (to chat_manager):

To break this down into manageable subtasks, we can proceed as follows:

1. **Task 1: Retrieve Citations List**
   - **Assigned to:** `lib_tool_executor`
   - **Action:** Fetch the list of citation DOIs for the paper titled "A biliary immune landscape map of primary sclerosing cholangitis reveals 