In [1]:
from dotenv import load_dotenv
import json
import os
import time
load_dotenv()

True

In [2]:
# API keys are read from the process environment (load_dotenv() is called in the first cell).
# Mapping: variable to set -> variable to read it from.
# LANGCHAIN_HUB_API_KEY deliberately reuses the Langsmith key.
_API_KEY_SOURCES = {
    "OPENAI_API_KEY": "OPENAI_API_KEY",          ## Put your OpenAI API key here
    "TAVILY_API_KEY": "TAVILY_API_KEY",          ## Put your Tavily Search API key here
    "LANGCHAIN_API_KEY": "LANGCHAIN_API_KEY",    ## Put your Langsmith API key here
    "LANGCHAIN_HUB_API_KEY": "LANGCHAIN_API_KEY",  ## Put your Langsmith API key here
}
for _target, _source in _API_KEY_SOURCES.items():
    _value = os.getenv(_source)
    if _value is None:
        # Assigning None to os.environ raises "TypeError: str expected, not NoneType",
        # so a missing key is reported and skipped instead of crashing the cell.
        print(f"Warning: {_source} is not set; {_target} was left unset")
    else:
        os.environ[_target] = _value

os.environ["LANGCHAIN_TRACING_V2"] = 'true' ## Set this as True
os.environ["LANGCHAIN_ENDPOINT"] = 'https://api.smith.langchain.com/' ## Set this as: https://api.smith.langchain.com/
os.environ["LANGCHAIN_HUB_API_URL"] = 'https://api.hub.langchain.com' ## Set this as : https://api.hub.langchain.com
os.environ["LANGCHAIN_PROJECT"] = 'llm-agents'

### Easy Tool Paper

Paper Link: https://arxiv.org/pdf/2401.06201.pdf
```
@article{yuan2024easytool,
  title   = {EASYTOOL: Enhancing LLM-based Agents with Concise Tool Instruction},
  author  = {Siyu Yuan and Kaitao Song and Jiangjie Chen and Xu Tan and Yongliang Shen and Ren Kan and Dongsheng Li and Deqing Yang},
  journal = {arXiv preprint arXiv:2401.06201},
  year    = {2024}
}
```

![Simplify Tool Description](images/easytool-simplify.png)

### RestBench dataset

This dataset contains 50 APIs for retrieving information about movies and TV shows.

In [3]:
def read_json(address):
    """
    Load a JSON document from disk.

    Parameters:
    address (str): Path of the JSON file to open; it is read as UTF-8 text.

    Returns:
    The deserialized content of the file, i.e. whatever Python object the
    JSON document decodes to (a dict for a JSON object, a list for a JSON array, ...).
    """
    with open(address, mode='r', encoding='utf-8') as handle:
        return json.loads(handle.read())

In [4]:
restbench_dic = read_json('./data/tmdb_tool.json')
# Preview only the first few tool records; echoing the whole list floods the notebook output.
restbench_dic[:3]

[{'ID': 1,
  'tool_name': '/movie/{movie_id}/keywords',
  'tool_description': 'Get the keywords that have been added to a movie. You should first know the movie_id and thus this tool should be used after /search/movie.',
  'tool_usage': 'GET /movie/{movie_id}/keywords',
  'Example': {'Scenario': 'if you want to find out the keywords that have been added to a movie with movie_id 456.',
   'Parameters': {'input': 'GET /movie/456/keywords'}}},
 {'ID': 2,
  'tool_name': '/tv/popular',
  'tool_description': 'Get a list of the current popular TV shows on TMDb.',
  'tool_usage': 'GET /tv/popular',
  'Example': {'Scenario': 'if you want to get a list of the current popular TV shows on TMDb.',
   'Parameters': {'input': 'GET /tv/popular'}}},
 {'ID': 3,
  'tool_name': '/person/{person_id}',
  'tool_description': 'Get the primary person details by id. You should first know the person_id and thus this tool should be used after /search/person.',
  'tool_usage': 'GET /person/{person_id}',
  'Example

In [5]:
def get_tool_usage(tools, id):
    """
    Look up a tool by its ID and return its usage string and description.

    Parameters:
    tools (list of dict): Tool records; each record is accessed through the keys 'ID', 'tool_usage' and 'tool_description'.
    id (int): ID of the wanted tool. The value -1 short-circuits the lookup (no tool is searched for).

    Returns:
    tuple: (tool_usage, tool_description) of the first record whose 'ID' equals `id`,
    or (None, None) when `id` is -1 or when no record matches.
    """
    not_found = (None, None)

    # -1 marks "no tool", so there is nothing to search for
    if id == -1:
        return not_found

    # The first record with a matching ID wins; None means the scan found nothing
    match = next((entry for entry in tools if entry['ID'] == id), None)
    if match is None:
        return not_found
    return match['tool_usage'], match['tool_description']

### Multi-Tool Retrieval Chain using Langchain

Source: https://github.com/microsoft/JARVIS

In [6]:
# Model name passed to task_decompose_openai by the OpenAI tool-selection loop below.
# NOTE(review): the Anthropic section further down reassigns this same `model_name`
# global to a Claude model; re-run this cell before re-running the OpenAI cells.
# Alternative OpenAI model:
# model_name = 'gpt-3.5-turbo'
model_name = 'gpt-4-1106-preview'

In [7]:
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate
)
from langchain import LLMChain
from langchain_openai import ChatOpenAI

In [8]:
def get_tool_choice(task_list, tool_dic):
    """
    Resolve the tool referenced by each task into its usage string and description.

    Parameters:
    task_list (list of dict): Tasks in execution order; each one is read through its "ID" key, which names the tool to use.
    tool_dic (list of dict): Tool records, passed unchanged to `get_tool_usage` for every lookup.

    Returns:
    tuple:
        - list of str: Usage strings of the resolved tools, in task order.
        - list of str: Descriptions of those same tools, aligned index-by-index with the first list.

    Note:
    A task is dropped from both lists whenever `get_tool_usage` returns None as the usage,
    which is what it does for ID -1 and for IDs that match no tool.
    """
    # One lookup per task, performed lazily and in task order
    lookups = (get_tool_usage(tool_dic, task["ID"]) for task in task_list)

    # Keep only the tasks whose tool was actually found
    resolved = [(usage, description) for usage, description in lookups if usage is not None]

    tool_choice_ls = [usage for usage, _ in resolved]
    tool_description_ls = [description for _, description in resolved]
    return tool_choice_ls, tool_description_ls

In [9]:
# Sanity check: resolve two hand-written subtasks to their tools
task_list = [
    {'Task': "Search for the id of a person with the name 'Sofia Coppola'", 'ID': 35},
    {'Task': 'Get the movie credits for a person with person_id 456', 'ID': 50},
]
tool_choice_ls, tool_description_ls = get_tool_choice(task_list, restbench_dic)
print(tool_choice_ls, tool_description_ls)

['GET /search/person', 'GET /person/{person_id}/movie_credits'] ['Search for people, which can obtain person_id.', 'Get the movie credits for a person, the results contains various information such as popularity and release date. You should first know the person_id and thus this tool should be used after /search/person.']


### Using OpenAI for tool selection

In [10]:
def task_decompose_openai(question, Tool_dic, openai_model_name):
    """
    Ask an OpenAI chat model to split a user question into tool-backed subtasks.

    Parameters:
    - question (str): The user question to decompose.
    - Tool_dic: The available tools. It is rendered into the prompt and also passed to `get_tool_choice` to resolve the tool IDs picked by the model (this notebook passes the list of tool records).
    - openai_model_name (str): Name of the OpenAI chat model to call.

    Returns:
    On success, a 3-tuple:
    - the task list found under the "Output" key of the model's JSON reply,
    - the tool usage strings resolved for those tasks,
    - the descriptions of those tools.
    On failure, the int -1 (not a tuple), once six consecutive attempts have raised.

    Any exception raised while calling the model, parsing its reply or resolving the tools is printed and triggers another attempt.
    """
    max_attempts = 6  # one initial try plus five retries

    # The model is asked for a JSON object reply through response_format
    chat = ChatOpenAI(
        model_name=openai_model_name,
        temperature=0.2,
        model_kwargs={"response_format": {"type": "json_object"}},
    )

    system_message_prompt = SystemMessagePromptTemplate.from_template(
        "You are a helpful assistant. You respond in JSON format"
    )
    human_message_prompt = HumanMessagePromptTemplate.from_template(
        '''
        We have movie database and the following tools:\n
        {Tool_dic}
        You need to decompose a complex user's question into some simple subtasks and let the model execute it step by step with these tools.\n
        Please note that: 
         1. You should break down tasks into appropriate subtasks to use the tools mentioned above.
         2. You should not only list the subtask, but also list the ID of the tool used to solve this subtask.
         3. If you think you do not need to use the tool to solve the subtask, just leave it as {{"ID": -1}}
         4. You MUST consider the logical connections, order and constraints among the tools to achieve a correct tool path.
         5. You must output a LIST of JSONs. The list consists of a sequence of steps. Each JSON is information on the Task and ID. The desired output is shown below
         7. Put your results in the Output key as shown below:

        "Question: Pause the player" \n
        "Output: [{{"Task":"Get information about the users current playback state", "ID":15}}, {{"Task":"Pause playback on the users account", "ID":19}}]"
   
        "This is the user's question: {question}\n"
        "Output:"
        '''
    )
    chain = LLMChain(
        llm=chat,
        prompt=ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]),
    )

    for _ in range(max_attempts):
        try:
            ### Get the task planning and API output from the model
            reply_text = chain.invoke({"question": question, "Tool_dic": Tool_dic})['text']
            task_list = json.loads(reply_text)['Output']
            ### Use the documentation to retrieve the API from its ID
            tool_choice_ls, tool_description_ls = get_tool_choice(task_list, Tool_dic)
        except Exception as e:
            print(f"task decompose fails: {e}")
        else:
            return task_list, tool_choice_ls, tool_description_ls

    # Every attempt raised: signal failure with the -1 sentinel
    return -1

In [11]:
query_list = [
    'give me count of movies directed by Sofia Coppola',
    "Who was the lead actor in the movie The Dark Knight?",
    "What is the logo of the Walt Disney?",
    "What is top-1 rated movie?",
    "What is the release date of the movie \"The Matrix\"?",
]

for query in query_list:
    outcome = task_decompose_openai(query, restbench_dic, model_name)
    # task_decompose_openai returns the bare int -1 (not a 3-tuple) once its retries are
    # exhausted; unpacking that directly would raise "cannot unpack non-iterable int".
    if outcome == -1:
        print(f"Query is {query} and task decomposition failed after repeated attempts")
        print("===================")
        continue
    result, tool_choice_ls, tool_description_ls = outcome
    print(f"Query is {query} and task decomposition from the model is {result} and selected tools are: {tool_choice_ls}")
    print(f"Description of the selected tools are: {tool_description_ls}")
    print("===================")

  warn_deprecated(


Query is give me count of movies directed by Sofia Coppola and task decomposition from the model is [{'Task': "Search for the id of a person with the name 'Sofia Coppola'", 'ID': 35}, {'Task': 'Get the movie credits for a person', 'ID': 50}, {'Task': 'Count the number of movies directed by the person', 'ID': -1}] and selected tools are: ['GET /search/person', 'GET /person/{person_id}/movie_credits']
Description of the selected tools are: ['Search for people, which can obtain person_id.', 'Get the movie credits for a person, the results contains various information such as popularity and release date. You should first know the person_id and thus this tool should be used after /search/person.']
Query is Who was the lead actor in the movie The Dark Knight? and task decomposition from the model is [{'Task': "Search for the movie id of 'The Dark Knight'", 'ID': 12}, {'Task': "Get the cast and crew for the movie 'The Dark Knight' using the movie_id obtained", 'ID': 53}, {'Task': 'Identify th

### Using Anthropic models

In [12]:
### If you want to test Claude model too, add its key here
_anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
if _anthropic_api_key is None:
    # Assigning None to os.environ raises "TypeError: str expected, not NoneType",
    # so a missing key is reported instead of crashing this optional cell.
    print("Warning: ANTHROPIC_API_KEY is not set; the Anthropic cells below need it")
else:
    os.environ["ANTHROPIC_API_KEY"] = _anthropic_api_key
from langchain_anthropic import ChatAnthropic

In [13]:
# Claude model name passed to task_decompose_anthropic by the loop below.
# NOTE(review): this overwrites the `model_name` global that the OpenAI loop above reads;
# re-run the OpenAI model cell before re-running the OpenAI loop.
# Alternative Claude models:
# model_name = 'claude-3-opus-20240229'
# model_name = 'claude-3-sonnet-20240229'
model_name='claude-3-haiku-20240307'


In [14]:
def task_decompose_anthropic(question, Tool_dic, anthropic_model_name):
    """
    Ask an Anthropic chat model to split a user question into tool-backed subtasks.

    Parameters:
    - question (str): The user question to decompose.
    - Tool_dic: The available tools. It is rendered into the prompt and also passed to `get_tool_choice` to resolve the tool IDs picked by the model (this notebook passes the list of tool records).
    - anthropic_model_name (str): Name of the Anthropic chat model to call.

    Returns:
    On success, a 3-tuple:
    - the task list parsed from the model's reply (the value under "Output" when the parsed reply contains that key, otherwise the parsed reply itself),
    - the tool usage strings resolved for those tasks,
    - the descriptions of those tools.
    On failure, the int -1 (not a tuple), once six consecutive attempts have raised.

    The raw reply text is printed on every attempt. Any exception raised while calling the model, parsing its reply or resolving the tools is printed and triggers another attempt.
    """
    max_attempts = 6  # one initial try plus five retries

    chat = ChatAnthropic(model=anthropic_model_name)

    system_message_prompt = SystemMessagePromptTemplate.from_template(
        "You are a helpful assistant. You respond in JSON format"
    )
    human_message_prompt = HumanMessagePromptTemplate.from_template(
        '''
        We have movie database and the following tools:
        {Tool_dic}
        You need to decompose a complex user's question into some simple subtasks and let the model execute it step by step with these tools.\n
        Please note that: 
         1. You should break down tasks into appropriate subtasks to use the tools mentioned above.
         2. You should not only list the subtask, but also list the ID of the tool used to solve this subtask.
         3. If you think you do not need to use the tool to solve the subtask, just leave it as {{"ID": -1}}
         4. You MUST consider the logical connections, order and constraints among the tools to achieve a correct tool path.
         5. You must output a LIST of JSONs. The list consists of a sequence of steps. Each JSON is information on the Task and ID. The desired output is shown below.
         6. Only return the JSON. Don't give a summary of the steps

        "Question: Pause the player" \n
        "Output: [{{"Task":"Get information about the users current playback state", "ID":15}}, {{"Task":"Pause playback on the users account", "ID":19}}]"
   
        "This is the user's question: {question}\n"
        "Output:"
        '''
    )
    chain = LLMChain(
        llm=chat,
        prompt=ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]),
    )

    for _ in range(max_attempts):
        try:
            reply_text = chain.invoke({"question": question, "Tool_dic": Tool_dic})['text']
            print("Result: ", reply_text)
            parsed = json.loads(reply_text)
            # Accept both reply shapes: a wrapper carrying an 'Output' key, or the bare task list
            task_list = parsed['Output'] if 'Output' in parsed else parsed
            tool_choice_ls, tool_description_ls = get_tool_choice(task_list, Tool_dic)
        except Exception as e:
            print(f"task decompose fails: {e}")
        else:
            return task_list, tool_choice_ls, tool_description_ls

    # Every attempt raised: signal failure with the -1 sentinel
    return -1

In [15]:
query_list = [
    'give me count of movies directed by Sofia Coppola',
    "Who was the lead actor in the movie The Dark Knight?",
    "What is the logo of the Walt Disney?",
    "What is top-1 rated movie?",
    "What is the release date of the movie \"The Matrix\"?",
]

for query in query_list:
    # Short pause between consecutive model calls
    time.sleep(1)
    outcome = task_decompose_anthropic(query, restbench_dic, model_name)
    # task_decompose_anthropic returns the bare int -1 (not a 3-tuple) once its retries are
    # exhausted; unpacking that directly would raise "cannot unpack non-iterable int".
    if outcome == -1:
        print(f"Query is {query} and task decomposition failed after repeated attempts")
        print("===================")
        continue
    result, tool_choice_ls, tool_description_ls = outcome
    print(f"Query is {query} and task decomposition from the model is {result} and selected tools are: {tool_choice_ls}")
    print(f"Description of the selected tools are: {tool_description_ls}")
    print("===================")

Result:  [
    {
        "Task": "Search for the person ID of Sofia Coppola",
        "ID": 35
    },
    {
        "Task": "Get the movie credits for Sofia Coppola",
        "ID": 50
    },
    {
        "Task": "Count the number of movies directed by Sofia Coppola",
        "ID": -1
    }
]
Query is give me count of movies directed by Sofia Coppola and task decomposition from the model is [{'Task': 'Search for the person ID of Sofia Coppola', 'ID': 35}, {'Task': 'Get the movie credits for Sofia Coppola', 'ID': 50}, {'Task': 'Count the number of movies directed by Sofia Coppola', 'ID': -1}] and selected tools are: ['GET /search/person', 'GET /person/{person_id}/movie_credits']
Description of the selected tools are: ['Search for people, which can obtain person_id.', 'Get the movie credits for a person, the results contains various information such as popularity and release date. You should first know the person_id and thus this tool should be used after /search/person.']
Result:  [
  {

### Selection with an even broader set of tools

If you have an even bigger set of tools to choose from, another option is to add a tool-retriever step to the pipeline. An embedding model compares each task description against the tool descriptions and retrieves only the top-k most similar tools, so the model selects from that subset instead of the full tool list.

This is highlighted in the example below:

<img src="images/tool-selection.png" alt="Tool selection diagram" width="600"/>
