In [2]:
!pip install --upgrade pip --quiet --disable-pip-version-check --root-user-action=ignore
!pip install langchain==0.0.206 --quiet --disable-pip-version-check --root-user-action=ignore
!pip install openai --quiet --disable-pip-version-check --root-user-action=ignore

In [3]:
import re, os, boto3, json
from typing import List, Union
import nltk
from sagemaker import Session
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import StringPromptTemplate
from langchain import OpenAI, SerpAPIWrapper, LLMChain
from langchain.schema import AgentAction, AgentFinish
from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint

# Custom LLM Agent

This notebook goes through how to create your own custom LLM agent.

An LLM agent consists of four parts:

- PromptTemplate: This is the prompt template that can be used to instruct the language model on what to do
- LLM: This is the language model that powers the agent
- `stop` sequence: Instructs the LLM to stop generating as soon as this string is found
- OutputParser: This determines how to parse the LLMOutput into an AgentAction or AgentFinish object


The LLMAgent is used in an AgentExecutor. This AgentExecutor can largely be thought of as a loop that:
1. Passes user input and any previous steps to the Agent (in this case, the LLMAgent)
2. If the Agent returns an `AgentFinish`, then return that directly to the user
3. If the Agent returns an `AgentAction`, then use that to call a tool and get an `Observation`
4. Repeat, passing the `AgentAction` and `Observation` back to the Agent until an `AgentFinish` is emitted.
    
`AgentAction` is a response that consists of `action` and `action_input`. `action` refers to which tool to use, and `action_input` refers to the input to that tool. `log` can also be provided as more context (that can be used for logging, tracing, etc).

`AgentFinish` is a response that contains the final message to be sent back to the user. This should be used to end an agent run.
        
In this notebook we walk through how to create a custom LLM agent.

## Set up environment

Do necessary imports, etc.

In [4]:
#load stored variables from previous notebook
%store -r

# Initialize key environment variables
sagemaker_session = Session()
aws_role = sagemaker_session.get_caller_identity_arn()
aws_region = boto3.Session().region_name
sm_client = boto3.client("sagemaker", aws_region)
model_version = "*"

print(falcon_inference_model)
# print(flant5_inference_model)
print(embedding_model)
# print(falcon_inference_model)
try:
    print(ai21_inference_model)
except:
    print("You did not load the AI21 Jurassic Grande model and will not be able to select it for this lab")

huggingface-llm-falcon-40b-instruct-bf16
huggingface-textembedding-gpt-j-6b-fp16
You did not load the AI21 Jurassic Grande model and will not be able to select it for this lab


### Registering your Third-Party API key (PURELY OPTIONAL!)
A later section uses the ChatModel API exposed by third-party API providers such as OpenAI, Anthropic, and Google Vertex. Because this API is currently not supported by the SageMaker-deployed models, this part is optional: you can experiment with it at your own cost if you register for an OpenAI key, or if you already have access to Anthropic (which is currently not accepting new registrations).

In [5]:
# Optional third-party API keys. They are read from the environment so that
# secrets never have to be typed into (and saved with) the notebook. When a
# variable is not set the key is "", which keeps that provider disabled.
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")

#### Load Widgets used across the notebook

In [6]:
from ipywidgets import Select, Text

# Build the list of selectable models: the SageMaker Falcon endpoint is always
# offered, each third-party provider only when its API key is non-empty.
# ('Flan-t5-xxl' and 'AI21-Jurrasic-Grande' are not offered in this configuration.)
model_selections = ['SageMaker-Falcon40B'] + [
    provider_label
    for provider_label, provider_key in (
        ('OpenAI', openai_api_key),
        ('Anthropic-Claude', anthropic_api_key),
    )
    if provider_key
]

# Selection widget reused across the notebook for easier configuration
model_selection_widget = Select(options=model_selections)

In [7]:
# Create LLM clients for the optional third-party providers. A client is only
# created (and its name only bound) when the matching API key is non-empty.
from langchain import OpenAI

if openai_api_key:
    openai_llm = OpenAI(openai_api_key=openai_api_key)
    print("(success) - Successfully connected to OpenAI")
else:
    # Message corrected: the missing credential is an OpenAI API key, not an "OpenAPI" key
    print("(failure) - You have not provided an OpenAI API key, and you won't have access to work with the model in this notebook")

# Work with Anthropic
from langchain import Anthropic

if anthropic_api_key:
    anthropic_llm = Anthropic(anthropic_api_key=anthropic_api_key)
    print("(success) - Successfully connected to Anthropic")
else:
    print("(failure) - You have not provided an Anthropic API key, and you won't have access to work with the model in this notebook")

(failure) - You have not provided an OpenAPI key, and you won't have access to work with the model in this notebook
(failure) - You have not provided an AnthropicAPI key, and you won't have access to work with the model in this notebook


In [8]:
# Installing reuqired dependencies for third party Foundation APIs
import sys
if openai_api_key:
    !pip install openai --quiet
if anthropic_api_key:
    !pip install anthropic --quiet

## Connecting your model on AWS SageMaker
To work with your models on AWS you can use either an integration with the SageMaker endpoint, or in the future directly talk to the Bedrock API. 

For now, let's look at how to work with a custom SageMaker Model Endpoint.

In [9]:
# Generation settings per model. Each dict is later handed to a SageMaker
# endpoint wrapper as `model_kwargs`. `num_return_sequences` and `top_p` are
# not set in any of these configurations.

# Model configuration for FlanT5 model
flant5_parameters = dict(
    max_new_tokens=200,
    max_length=1024,
    top_k=1,
    do_sample=True,
    temperature=0.1,
    return_full_text=False,
    include_prompt_in_result=False,
)

# Model configuration for falcon 40b instruct
falcon_parameters = dict(
    max_new_tokens=200,
    max_length=1024,
    top_k=1,
    do_sample=True,
    temperature=0.1,
    return_full_text=False,
    include_prompt_in_result=False,
)

# Configuration for Jurassic Grande Model
j2_grande_parameters = dict(
    max_new_tokens=200,
    max_length=1024,
    top_k=1,
    do_sample=True,
    temperature=0.1,
    return_full_text=False,
    include_prompt_in_result=False,
)

In [10]:
import json
from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint

class InferenceContentHandler(LLMContentHandler):
    """JSON (de)serialisation between LangChain prompts and the model endpoint."""

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs={}) -> bytes:
        """Encode the prompt and generation parameters as a UTF-8 JSON request body."""
        payload = {"inputs": prompt, "parameters": model_kwargs}
        return json.dumps(payload).encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Return the `generated_text` of the first entry in the JSON response.

        `output` must expose `.read()`; the bytes read are decoded as UTF-8.
        """
        raw_body = output.read().decode("utf-8")
        generations = json.loads(raw_body)
        return generations[0]["generated_text"]

# Handler instance passed to the endpoint wrapper(s) below
inference_content_handler = InferenceContentHandler()

# Instantiate all available models
# flant5_xxl_llm = SagemakerEndpoint(
#     endpoint_name=_MODEL_CONFIG_[flant5_inference_model]['endpoint_name'],
#     region_name=aws_region,
#     model_kwargs=flant5_parameters,
#     content_handler=inference_content_handler,
#     )
# print(f"Loaded model endpoint: {flant5_inference_model}")

# Falcon endpoint wrapper. The endpoint name is looked up in _MODEL_CONFIG_,
# which is not defined in this notebook (presumably restored by `%store -r` — confirm).
falcon_sm_llm = SagemakerEndpoint(
    endpoint_name=_MODEL_CONFIG_[falcon_inference_model]['endpoint_name'],
    region_name=aws_region,
    model_kwargs=falcon_parameters,
    content_handler=inference_content_handler,
    )
print(f"Loaded model endpoint: {falcon_inference_model}")

# Optional load
# j2_grande_sm_llm = SagemakerEndpoint(
#     endpoint_name=_MODEL_CONFIG_[ai21_inference_model]['endpoint_name'],
#     region_name=aws_region,
#     model_kwargs=j2_grande_parameters,
#     content_handler=inference_content_handler,
#     )
# print(f"Loaded model endpoint: {ai21_inference_model}")



Loaded model endpoint: huggingface-llm-falcon-40b-instruct-bf16


In [11]:
# Display the model picker; its selected value is read by the next cell
model_selection_widget

Select(options=('SageMaker-Falcon40B',), value='SageMaker-Falcon40B')

In [12]:
match model_selection_widget.value:
    case "Flan-t5-xxl":
        llm = flant5_xxl_llm 
    case "SageMaker-Falcon40B":
        llm = falcon_sm_llm
    case "AI21-Jurrasic-Grande":
        llm = j2_grande_sm_llm
    case "OpenAI":
        llm = openai_llm
        
print(f"Activated {model_selection_widget.value}")

Activated SageMaker-Falcon40B


In [13]:
# Set model configuration
parameters = {
    "max_new_tokens": 200,
    "max_length": 1024,
    # "num_return_sequences": 1,
    "top_k": 1,
    # "top_p": 0.50,
    "do_sample": True,
    "temperature": 0.1,
    "return_full_text": False,
    "include_prompt_in_result": False,
}

class InferenceContentHandler(LLMContentHandler):
    """JSON content handler that also logs the token count of every prompt."""

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs={}) -> bytes:
        """Log the prompt's token count and encode the request as UTF-8 JSON.

        Args:
            prompt: Full prompt text sent to the endpoint.
            model_kwargs: Generation parameters forwarded under "parameters".

        Returns:
            The JSON request body encoded as UTF-8 bytes.
        """
        try:
            token_count = len(nltk.word_tokenize(prompt))
        except LookupError:
            # NLTK raises LookupError when its tokenizer data is not installed.
            # The count is diagnostic only, so approximate it rather than
            # failing the inference call.
            token_count = len(prompt.split())
        # Report the number of tokens (previously the character count was printed)
        print(f'[INFO] - Nb of tokens for prompt/context:{token_count}\n')
        input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Return the `generated_text` of the first entry in the JSON response.

        `output` must expose `.read()`; the bytes read are decoded as UTF-8.
        """
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json[0]["generated_text"]

# Replace the handler instance with one built from the class defined in this cell
inference_content_handler = InferenceContentHandler()

# Instantiate all available models 
# NOTE(review): this passes `falcon_parameters`, not the `parameters` dict
# defined at the top of this cell — confirm which one is intended.
falcon_sm_llm = SagemakerEndpoint(
    endpoint_name=_MODEL_CONFIG_[falcon_inference_model]['endpoint_name'],
    region_name=aws_region,
    model_kwargs=falcon_parameters,
    content_handler=inference_content_handler,
    )
print(f"Loaded model endpoint: {falcon_inference_model}")

# flant5_xxl_llm = SagemakerEndpoint(
#     endpoint_name=_MODEL_CONFIG_[flant5_inference_model]['endpoint_name'],
#     region_name=aws_region,
#     model_kwargs=flant5_parameters,
#     content_handler=inference_content_handler,
#     )
# print(f"Loaded model endpoint: {flant5_inference_model}")

# Optional load
# j2_grande_sm_llm = SagemakerEndpoint(
#     endpoint_name=_MODEL_CONFIG_[ai21_inference_model]['endpoint_name'],
#     region_name=aws_region,
#     model_kwargs=j2_grande_parameters,
#     content_handler=inference_content_handler,
#     )
# print(f"Loaded model endpoint: {ai21_inference_model}")

Loaded model endpoint: huggingface-llm-falcon-40b-instruct-bf16


## Set up tools

Set up any tools the agent may want to use. This may be necessary to put in the prompt (so that the agent knows to use these tools).

SerpAPI is a web scraper for Google and other search engines that provides an API for searching their content.

Register here for a 30-day free trial:
https://serpapi.com/users/sign_up

In [14]:
!pip install google-search-results  --quiet

[0m

In [15]:
# Keep a SERPAPI_API_KEY that is already exported in the environment; only fall
# back to the placeholder (replace it with your key) when none is set.
os.environ.setdefault("SERPAPI_API_KEY", "<insert your key here>")

In [16]:
# Define which tools the agent can use to answer user queries
search = SerpAPIWrapper()
tools = [
    Tool(
        name = "Search",
        func=search.run,
        description="useful for when you need to answer questions about current events"
    )
]

## Prompt Template

This instructs the agent on what to do. Generally, the template should incorporate:
    
- `tools`: which tools the agent has access and how and when to call them.
- `intermediate_steps`: These are tuples of previous (`AgentAction`, `Observation`) pairs. These are generally not passed directly to the model, but the prompt template formats them in a specific way.
- `input`: generic user input

### We're adding few-shot examples for better performance

In [17]:
# Text substituted for the {few_shots} placeholder of the prompt template.
# It stays empty here: the two example cells that follow are wrapped in string
# literals, so they do not modify it.
few_shots=""

In [18]:
'''
few_shots = """Answer the following questions as best you can. You have access to the following tools:\n\n"""
few_shots += "\n".join([f"{tool.name}: {tool.description}" for tool in tools])
few_shots += """\n\nUse the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: What profession does Nicholas Ray and Elia Kazan have in common?
Thought: They are both filmmakers.
Action: Search
Action Input: "Nicholas Ray" "Elia Kazan" profession

Observation:Nicholas Ray was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause. He is appreciated for many narrative ...
 Elia Kazan was also a film director, screenwriter, and actor.
Action: Search
Action Input: "Elia Kazan" profession

Observation:Elia Kazan was an American film and theatre director, producer, screenwriter and actor, described by The New York Times as "one of the most honored and influential directors in Broadway and Hollywood history". Born in Constantinople, to Cappadocian Greek parents, his family came to the United States in 1913.
 They both have the same profession of film director, screenwriter, and actor.
Final Answer: Nicholas Ray and Elia Kazan have the same profession of film director, screenwriter, and actor.
\n\n"""
'''

'\nfew_shots = """Answer the following questions as best you can. You have access to the following tools:\n\n"""\nfew_shots += "\n".join([f"{tool.name}: {tool.description}" for tool in tools])\nfew_shots += """\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: What profession does Nicholas Ray and Elia Kazan have in common?\nThought: They are both filmmakers.\nAction: Search\nAction Input: "Nicholas Ray" "Elia Kazan" profession\n\nObservation:Nicholas Ray was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause. He is appreciated for

In [19]:
'''
few_shots += """Answer the following questions as best you can. You have access to the following tools:\n\n"""
few_shots += "\n".join([f"{tool.name}: {tool.description}" for tool in tools])
few_shots += """\n\nUse the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought: Matt Groening named Milhouse after someone.
Thought: I need to find out who that is.
Action: Search
Action Input: "Matt Groening" "Milhouse"

Observation:For other uses, see Millhouse (disambiguation) and Milhous (disambiguation). Milhouse Mussolini Van Houten is a recurring character in the Fox animated ...
 I need to find out who Matt Groening named Milhouse after.
Action: Search
Action Input: "Matt Groening" "Milhouse" origin

Observation:Creation. Milhouse was designed by Matt Groening for a planned series on NBC, which was abandoned. The design was then used for a Butterfinger commercial, and it was decided to use the character in the series. Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.
 I now know the final answer.
Final Answer: Matt Groening named the character Milhouse after U.S. president Richard Nixon, whose middle name was Milhous.\n\n"""
'''

'\nfew_shots += """Answer the following questions as best you can. You have access to the following tools:\n\n"""\nfew_shots += "\n".join([f"{tool.name}: {tool.description}" for tool in tools])\nfew_shots += """\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?\nThought: Matt Groening named Milhouse after someone.\nThought: I need to find out who that is.\nAction: Search\nAction Input: "Matt Groening" "Milhouse"\n\nObservation:For other uses, see Millhouse

In [20]:
# Set up the base template
template = """{few_shots}
Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
{agent_scratchpad}"""

In [21]:
# Set up a prompt template
class CustomPromptTemplate(StringPromptTemplate):
    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]
    
    few_shots: str
    
    def format(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
            
        #Set the few shots variable
        kwargs["few_shots"] = self.few_shots
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])

        #print(self.template.format(**kwargs))
        return self.template.format(**kwargs)

In [22]:
# Prompt used by the agent without memory
prompt = CustomPromptTemplate(
    template=template,
    tools=tools,
    few_shots=few_shots,
    # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
    # (`few_shots` is likewise filled in by the template class itself)
    # This includes the `intermediate_steps` variable because that is needed
    input_variables=["input", "intermediate_steps"]
)

## Output Parser

The output parser is responsible for parsing the LLM output into `AgentAction` and `AgentFinish`. This usually depends heavily on the prompt used.

This is where you can change the parsing to do retries, handle whitespace, etc

In [23]:
class CustomOutputParser(AgentOutputParser):
    """Parse raw LLM text into an `AgentAction` or an `AgentFinish`."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Interpret one LLM completion.

        Args:
            llm_output: Raw text generated by the LLM.

        Returns:
            `AgentFinish` when the text contains "Final Answer:" (everything
            after the last occurrence is the output), otherwise an
            `AgentAction` built from the "Action:" / "Action Input:" fields.

        Raises:
            ValueError: If neither a final answer nor an action can be found.
        """
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input. No newline is required between
        # the two fields, and DOTALL lets the input span several lines.
        regex = r"Action\s*\d*\s*:(.*?)Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        found = re.search(regex, llm_output, re.DOTALL)
        if not found:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = found.group(1).strip()
        # Strip all surrounding whitespace (not just spaces) before removing
        # the quotes: a trailing newline would otherwise keep the closing quote
        # in the tool input.
        action_input = found.group(2).strip().strip('"')
        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input, log=llm_output)

In [24]:
# Parser instance handed to the agent(s) created below
output_parser = CustomOutputParser()

## Set up LLM

In [25]:
# Display the model picker again; its selected value is read by the next cell
model_selection_widget

Select(options=('SageMaker-Falcon40B',), value='SageMaker-Falcon40B')

In [26]:
# Loading the selected model
match model_selection_widget.value:
    case "SageMaker-Falcon40B":
        llm = falcon_sm_llm
    case "OpenAI":
        llm = openai_llm
        
print(f"Activated {model_selection_widget.value}")

Activated SageMaker-Falcon40B


## Define the stop sequence

This is important because it tells the LLM when to stop generation.

This depends heavily on the prompt and model you are using. Generally, you want this to be whatever token you use in the prompt to denote the start of an `Observation` (otherwise, the LLM may hallucinate an observation for you).

## Set up the Agent

We can now combine everything to set up our agent

In [27]:
# LLM chain consisting of the LLM and a prompt
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [28]:
# Names of the tools the agent is allowed to call
tool_names = [tool.name for tool in tools]
print(f'tool_names={tool_names}')
# Single-action agent: the chain produces text, the parser turns it into an
# action or a final answer. Generation is stopped at "\nObservation:" so the
# model does not write the observation itself.
agent = LLMSingleActionAgent(
    llm_chain=llm_chain, 
    output_parser=output_parser,
    stop=["\nObservation:"], 
    allowed_tools=tool_names
)

tool_names=['Search']


## Use the Agent

Now we can use it!

In [29]:
# Executor that runs the agent together with its tools; verbose=True is passed through to the executor
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)

In [30]:
# Question passed to the agent; uncomment exactly one alternative to try another
#question = """Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?"""
#question= "How many people live in canada as of 2023?"
#question = "What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?"
question = "What profession does Nicholas Ray and Elia Kazan have in common?"
#question= "who won the roland garros tennis tournament in 2023?"
#question= "Were Pavel Urysohn and Leonid Levin known for the same type of work?"

In [None]:
# Run the agent on the selected question
agent_executor.run(question)

## Adding Memory

If you want to add memory to the agent, you'll need to:

1. Add a place in the custom prompt for the chat_history
2. Add a memory object to the agent executor.

In [None]:
# Set up the base template
template_with_history = """{few_shots}
Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Previous conversation history:
{history}

New question: {input}
{agent_scratchpad}"""

In [None]:
# Prompt for the memory-enabled agent; `history` is an additional input variable
prompt_with_history = CustomPromptTemplate(
    template=template_with_history,
    tools=tools,
    few_shots=few_shots,
    # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
    # This includes the `intermediate_steps` variable because that is needed
    input_variables=["input", "intermediate_steps", "history"]
)

In [None]:
# Rebuild the chain with the history-aware prompt (rebinds `llm_chain` from the earlier section)
llm_chain = LLMChain(llm=llm, prompt=prompt_with_history)

In [None]:
# Recreate the agent on top of the history-aware chain (rebinds `agent`);
# parser, stop sequence and allowed tools are the same as before.
tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
    llm_chain=llm_chain, 
    output_parser=output_parser,
    stop=["\nObservation:"], 
    allowed_tools=tool_names
)

In [None]:
from langchain.memory import ConversationBufferWindowMemory

In [None]:
# Windowed conversation memory; k=2 is presumably the number of most recent
# exchanges kept — confirm against the LangChain documentation.
memory=ConversationBufferWindowMemory(k=2)

In [None]:
# Rebind the executor, this time passing the memory object as well
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)

In [None]:
# First question for the memory-enabled agent
agent_executor.run("How many people live in canada as of 2023?")

In [None]:
# Follow-up question phrased relative to the previous one
agent_executor.run("how about in mexico?")