In [None]:
!pip install langchain openai tiktoken cohere arxiv duckduckgo-search langchainhub -qU

In [27]:
import os
from dotenv import load_dotenv, find_dotenv
# Load settings from the .env file located by find_dotenv(); the value returned
# by load_dotenv() is what this cell displays.
load_dotenv(find_dotenv())
# Alternative kept for reference: prompt for the API key interactively instead
# of reading it from a .env file.
# import getpass
# os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

True

## LangChain ReAct Agent

### Base LLM Powering RAG

In [4]:
from langchain_community.chat_models.openai import ChatOpenAI

# Chat model used by the cells below (tool loading and the ReAct agent).
# temperature=0 is presumably chosen to keep the agent's output as repeatable
# as possible.
llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")

In [5]:
from langchain_core.messages import HumanMessage

# Single-turn conversation used to smoke-test the chat model.
messages = [HumanMessage(content="Hello, how are you?")]

In [7]:
response = llm.invoke(messages)
response

AIMessage(content="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to assist you. How can I help you today?", response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 13, 'total_tokens': 46}, 'model_name': 'gpt-4-1106-preview', 'system_fingerprint': 'fp_94f711dcf6', 'finish_reason': 'stop', 'logprobs': None}, id='run-201e17cc-04de-4401-90cd-cc9eaa6491b2-0')

### Tool Belt

In [8]:
from langchain.agents import load_tools

# Tool belt for the agent: arXiv paper search plus DuckDuckGo web search.
# They are exposed to the prompt under the names `arxiv` and `duckduckgo_search`.
tools = load_tools(["arxiv", "ddg-search"], llm=llm)

### LCEL Agent Construction

In [9]:
from langchain import hub

# Fetch the shared ReAct prompt that instructs the model to express each tool
# call as a JSON blob with `action` and `action_input` keys.
prompt = hub.pull("hwchase17/react-json")

In [10]:
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input', 'tool_names', 'tools'], metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'react-json', 'lc_hub_commit_hash': '669cf4d6988c3b8994a8189edb3891e07948e1c0abfd500823914548c53afa7c'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['tool_names', 'tools'], template='Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nThe way you use the tools is by specifying a json blob.\nSpecifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n\nThe only values that should be in the "action" field are: {tool_names}\n\nThe $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\n\n```\n{{\n  "action": $TOOL_NAME,\n  "action_input": $INPUT\n}}\n```\n\nALWAYS use the following format:\n\nQuestio

In [11]:
from langchain.tools.render import render_text_description

# Pre-fill the two tool-related template variables so that only `input` and
# `agent_scratchpad` remain to be supplied when the agent runs.
prompt = prompt.partial(
    tool_names=", ".join(tool.name for tool in tools),
    tools=render_text_description(tools),
)

In [12]:
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], partial_variables={'tools': 'arxiv: A wrapper around Arxiv.org Useful for when you need to answer questions about Physics, Mathematics, Computer Science, Quantitative Biology, Quantitative Finance, Statistics, Electrical Engineering, and Economics from scientific articles on arxiv.org. Input should be a search query.\nduckduckgo_search: A wrapper around DuckDuckGo Search. Useful for when you need to answer questions about current events. Input should be a search query.', 'tool_names': 'arxiv, duckduckgo_search'}, metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'react-json', 'lc_hub_commit_hash': '669cf4d6988c3b8994a8189edb3891e07948e1c0abfd500823914548c53afa7c'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['tool_names', 'tools'], template='Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nThe way you use the tools is by specifying 

In [13]:
# Stop generation as soon as the model begins an "Observation" line —
# presumably so the model does not invent the tool result, which the agent run
# supplies after actually calling the tool.
llm_with_stop = llm.bind(stop=["\nObservation"])

In [15]:
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser

# LCEL pipeline: map the executor's inputs onto the prompt variables, render the
# prompt, call the stop-bound model, then parse the JSON action / final answer.
agent = (
    {
        "agent_scratchpad": lambda inputs: format_log_to_str(inputs["intermediate_steps"]),
        "input": lambda inputs: inputs["input"],
    }
    | prompt
    | llm_with_stop
    | ReActJsonSingleInputOutputParser()
)

In [16]:
from langchain.agents import AgentExecutor

# Wrap the LCEL agent and its tools in an executor. verbose=True prints the
# Thought/Action trace and tool results shown in the outputs below.
# NOTE(review): handle_parsing_errors=True presumably lets a run continue when
# the model's reply cannot be parsed — confirm against the LangChain docs.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)

In [17]:
agent_executor.invoke(
    {
        "input" : "What is Retrieval Augmented Generation?"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Retrieval Augmented Generation (RAG) is a concept in the field of natural language processing and machine learning. It refers to a methodology where a generative model is augmented with a retrieval component to enhance its performance, typically by providing additional context or information. To provide a detailed and accurate explanation, I can search for scientific articles on arxiv.org that discuss Retrieval Augmented Generation.

Action:
```
{
  "action": "arxiv",
  "action_input": "Retrieval Augmented Generation"
}
```
[0m[36;1m[1;3mPublished: 2022-02-13
Title: A Survey on Retrieval-Augmented Text Generation
Authors: Huayang Li, Yixuan Su, Deng Cai, Yan Wang, Lemao Liu
Summary: Recently, retrieval-augmented text generation attracted increasing attention
of the computational linguistics community. Compared with conventional
generation models, retrieval-augmented text generation has remarkable
advantages and pa

{'input': 'What is Retrieval Augmented Generation?',
 'output': 'Retrieval Augmented Generation (RAG) is a methodology in natural language processing that enhances generative models by incorporating a retrieval component. This retrieval component fetches relevant information or context from a data source, which is then used to inform and improve the generation process. RAG has been shown to achieve state-of-the-art performance in various NLP tasks, such as dialogue response generation and machine translation. The approach can be further improved by context tuning, which retrieves more pertinent context, or by corrective measures like CRAG, which assess the quality of retrieved documents and augment them with large-scale web searches to ensure the relevance and accuracy of the information used in the generation process.'}

In [18]:
agent_executor.invoke(
    {
        "input" : "Who is the current QB of the Denver Broncos?"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To find out the current quarterback of the Denver Broncos, I can use a search engine to get the most recent information.

Action:
```json
{
  "action": "duckduckgo_search",
  "action_input": "current QB of the Denver Broncos"
}
```[0m[33;1m[1;3mWith less than a month to go until the NFL Draft kicks off, the Denver Broncos are still stuck with their looming quarterback question of who will be starting under center for the first year post ... New Broncos starting quarterback is married, a father and has backed up Tom Brady, Cam Newton, Jimmy Garoppolo - and Russell Wilson in his career. Credit: AP Photo/David Zalubowski. Denver ... ENGLEWOOD, Colo. -- The quarterback question is the bottom line of the Denver Broncos' offseason. Coach Sean Payton benched Russell Wilsonwith two games remaining in the 2023 season. The five-year ... Tennessee Titans quarterback Ryan Tannehill (17) throws during the first half of an NFL 

{'input': 'Who is the current QB of the Denver Broncos?',
 'output': 'Jarrett Stidham started in the final two games of the 2023 season for the Denver Broncos, and with the team planning to release Russell Wilson, it appears that Stidham may be the current quarterback. However, the exact status of the starting quarterback position for the upcoming season is not definitively stated in the search results.'}

## OpenAI Assistant API

Helper functions for the Assistant API, taken from [this gist](https://gist.github.com/assafelovic/579822cd42d52d80db1e1c1ff82ffffd).

In [19]:
from duckduckgo_search import DDGS

def duckduckgo_search(query, max_results=5):
  """Search DuckDuckGo and return the result snippets as a single string.

  Args:
    query: Search text passed to ``DDGS.text``.
    max_results: Maximum number of results to request. Defaults to 5, the
      value that used to be hard-coded.

  Returns:
    The ``"body"`` snippet of every result, joined with newlines. Results
    without a body are skipped, so the string is empty when nothing usable
    comes back.
  """
  with DDGS() as ddgs:
    # Consume the results inside the ``with`` block in case ``text`` is lazy.
    snippets = [
        hit["body"]
        for hit in ddgs.text(query, max_results=max_results)
        if hit.get("body")
    ]
  return "\n".join(snippets)

In [20]:
duckduckgo_search("Who is the current captain of the Winnipeg Jets?")

'Lowry has been a part of the Jets organization since June 25, 2011, when he was selected in the third round, 67 th overall, after putting up 37 points in 36 games with the Swift Current Broncos of ...\nAdam Lowry, who has been a Jet since 2011 when he was drafted 67th overall, is the new captain of the NHL team — its third since relocating to Winnipeg from Atlanta in 2011. Andrew Ladd served ...\nLowry will follow Andrew Ladd and Blake Wheeler to serve as the third captain of the new Winnipeg Jets franchise. - Sep 12, 2023. After a season without a captain, the Winnipeg Jets have named ...\nThe Winnipeg Jets will have a captain for the 2023-24 season. ... Although it will be his first time serving as a team captain since his final year with the Swift Current Broncos in 2012-13, Lowry ...\nWinnipeg played without a captain last season after stripping Blake Wheeler of the title in September. Lowry is entering his 10th season with the Jets, who drafted him in the third round in 2011.'

In [21]:
import arxiv

def arxiv_search(query, max_results=5):
  """Search arXiv and return the abstracts of the matching papers.

  Args:
    query: Search text passed to ``arxiv.Search``.
    max_results: Maximum number of papers to request. Defaults to 5, the
      value that used to be hard-coded.

  Returns:
    The ``summary`` of each result, in the order the client yields them
    (the search is sorted by relevance), joined with newlines.
  """
  search = arxiv.Search(
      query=query,
      max_results=max_results,
      sort_by=arxiv.SortCriterion.Relevance,
  )
  return "\n".join(paper.summary for paper in arxiv.Client().results(search))

In [None]:
arxiv_search("Retrieval Augmented Generation?")

"Existing research on response generation for chatbot focuses on \\textbf{First\nResponse Generation} which aims to teach the chatbot to say the first response\n(e.g. a sentence) appropriate to the conversation context (e.g. the user's\nquery). In this paper, we introduce a new task \\textbf{Second Response\nGeneration}, termed as Improv chat, which aims to teach the chatbot to say the\nsecond response after saying the first response with respect the conversation\ncontext, so as to lighten the burden on the user to keep the conversation\ngoing. Specifically, we propose a general learning based framework and develop\na retrieval based system which can generate the second responses with the\nusers' query and the chatbot's first response as input. We present the approach\nto building the conversation corpus for Improv chat from public forums and\nsocial networks, as well as the neural networks based models for response\nmatching and ranking. We include the preliminary experiments and resu

In [22]:
# Function schema advertised to the assistant for general web search.
# `parameters` is a JSON Schema object; the key must be exactly "type"
# (it was previously misspelled "type:", leaving `query` without a type).
ddg_function = {
    "name" : "duckduckgo_search",
    "description" : "Answer non-technical questions. Do not use this tool for questions about Machine Learning.",
    "parameters" : {
        "type" : "object",
        "properties" : {
            "query" : {
                "type" : "string",
                "description" : "The search query to use. For example: 'Who is the current Goalie of the Colorado Avalance?'"
            }
        },
        "required" : ["query"]
    }
}

In [23]:
# Function schema advertised to the assistant for machine-learning questions.
# `parameters` is a JSON Schema object; the key must be exactly "type"
# (it was previously misspelled "type:", leaving `query` without a type).
arxiv_function = {
    "name" : "arxiv_query",
    "description" : "Answer technical questions about the Machine Learning domain.",
    "parameters" : {
        "type" : "object",
        "properties" : {
            "query" : {
                "type" : "string",
                "description" : "The search query to use. For example: 'Retrieval Augmented Generation'"
            }
        },
        "required" : ["query"]
    }
}

In [29]:
from openai import OpenAI

client = OpenAI()

# Create the assistant and register both function schemas as callable tools.
assistant = client.beta.assistants.create(
    model="gpt-4-1106-preview",
    name="Query Assistant",
    instructions="You are a personal assistant. Use the provided functions to answer questions.",
    tools=[
        {"type": "function", "function": schema}
        for schema in (arxiv_function, ddg_function)
    ],
)

InternalServerError: Error code: 500 - {'error': {'message': 'The server had an error processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if you keep seeing this error. (Please include the request ID req_04328f24bb7c70e52a5c84bff301360b in your email.)', 'type': 'server_error', 'param': None, 'code': None}}

In [None]:
assistant_id = assistant.id

print(f"Assistant ID: {assistant_id}")

Assistant ID: asst_zKGTZPDZ1vwUCy1t3COqGe7Y


In [None]:
# Start a fresh conversation thread and add the user's question to it.
thread = client.beta.threads.create()
# NOTE(review): Ottawa is itself a capital city, so this question looks
# malformed — confirm whether "Canada" was intended. The saved outputs further
# down discuss gradient descent, so they were not produced by this query.
user_input = "What is the capital of Ottawa?"
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=user_input,
)

In [None]:
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant_id,
)

In [None]:
import time

def wait_for_run_completion(thread_id, run_id, poll_interval=1, timeout=None):
    """Poll a run until it can no longer progress on its own, then return it.

    Args:
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run to poll.
        poll_interval: Seconds to sleep between polls (default 1).
        timeout: Optional maximum number of seconds to wait. ``None``
            (the default) waits indefinitely.

    Returns:
        The most recently retrieved run object. Its ``status`` is either a
        finished state or ``'requires_action'``.

    Raises:
        TimeoutError: If ``timeout`` elapses before the run stops.
    """
    # 'requires_action' is not final, but the run will not advance until the
    # caller submits tool outputs, so polling has to stop there as well.
    # 'cancelled', 'expired' and 'incomplete' are finished states too; without
    # them the loop would never end for such runs.
    stop_statuses = {
        'completed', 'failed', 'requires_action',
        'cancelled', 'expired', 'incomplete',
    }
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        print(f"Current run status: {run.status}")
        if run.status in stop_statuses:
            return run
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Run {run_id} still has status {run.status!r} after {timeout} seconds"
            )
        # Sleep only after an unfinished poll, so an already-finished run
        # returns immediately.
        time.sleep(poll_interval)

In [None]:
run = wait_for_run_completion(thread.id, run.id)

Current run status: requires_action


In [None]:
import json

def submit_tool_outputs(thread_id, run_id, tools_to_call):
    """Execute the tool calls requested by a run and submit their outputs.

    Every requested call receives exactly one output entry, including calls
    that name an unknown function, carry malformed arguments, or return
    nothing. Previously such calls were silently dropped, leaving the run with
    unanswered tool calls.

    Args:
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run that is waiting for tool outputs.
        tools_to_call: Tool-call objects exposing ``id``, ``function.name``
            and ``function.arguments`` (a JSON string with a ``"query"`` key).

    Returns:
        The run object returned by ``client.beta.threads.runs.submit_tool_outputs``.
    """
    # function name -> (progress message, callable accepting `query=`)
    handlers = {
        "duckduckgo_search": ("Consulting Duck Duck Go...", duckduckgo_search),
        "arxiv_query": ("Consulting Arxiv...", arxiv_search),
    }

    tool_output_array = []
    for tool in tools_to_call:
        function_name = tool.function.name
        handler = handlers.get(function_name)

        if handler is None:
            # Report the problem back to the model instead of skipping the call.
            output = f"Error: unknown function '{function_name}'."
        else:
            progress_message, search = handler
            try:
                query = json.loads(tool.function.arguments)["query"]
            except (json.JSONDecodeError, KeyError, TypeError) as exc:
                output = f"Error: invalid arguments for '{function_name}': {exc!r}"
            else:
                print(progress_message)
                # An empty result is still reported, so the call is answered.
                output = search(query=query) or "No results found."

        tool_output_array.append({"tool_call_id": tool.id, "output": output})

    print(tool_output_array)

    return client.beta.threads.runs.submit_tool_outputs(
        thread_id=thread_id,
        run_id=run_id,
        tool_outputs=tool_output_array
    )

In [None]:
# The assistant may request further tool calls after receiving the first batch
# of outputs, so keep answering until the run leaves 'requires_action'.
while run.status == 'requires_action':
    run = submit_tool_outputs(thread.id, run.id, run.required_action.submit_tool_outputs.tool_calls)
    run = wait_for_run_completion(thread.id, run.id)

Consulting Duck Duck Go...
[{'tool_call_id': 'call_vBWBxJPTMBe5NNxkJj7Pyd0v', 'output': 'Gradient descent is a first-order optimization algorithm for finding a local minimum of a differentiable function. It steps in the direction of the negative gradient of the function at the current point, which is the steepest descent direction. It is useful for minimizing the cost or loss function and has various extensions and applications in machine learning and data mining.\nLearn how gradient descent is a general-purpose algorithm that numerically estimates where a function outputs its lowest values. See examples of how to use it for multivariable functions, polynomials, and machine learning problems.\nGradient descent is an optimization algorithm that trains machine learning models and neural networks by minimizing errors between predicted and actual results. Learn how it works, its types, challenges and applications with examples from IBM.\nLearn the basics of Gradient Descent, an optimizatio

In [None]:
def print_messages_from_thread(thread_id):
    """Print every message in a thread as ``"<role>: <text>"``.

    Messages are printed in the order the API lists them. All text parts of a
    message are joined with newlines; parts without text (for example images)
    are skipped, and a message with no text at all prints a placeholder rather
    than raising.

    Args:
        thread_id: ID of the thread whose messages should be printed.
    """
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    for msg in messages:
        text_parts = []
        for part in msg.content:
            # Only text parts carry a `.text.value`.
            text = getattr(part, "text", None)
            if text is not None:
                text_parts.append(text.value)
        body = "\n".join(text_parts) if text_parts else "[no text content]"
        print(f"{msg.role}: {body}")

In [None]:
print_messages_from_thread(thread.id)

assistant: Gradient descent is a first-order optimization algorithm used to find the local minimum of a differentiable function. It operates by iteratively moving towards the direction of the steepest descent as defined by the negative of the gradient. The process involves taking steps proportional to the negative of the gradient (or approximate gradient) of the function at the current point.

Gradient descent is particularly useful in machine learning and neural networks, where it is employed to minimize the cost or loss function—the measure of the difference between the actual and predicted outputs. By minimizing this error, the model's parameters are optimized during training.

The algorithm starts from an initial point and takes steps proportional to the negative gradient of the function at the current point, gradually moving towards the area where the function has its lowest value, which corresponds to the most optimal solution for the model.

There are different types of gradient

In [None]:
def use_assistant(query):
  """Ask the assistant a question and print the resulting conversation.

  Creates a new thread containing ``query``, starts a run with the assistant
  identified by the module-level ``assistant_id``, answers tool-call requests
  until the run stops asking for them, and finally prints the thread's
  messages.

  Args:
    query: The user's question.
  """
  thread = client.beta.threads.create()

  client.beta.threads.messages.create(
      thread_id=thread.id,
      role="user",
      content=query,
  )

  print("Creating Assistant ")

  run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant_id,
  )

  print("Querying OpenAI Assistant Thread.")

  run = wait_for_run_completion(thread.id, run.id)

  # The assistant can ask for more tool calls after seeing the first outputs,
  # so loop instead of handling a single round.
  while run.status == 'requires_action':
    run = submit_tool_outputs(thread.id, run.id, run.required_action.submit_tool_outputs.tool_calls)
    run = wait_for_run_completion(thread.id, run.id)

  if run.status != 'completed':
    # Make a failed/cancelled/expired run visible instead of silently
    # printing a thread that has no answer in it.
    print(f"Run finished with status: {run.status}")

  print_messages_from_thread(thread.id)

In [None]:
use_assistant("What is QLoRA?")

Creating Assistant 
Querying OpenAI Assistant Thread.
Current run status: in_progress
Current run status: requires_action
Consulting Arxiv...
[{'tool_call_id': 'call_MAr2jwPt5OPhkXKQmCj4S5mn', 'output': "We present QLoRA, an efficient finetuning approach that reduces memory usage\nenough to finetune a 65B parameter model on a single 48GB GPU while preserving\nfull 16-bit finetuning task performance. QLoRA backpropagates gradients through\na frozen, 4-bit quantized pretrained language model into Low Rank\nAdapters~(LoRA). Our best model family, which we name Guanaco, outperforms all\nprevious openly released models on the Vicuna benchmark, reaching 99.3% of the\nperformance level of ChatGPT while only requiring 24 hours of finetuning on a\nsingle GPU. QLoRA introduces a number of innovations to save memory without\nsacrificing performance: (a) 4-bit NormalFloat (NF4), a new data type that is\ninformation theoretically optimal for normally distributed weights (b) double\nquantization to 

In [None]:
use_assistant("What is a meme?")

Creating Assistant 
Querying OpenAI Assistant Thread.
Current run status: in_progress
Current run status: requires_action
Consulting Duck Duck Go...
[{'tool_call_id': 'call_UyLHF92vEZGEHFEfSsyh9PmA', 'output': 'A meme is an amusing or interesting item (such as a captioned picture or video) or genre of items that is spread widely online especially through social media. It can also mean an idea, behavior, style, or usage that spreads from person to person within a culture. Learn more about the origin, history, and examples of memes from this dictionary entry.\nA meme is a virally transmitted image or video with text that shares commentary on cultural, social, or current events. Learn the origins, examples, and categories of memes, from general humor to world events, and how to make your own with Meme Generator.\nA meme is a cultural shorthand that portrays a particular concept or idea and is spread through online social platforms. It can be an image, video, or text with a new caption tha