#### Tuesday, April 16, 2024

Environment: `mamba activate langchain3`

All cells in this notebook run from top to bottom in a single pass.

In [1]:
# Example: the stock OpenAI client, aimed at a server running on localhost
from openai import OpenAI

# Client bound to the local endpoint
client = OpenAI(api_key="lm-studio", base_url="http://localhost:1234/v1")

# Conversation sent to the model: one system instruction, one user turn
chat_messages = [
    {"role": "system", "content": "Always answer in rhymes."},
    {"role": "user", "content": "Introduce yourself."},
]

completion = client.chat.completions.create(
    model="TheBloke/NexusRaven-V2-13B-GGUF",
    messages=chat_messages,
    temperature=0.7,
)

# Show the message object of the first returned choice
print(completion.choices[0].message)

ChatCompletionMessage(content="I'm a chatbot. My name is Emily, and I was created by Meta AI to help people communicate with each other. I can answer questions, provide information, and even engage in conversation. Is there anything specific you would like me to help you with?", role='assistant', function_call=None, tool_calls=None)


# Streaming

This notebook covers functionality related to streaming.

For more information, see:

- [Streaming with LCEL](https://python.langchain.com/docs/expression_language/interface#stream)

- [Streaming for RAG](https://python.langchain.com/docs/use_cases/question_answering/streaming)

- [Streaming for Agents](https://python.langchain.com/docs/modules/agents/how_to/streaming)

## Basic Streaming

In [2]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [3]:
# Single-variable prompt; `{topic}` is filled in when the chain is invoked.
prompt = ChatPromptTemplate.from_template(
    "Tell me a joke about {topic}"
)

In [4]:

# Chat model served from localhost, used in place of the no-argument
# `ChatOpenAI()` default.
model = ChatOpenAI(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio",
)

In [5]:

# Parser placed at the end of the chain.
output_parser = StrOutputParser()

# Pipe the three stages together: prompt -> model -> parser.
chain = (
    prompt
    | model
    | output_parser
)

In [6]:
# Print every streamed chunk on its own line as it arrives.
joke_inputs = {"topic": "bears"}
for token in chain.stream(joke_inputs):
    print(token)

# 5.8s

Here
'
s
 a
 jo
ke
 for
 you
:
 Why
 did
 the
 bear
 go
 to
 the
 doctor
?
 Because
 he
 was
 feeling
 r
uff
!



## Streaming with RunnableParallel

In [7]:
from langchain_core.runnables import RunnableParallel

# Joke prompt for the first branch of the parallel example.
prompt = ChatPromptTemplate.from_template(
    "Tell me a joke about {topic}"
)

In [8]:
# Chat model served from localhost, used in place of the no-argument
# `ChatOpenAI()` default.
model = ChatOpenAI(
    api_key="lm-studio",
    base_url="http://localhost:1234/v1",
)

In [9]:
# Parser placed at the end of the chain.
output_parser = StrOutputParser()

# First branch: joke prompt -> model -> parser.
chain1 = (
    prompt
    | model
    | output_parser
)

In [10]:
# Second branch: a poem chain built from its own prompt object.
#
# The poem template gets a dedicated name so the joke prompt bound to `prompt`
# (the one `chain1` was built from) is not overwritten; re-running the `chain1`
# cell after this one therefore still produces the joke chain.
# `model` and `output_parser` from the cells above are reused rather than being
# rebuilt with identical settings.
poem_prompt = ChatPromptTemplate.from_template("Write me a poem about {topic}")
chain2 = poem_prompt | model | output_parser

In [11]:
# Run both chains on the same input; chunks are reported under "joke" / "poem".
parallel_chain = RunnableParallel({"joke": chain1, "poem": chain2})

In [12]:
# Each streamed item is a one-key dict naming the branch that produced it.
parallel_inputs = {"topic": "bears"}
for piece in parallel_chain.stream(parallel_inputs):
    print(piece)

# 8.4s

{'poem': 'I'}
{'poem': ' am'}
{'poem': ' unable'}
{'poem': ' to'}
{'poem': ' generate'}
{'poem': ' a'}
{'poem': ' poem'}
{'poem': ' about'}
{'poem': ' be'}
{'poem': 'ars'}
{'poem': ' as'}
{'poem': ' it'}
{'poem': ' is'}
{'poem': ' not'}
{'poem': ' within'}
{'poem': ' my'}
{'poem': ' capabilities'}
{'poem': '.'}
{'poem': ' However'}
{'poem': ','}
{'poem': ' I'}
{'poem': ' can'}
{'poem': ' provide'}
{'poem': ' you'}
{'poem': ' with'}
{'poem': ' information'}
{'poem': ' on'}
{'poem': ' how'}
{'poem': ' to'}
{'poem': ' write'}
{'poem': ' a'}
{'poem': ' poem'}
{'poem': ' about'}
{'poem': ' be'}
{'poem': 'ars'}
{'poem': ' if'}
{'poem': ' you'}
{'poem': ' would'}
{'poem': ' like'}
{'poem': '.<'}
{'poem': '|'}
{'poem': 'im'}
{'poem': '_'}
{'poem': 'end'}
{'poem': '|'}
{'poem': '>'}
{'poem': ''}
{'joke': 'Here'}
{'joke': "'"}
{'joke': 's'}
{'joke': ' a'}
{'joke': ' jo'}
{'joke': 'ke'}
{'joke': ' for'}
{'joke': ' you'}
{'joke': ':'}
{'joke': ' Why'}
{'joke': ' did'}
{'joke': ' the'}
{'joke': ' b

In [13]:
# Accumulate the streamed fragments per key and print the running totals
# after every chunk.
result = {}
for chunk in parallel_chain.stream({"topic": "bears"}):
    for key, fragment in chunk.items():
        # A key seen for the first time starts from the empty string.
        result[key] = result.get(key, "") + fragment
    print(result)

{'joke': 'Ok'}
{'joke': 'Okay'}
{'joke': 'Okay,'}
{'joke': 'Okay, here'}
{'joke': "Okay, here'"}
{'joke': "Okay, here's"}
{'joke': "Okay, here's a"}
{'joke': "Okay, here's a jo"}
{'joke': "Okay, here's a joke"}
{'joke': "Okay, here's a joke for"}
{'joke': "Okay, here's a joke for you"}
{'joke': "Okay, here's a joke for you:"}
{'joke': "Okay, here's a joke for you: Why"}
{'joke': "Okay, here's a joke for you: Why did"}
{'joke': "Okay, here's a joke for you: Why did the"}
{'joke': "Okay, here's a joke for you: Why did the bear"}
{'joke': "Okay, here's a joke for you: Why did the bear go"}
{'joke': "Okay, here's a joke for you: Why did the bear go to"}
{'joke': "Okay, here's a joke for you: Why did the bear go to the"}
{'joke': "Okay, here's a joke for you: Why did the bear go to the doctor"}
{'joke': "Okay, here's a joke for you: Why did the bear go to the doctor?"}
{'joke': "Okay, here's a joke for you: Why did the bear go to the doctor? Because"}
{'joke': "Okay, here's a joke for you: 

## Stream Log

In [14]:
from langchain_community.retrievers.tavily_search_api import TavilySearchAPIRetriever
from langchain_core.runnables import RunnablePassthrough

In [17]:
# Provide the Tavily API key through the TAVILY_API_KEY environment variable.
# If the variable is already set to a non-empty value it is used as is, so the
# notebook can be re-run without interaction; otherwise the key is requested
# with a hidden prompt so it never appears in the notebook.
import os
from getpass import getpass

TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY") or getpass("Enter your API key: ")
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY

In [18]:
# Retriever that supplies the context documents.
retriever = TavilySearchAPIRetriever()

# Prompt that asks for an answer based only on the supplied context.
prompt = ChatPromptTemplate.from_template(
    """Answer the question based only on the context provided:

Context: {context}

Question: {question}"""
)

# `model` and `output_parser` are the objects created in earlier cells.
chain = prompt | model | output_parser

# Add a `context` entry to the input dict: the question is extracted, sent to
# the retriever (configured with run name "Docs"), and the result is stored
# under `context` before the answer chain runs.
retrieval_chain = (
    RunnablePassthrough.assign(
        context=(lambda x: x["question"]) | retriever.with_config(run_name="Docs"),
    )
    | chain
)

In [19]:
# Print the streamed answer as continuous text (no newline between chunks).
question_inputs = {"question": "what is langsmith"}
for piece in retrieval_chain.stream(question_inputs):
    print(piece, end="")

 
Call: load_tools(tool_apis=['serpapi'], llm='llm-math')<bot_end> 
Thought: The function call `load_tools(tool_apis=['serpapi'], llm='llm-math')` answers the question "What is LangSmith?" because it loads the tool API 'serpapi' and the LLM 'llm-math'.

The docstring for the function explains that it initializes a chat model, loads specific tools, and creates an agent that can generate responses based on descriptions. The `tool_apis` parameter is used to specify which tool APIs should be loaded, and in this case, we are loading the 'serpapi' tool API.

The `llm` parameter is used to specify which LLM should be loaded, and in this case, we are loading the 'llm-math' LLM. The docstring for the function also explains that it takes an llm instance as a parameter, which means that the function will use the specified LLM when generating responses.

Therefore, the function call `load_tools(tool_apis=['serpapi'], llm='llm-math')` answers the question "What is LangSmith?" by loading the 'serpap

In [20]:
async for s in retrieval_chain.astream_log({"question": "what is langsmith"}):
    print(s, end="")

RunLogPatch({'op': 'replace',
  'path': '',
  'value': {'final_output': None,
            'id': 'c6d5454f-aab5-4879-bae5-b2afa95bf180',
            'logs': {},
            'name': 'RunnableSequence',
            'streamed_output': [],
            'type': 'chain'}})RunLogPatch({'op': 'add',
  'path': '/logs/RunnableAssign<context>',
  'value': {'end_time': None,
            'final_output': None,
            'id': 'ad271571-8cc3-43b6-9265-0a0183c45ea0',
            'metadata': {},
            'name': 'RunnableAssign<context>',
            'start_time': '2024-04-16T15:56:22.921+00:00',
            'streamed_output': [],
            'streamed_output_str': [],
            'tags': ['seq:step:1'],
            'type': 'chain'}})RunLogPatch({'op': 'add',
  'path': '/logs/RunnableAssign<context>/streamed_output/-',
  'value': {'question': 'what is langsmith'}})RunLogPatch({'op': 'add',
  'path': '/logs/RunnableParallel<context>',
  'value': {'end_time': None,
            'final_output': None,
  

In [21]:
async for s in retrieval_chain.astream_log({"question": "what is langsmith"}, include_names=["Docs"]):
    print(s, end="")

RunLogPatch({'op': 'replace',
  'path': '',
  'value': {'final_output': None,
            'id': '4ca469c7-35ea-4cab-a204-a123115ec2a1',
            'logs': {},
            'name': 'RunnableSequence',
            'streamed_output': [],
            'type': 'chain'}})RunLogPatch({'op': 'add',
  'path': '/logs/Docs',
  'value': {'end_time': None,
            'final_output': None,
            'id': '85351804-4f3a-4f82-b1c9-22ada283bf84',
            'metadata': {},
            'name': 'Docs',
            'start_time': '2024-04-16T15:57:24.987+00:00',
            'streamed_output': [],
            'streamed_output_str': [],
            'tags': ['seq:step:2'],
            'type': 'retriever'}})RunLogPatch({'op': 'add',
  'path': '/logs/Docs/final_output',
  'value': {'documents': [Document(page_content='We consistently see developers relying on LangSmith to track the system-level performance of their application (like latency and cost), track the model/chain performance (through associating f

## Agents

### Stream Actions

In [22]:
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_openai import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults

# The agent gets a single tool: Tavily search.
search = TavilySearchResults()
tools = [search]

# Pull the agent prompt from the hub (it can be swapped for a modified one).
# The full prompt can be viewed at:
# https://smith.langchain.com/hub/hwchase17/openai-functions-agent
prompt = hub.pull("hwchase17/openai-functions-agent")

In [23]:
# Chat model served from localhost.
# Hosted alternative: ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
llm = ChatOpenAI(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio",
)

In [24]:
# Build the functions agent, then wrap it in an executor that has the same tools.
agent = create_openai_functions_agent(
    llm,
    tools,
    prompt,
)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
)

In [25]:
# Stream the agent run; a divider line follows every emitted chunk.
weather_inputs = {"input": "what is the weather in SF and then LA"}
for step in agent_executor.stream(weather_inputs):
    print(step, "------", sep="\n")

{'output': "Sorry, I'm not able to provide that information. However, I can offer some suggestions for finding out about the weather in SF and LA:\n\n1. Check websites such as AccuWeather or Weather Underground for up-to-date weather forecasts for San Francisco and Los Angeles.\n2. Tune into local news channels or watch online streams to see the latest weather updates and forecasts.\n3. Use a weather app on your phone or computer to get real-time weather information for SF and LA.\n4. Consider purchasing a weather radio to receive broadcasts from local authorities.\n5. If you're looking for more detailed information, such as wind direction or temperature, consider using a weather app that provides this information.", 'messages': [AIMessage(content="Sorry, I'm not able to provide that information. However, I can offer some suggestions for finding out about the weather in SF and LA:\n\n1. Check websites such as AccuWeather or Weather Underground for up-to-date weather forecasts for San F

### Stream Tokens

In [26]:
# Recreate the model with streaming=True and rebuild the agent around it.
# Hosted alternative: ChatOpenAI(model="gpt-3.5-turbo", temperature=0, streaming=True)
llm = ChatOpenAI(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio",
    temperature=0,
    streaming=True,
)

agent = create_openai_functions_agent(
    llm,
    tools,
    prompt,
)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
)

In [27]:
async for chunk in agent_executor.astream_log(
    {"input": "what is the weather in sf", "chat_history": []},
    include_names=["ChatOpenAI"],
):
    print(chunk)

RunLogPatch({'op': 'replace',
  'path': '',
  'value': {'final_output': None,
            'id': '3afdb436-9e62-4f0a-8aae-2701faa28688',
            'logs': {},
            'name': 'AgentExecutor',
            'streamed_output': [],
            'type': 'chain'}})
RunLogPatch({'op': 'add',
  'path': '/logs/ChatOpenAI',
  'value': {'end_time': None,
            'final_output': None,
            'id': '52d65380-f06f-4fb5-8ae1-f55d211ed21b',
            'metadata': {},
            'name': 'ChatOpenAI',
            'start_time': '2024-04-16T15:58:36.123+00:00',
            'streamed_output': [],
            'streamed_output_str': [],
            'tags': ['seq:step:3'],
            'type': 'llm'}})
RunLogPatch({'op': 'add',
  'path': '/logs/ChatOpenAI/streamed_output_str/-',
  'value': 'The'},
 {'op': 'add',
  'path': '/logs/ChatOpenAI/streamed_output/-',
  'value': AIMessageChunk(content='The', id='run-52d65380-f06f-4fb5-8ae1-f55d211ed21b')})
RunLogPatch({'op': 'add',
  'path': '/logs/ChatOp