# Interactive bot demo using LLMs and MLRun

In [2]:
import os
import mlrun
import openai

# Copy mlrun.env and populate with relevant credentials
ENV_FILE = "us-cst-352-2.env"

mlrun.set_env_from_file(ENV_FILE)
project = mlrun.get_or_create_project("llamagpt")

openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")

> 2023-06-16 09:51:44,066 [info] loaded project llamagpt from MLRun DB


### Index Documents

In [3]:

from src import LlmIndex

llmi = LlmIndex(additional_tools=["llm-math"])

llmi.add_index(
    index_name="llama",
    documents=llmi.load_documents(documents_dir="data/sample"),
    summary="useful for questions about llamas"
)
llmi.add_index(
    index_name="strawberries",
    documents=llmi.load_documents(loader="WikipediaReader", pages=['Strawberry']),
    summary="useful for questions about strawberries"
)

llmi.add_index(
    index_name="mlops",
    documents=llmi.load_documents(
        loader="BeautifulSoupWebReader",
        urls=['https://www.iguazio.com/blog/concept-drift-deep-dive-how-to-build-a-drift-aware-ml-system/']
        ),
    summary="useful for questions about mlops, machine learning, or concept drift"
)


<llama_index.indices.vector_store.base.VectorStoreIndex at 0x29c541b20>

In [4]:

llmi.persist(persist_dir="./storage")

In [5]:
llmi.persist(persist_dir="v3io:///bigdata/storage")

### Query Documents

In [6]:
# load the docs & indexes from storage and run queries
llmi = LlmIndex(additional_tools=["llm-math"])
llmi.load_storage()
llmi.get_catalog()

2023-06-16 09:52:06,047 [info] Loading indices with ids: ['llama']
Loaded llama index useful for questions about llamas
2023-06-16 09:52:06,048 [info] Loading indices with ids: ['strawberries']
Loaded strawberries index useful for questions about strawberries
2023-06-16 09:52:06,049 [info] Loading indices with ids: ['mlops']
Loaded mlops index useful for questions about mlops, machine learning, or concept drift


{'llama': 'useful for questions about llamas',
 'strawberries': 'useful for questions about strawberries',
 'mlops': 'useful for questions about mlops, machine learning, or concept drift'}

In [7]:
print(llmi.query("How tall are llamas?"))
print(llmi.query("Which countries produce strawberries?"))
print(llmi.query("What is MLOps?"))
print(llmi.query("What 5 times the average weight of a llama?"))

2023-06-16 09:52:07,294 [info] Loading indices with ids: ['llama']
2023-06-16 09:52:07,380 [info] Loading indices with ids: ['strawberries']
2023-06-16 09:52:07,381 [info] Loading indices with ids: ['mlops']


[1m> Entering new  chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: llama
Action Input: How tall are llamas?[0m2023-06-16 09:52:08,042 [info] > [retrieve] Total LLM token usage: 0 tokens
2023-06-16 09:52:08,043 [info] > [retrieve] Total embedding token usage: 6 tokens
2023-06-16 09:52:09,200 [info] > [get_response] Total LLM token usage: 1835 tokens
2023-06-16 09:52:09,200 [info] > [get_response] Total embedding token usage: 0 tokens

Observation: [33;1m[1;3m
Llamas are typically between 5 and 6 feet tall.[0m
Thought:[32;1m[1;3mDo I need to use a tool? No
AI: Llamas are typically between 5 and 6 feet tall.[0m

[1m> Finished chain.[0m
Llamas are typically between 5 and 6 feet tall.


[1m> Entering new  chain...[0m
[32;1m[1;3mThought: Do I need 

### Serving Graph

In [8]:
fn = mlrun.code_to_function(
    name="llama",
    filename="src/llmindex.py",
    kind="serving",
    image="mlrun/mlrun:1.3.2",
    requirements=["llama-index==0.6.26", "langchain==0.0.200"]
)
graph = fn.set_topology("flow", engine="async")
graph.add_step(name="llama", class_name="QueryLlamaGpt", persist_dir="v3io:///bigdata/storage", additional_tools=["llm-math"], full_event=True).respond()
fn.set_env("OPENAI_API_KEY", os.getenv("OPENAI_API_KEY"))
fn.set_env("OPENAI_API_BASE", os.getenv("OPENAI_API_BASE"))

project.deploy_function(fn, mock=True)

> 2023-06-16 09:52:24,478 [info] Deploying serving function MOCK (for simulation)...
Turn off the mock (mock=False) and make sure Nuclio is installed for real deployment to Nuclio
2023-06-16 09:52:25,859 [info] Loading indices with ids: ['llama']
Loaded llama index useful for questions about llamas
2023-06-16 09:52:25,861 [info] Loading indices with ids: ['strawberries']
Loaded strawberries index useful for questions about strawberries
2023-06-16 09:52:25,863 [info] Loading indices with ids: ['mlops']
Loaded mlops index useful for questions about mlops, machine learning, or concept drift


DeployStatus(state=ready, outputs={'endpoint': 'Mock', 'name': 'llama'})

In [None]:
query = "How tall are llamas?"
resp = fn.invoke(path="query", body={"query" : query})
resp["response"]

In [None]:
query = "What is 50 times the height of the tallest llama in the world?"
resp = fn.invoke(path="query", body={"query" : query})
resp["response"]

### Gradio App

In [9]:
import gradio as gr

In [None]:
def query_llama(query: str) -> str:
    resp = fn.invoke(path="/query", body={"query" : query})
    query_result = resp["response"]
    return query_result

with gr.Blocks(analytics_enabled=False, theme=gr.themes.Soft()) as demo:
    with gr.Row():
        chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=6):
            msg = gr.Textbox(label="Q:", placeholder="Type a question and Enter")
        with gr.Column(scale=3):
            clear = gr.Button("Clear")

    def respond(message, chat_history, index):
        bot_message = query_llama(message)
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()

Expected 3 arguments for function <function respond at 0x2aeaf8f70>, received 2.
Expected at least 3 arguments for function <function respond at 0x2aeaf8f70>, received 2.


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




2023-06-16 09:52:41,682 [info] Loading indices with ids: ['llama']
2023-06-16 09:52:41,683 [info] Loading indices with ids: ['strawberries']
2023-06-16 09:52:41,684 [info] Loading indices with ids: ['mlops']


Unexpected argument. Filling with None.




[1m> Entering new  chain...[0m
[32;1m[1;3mAI: I'm just a machine learning model, so I don't have feelings in the way humans do, but I'm functioning properly and ready to assist you with any questions or tasks you have. How can I help you today?[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: llama
Action Input: tell me about llamas[0m2023-06-16 09:52:52,593 [info] > [retrieve] Total LLM token usage: 0 tokens
2023-06-16 09:52:52,594 [info] > [retrieve] Total embedding token usage: 5 tokens
2023-06-16 09:53:00,523 [info] > [get_response] Total LLM token usage: 2111 tokens
2023-06-16 09:53:00,524 [info] > [get_response] Total embedding token usage: 0 tokens

Observation: [33;1m[1;3m
Llamas are domesticated South American camelids that have been used as a meat and pack animal by Andean cultures since the Pre-Columbian era. They are social animals and live in herds. Their wool is soft and contains only a s