# LLM Bot Demo with MLRun

In [None]:
from src import create_and_set_project

### Setup Project

In [None]:
# Environment file handed to create_and_set_project; change this to the path of your own .env file.
ENV_FILE = "mlrun.env"

In [None]:
# Obtain the project object from the local `src` helper, passing the env file and the demo's git source.
# NOTE(review): what the helper does with these arguments is defined in `src`, not visible here.
project = create_and_set_project(env_file=ENV_FILE, git_source="git://github.com/mlrun/demo-llm-bot#main")

### Ingest Data and Deploy LLM

In [None]:
# Run the project's "main" entry, pointing it at the markdown docs directory and the blog URL list.
# NOTE(review): presumably watch=True waits for the run to finish and dirty=True permits
# uncommitted local changes -- confirm against the MLRun `project.run` documentation.
project.run(
    name="main",
    arguments={
        "source_directory" : "data/mlrun_docs_md",
        "urls_file" : "data/urls/mlops_blogs.txt"
    },
    watch=True,
    dirty=True
)

### Query LLM via API

In [None]:
import os
import requests

# Choose the hostname used to reach the serving function: "host.docker.internal" when this
# code runs inside a Docker container, "localhost" otherwise.
# Docker is detected through the /.dockerenv marker file or a "docker" entry in /proc/self/cgroup.
is_docker = os.path.exists("/.dockerenv")
if not is_docker and os.path.isfile("/proc/self/cgroup"):
    # Read the cgroup file inside a context manager so the handle is always closed.
    with open("/proc/self/cgroup") as cgroup_file:
        is_docker = any("docker" in line for line in cgroup_file)
host = "host.docker.internal" if is_docker else "localhost"

# Look up the "serve-llm" function and build the URL that the query cells post to.
serving_fn = project.get_function("serve-llm", sync=True)
# The port is the text after the last ":" in the function's URL.
port = serving_fn.get_url().rpartition(":")[2]

MODEL_ENDPOINT_URL = f"http://{host}:{port}"
print(f"Model endpoint: {MODEL_ENDPOINT_URL}")

In [None]:
# Post an MLOps-related question with an empty chat history and show the JSON reply as the cell output.
resp = requests.post(url=MODEL_ENDPOINT_URL, json={"question" : "how I deploy ML models?", "chat_history" : []})
resp.json()

In [None]:
# Post a question about penguins with an empty chat history and show the JSON reply.
# NOTE(review): presumably demonstrates how the bot handles a question outside the ingested material.
resp = requests.post(url=MODEL_ENDPOINT_URL, json={"question" : "How much do penguins weigh?", "chat_history" : []})
resp.json()

### Query LLM via Chat Interface

In [None]:
import re

import gradio as gr
import requests
from langchain.memory import ConversationBufferMemory
from langchain.schema import messages_to_dict

# Conversation memory shared by query_llm and reset_memory; its stored messages are sent as
# "chat_history" with every request to the model endpoint.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


def enrich_docs_url(ai_message: str) -> str:
    """Rewrite local MLRun docs paths found in ``ai_message`` as hosted docs URLs.

    Each ``data/mlrun_docs_md/<page>.md`` occurrence is replaced with
    ``https://docs.mlrun.org/en/latest/<page>.html``; all other text is
    returned unchanged.
    """
    docs_path = re.compile(r"data/mlrun_docs_md/(.*?)\.md")
    return docs_path.sub(r"https://docs.mlrun.org/en/latest/\1.html", ai_message)


def query_llm(message: str) -> str:
    """Send ``message`` to the model endpoint and return the answer for display.

    The request carries the question together with the conversation so far,
    serialized from the shared ``memory``. The raw answer is saved back into
    ``memory`` before local docs paths in it are rewritten as URLs, so the
    history sent on later requests keeps the model's original wording.

    :param message: The user's question.
    :returns: The model's answer with MLRun docs paths turned into docs URLs.
    :raises requests.HTTPError: If the endpoint responds with an error status.
    """
    resp = requests.post(
        url=MODEL_ENDPOINT_URL,
        json={
            "question": message,
            "chat_history": messages_to_dict(memory.chat_memory.messages),
        },
        # NOTE(review): certificate verification is disabled here; revisit if the
        # endpoint is ever served over HTTPS with a trusted certificate.
        verify=False,
    )
    # Surface endpoint failures as an HTTP error rather than as a confusing
    # KeyError / JSON decode error further down.
    resp.raise_for_status()
    ai_message = resp.json()["output"]
    memory.save_context({"input": message}, {"output": ai_message})
    return enrich_docs_url(ai_message=ai_message)


def reset_memory() -> None:
    """Clear the shared conversation memory.

    Returns ``None``; the "Clear" button wires this return value to the
    chatbot component.
    """
    memory.clear()


# Build the chat UI: a chatbot pane in the first row, and in the second row a question
# textbox (wider column) next to a "Clear" button (narrower column).
with gr.Blocks(analytics_enabled=False, theme=gr.themes.Soft()) as chat:
    with gr.Row():
        chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=6):
            message = gr.Textbox(label="Q:", placeholder="Type a question and Enter")
        with gr.Column(scale=3):
            clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Answer ``message`` via query_llm and append the exchange to ``chat_history``.

        Returns an empty string (for the textbox) and the updated history (for
        the chatbot), matching the outputs listed in ``message.submit`` below.
        """
        bot_message = query_llm(message=message)
        # The history list is extended in place with a (question, answer) pair.
        chat_history.append((message, bot_message))
        return "", chat_history

    # Submitting the textbox calls respond with the textbox and chatbot values as inputs,
    # and writes its two return values back to the same two components.
    message.submit(respond, [message, chatbot], [message, chatbot])
    # The Clear button resets the conversation memory; reset_memory's None result goes to the chatbot.
    clear.click(reset_memory, None, chatbot, queue=False)

# NOTE(review): presumably server_name="0.0.0.0" listens on all interfaces and share=True
# requests a public Gradio link -- confirm this exposure is intended outside a demo.
chat.launch(server_name="0.0.0.0", share=True, ssl_verify=False)

In [None]:
# Close the chat app started by chat.launch(...) once you are done with it.
chat.close()