# Query analysis

In [1]:
from langserve import RemoteRunnable

In [2]:
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field


class YouTubeSearch(BaseModel):
    """Search over a database of tutorial videos about a software library."""

    # Required query text; `...` as the default marks the field as mandatory.
    youtube_query: str = Field(
        default=...,
        description="Similarity search query applied to video transcripts.",
    )
    # Optional publication year; None when it is not supplied.
    publish_year: Optional[int] = Field(
        default=None,
        description="Year video was published",
    )
        
class APISearch(BaseModel):
    """Search over an API reference for software library."""

    # Required query text; `...` as the default marks the field as mandatory.
    api_query: str = Field(
        default=...,
        description="Similarity search query applied to reference documentation.",
    )

In [3]:
# Client for the query_analysis endpoint of a server running on localhost:8000.
runnable = RemoteRunnable("http://localhost:8000/query_analysis/")

In [4]:
from langchain_core.messages import HumanMessage

# Send a single user message together with both candidate schemas.
messages = [HumanMessage(content="RAG agent tutorial from 2023")]
response = runnable.invoke(
    dict(
        messages=messages,
        schema=[model.schema() for model in (YouTubeSearch, APISearch)],
    )
)
response

{'data': [{'youtube_query': 'RAG agent tutorial', 'publish_year': 2023}]}

In [5]:
# Same request shape as before, this time with an API-reference style question.
messages = [
    HumanMessage(content="what arguments does RunnablePassthrough.assign accept")
]
response = runnable.invoke(
    dict(
        messages=messages,
        schema=[model.schema() for model in (YouTubeSearch, APISearch)],
    )
)
response

{'data': [{'api_query': 'arguments RunnablePassthrough.assign'}]}

Add instructions to steer how the query is rewritten:

In [6]:
# Free-form guidance sent alongside the schema in the request.
instructions = (
    "Always expand acronym RAG to Retrieval Augmented Generation. "
    "NEVER INCLUDE RAG IN THE SEARCH"
)

messages = [HumanMessage(content="RAG agent tutorial from 2023")]
response = runnable.invoke(
    dict(
        messages=messages,
        schema=YouTubeSearch.schema(),
        instructions=instructions,
    )
)
response

{'data': [{'youtube_query': 'RAG agent tutorial', 'publish_year': 2023}]}

Add few-shot examples that pair sample messages with the desired output:

In [7]:
# One worked example: input messages paired with the output wanted for them.
examples = [
    dict(
        messages=[HumanMessage(content="RAG from scratch series")],
        output=[dict(youtube_query="Retrieval Augmented Generation from scratch")],
    )
]

# `messages` and `instructions` are reused from the previous cell.
response = runnable.invoke(
    dict(
        messages=messages,
        schema=YouTubeSearch.schema(),
        instructions=instructions,
        examples=examples,
    )
)
response

{'data': [{'youtube_query': 'RAG agent tutorial', 'publish_year': 2023}]}

## Persist analyzers

In [8]:
import requests

# Base URL for the REST calls made with `requests` in the following cells.
url = "http://localhost:8000"

In [9]:
# Definition of the analyzer to persist: a description, the candidate schemas,
# and the instructions string defined earlier in the notebook.
data = {
    "description": "Choose between youtube and api reference queries",
    "schema": [YouTubeSearch.schema(), APISearch.schema()],
    "instructions": instructions,
}

response = requests.post(f"{url}/query_analyzers", json=data)
# Fail here on a 4xx/5xx reply rather than later, when the "uuid" field is
# read from the response body.
response.raise_for_status()
response

<Response [200]>

In [10]:
# Keep the "uuid" returned for the new analyzer; later requests reference it.
qa_id = response.json()["uuid"]

### Add examples

In [11]:
from langchain_core.load import dumpd, load
# Round-trip the messages through dumpd/load to check that they can be
# serialized and restored.
load(dumpd(messages))

  warn_beta(


[HumanMessage(content='RAG agent tutorial from 2023')]

In [24]:
import json


# Upload each few-shot example to the persisted analyzer, keeping every reply.
responses = []
for example in examples:
    create_request = {
        "query_analyzer_id": qa_id,
        # Messages are passed through dumpd so they can be sent as JSON.
        "content": dumpd(example["messages"]),
        "output": example["output"],
    }
    # Use a dedicated name so the notebook-level `response` is not overwritten
    # as a side effect of this loop.
    example_response = requests.post(f"{url}/qa_examples", json=create_request)
    # Surface a failed upload immediately instead of storing an error reply.
    example_response.raise_for_status()
    responses.append(example_response)

In [25]:
# Raw body of the reply to the first example upload.
responses[0].text

'{"uuid":"9175e86e-f2bb-446b-92ea-262eab1a7aca"}'

In [26]:
# List the examples stored for this analyzer. Passing the filter through
# `params` lets requests build and URL-encode the query string.
response = requests.get(f"{url}/qa_examples", params={"query_analyzer_id": qa_id})
response

<Response [200]>

In [27]:
# Decoded JSON body of the examples listing.
response.json()

[{'created_at': '2024-03-05T02:01:07.802814',
  'updated_at': '2024-03-05T02:01:07.802819',
  'output': [{'youtube_query': 'Retrieval Augmented Generation from scratch'}],
  'query_analyzer_id': 'e183fe3c-3e12-4ee6-bd7d-a761709fbad2',
  'content': [{'id': ['langchain', 'schema', 'messages', 'HumanMessage'],
    'lc': 1,
    'type': 'constructor',
    'kwargs': {'content': 'RAG from scratch series'}}],
  'uuid': '9175e86e-f2bb-446b-92ea-262eab1a7aca'}]

### Analyze using persisted analyzer

In [91]:
# Request body for the persisted analyzer: its id plus the messages passed
# through dumpd.
request_data = {"query_analyzer_id": qa_id, "messages": dumpd(messages)}
print(request_data)

{'query_analyzer_id': 'e183fe3c-3e12-4ee6-bd7d-a761709fbad2', 'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'RAG agent tutorial from 2023'}}]}


In [92]:
# POST the request to the analyze endpoint and show the response object.
response = requests.post(f"{url}/analyze", json=request_data)
response

<Response [200]>

In [93]:
# Raw body text of the analyze reply.
response.text

'{"data":[{"publish_year":2023,"youtube_query":"RAG agent tutorial"}]}'