In [1]:
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
import chromadb
import pandas as pd

In [6]:
def vectordb_targets(db_path:str):
    """Describe the topics covered by a persistent Chroma database as one string.

    Opens the Chroma database stored at ``db_path``, reads the metadata of
    every collection in it, and gathers the distinct ``'First Division'`` and
    ``'Second Division'`` values found across all collections.

    Args:
        db_path: Directory handed to ``chromadb.PersistentClient``.

    Returns:
        ``"vectordb"``, ``"vectorstore"``, the first-division labels and then
        the second-division labels, joined with ``", "``. Labels keep the
        order in which they were first seen and are de-duplicated per level.
        A database without collections yields ``"vectordb, vectorstore"``.

    Side effects:
        Prints the first-division and second-division label lists.
    """
    client = chromadb.PersistentClient(path=db_path)

    # Dicts are used as ordered sets: unlike set(), they give the same label
    # order on every run, so the prompt built from the result is reproducible.
    lv1_seen = {}
    lv2_seen = {}

    # Accumulate inside the loop so every collection contributes, not only
    # the last one returned by list_collections().
    for collection in client.list_collections():
        data = collection.get(include=['metadatas'])
        for meta in data["metadatas"] or []:
            if not meta:
                # Entry stored without metadata: nothing to collect.
                continue
            first = meta.get('First Division')
            if first is not None:
                lv1_seen[first] = None
            second = meta.get('Second Division')
            if second is not None:
                lv2_seen[second] = None

    lv1 = list(lv1_seen)
    print(lv1)
    lv2 = list(lv2_seen)
    print(lv2)

    return ", ".join(["vectordb", "vectorstore", *lv1, *lv2])

# Relative path of the persisted Chroma database that is inspected for topics.
db_path = "./db/chroma_db_02"
# Comma-separated topic string; it is interpolated into the router's system
# prompt further down in the notebook.
docs = vectordb_targets(db_path=db_path)
# Bare expression so the notebook echoes the resulting string as cell output.
docs

['MANUAL', 'PORT', 'Rules']
['ABS', 'Cryostar', 'Win GD', 'KR', 'Port Regulation', 'Integrated Smart Ship(ISS)', 'BV', 'DNV', 'LR', 'MARPOL', 'NK', 'SOLAS', 'Common']


'vectordb, vectorstore, MANUAL, PORT, Rules, ABS, Cryostar, Win GD, KR, Port Regulation, Integrated Smart Ship(ISS), BV, DNV, LR, MARPOL, NK, SOLAS, Common'

In [7]:
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_ollama.chat_models import ChatOllama
from pydantic import BaseModel, Field

In [8]:
# Data model: the structured answer the routing chain is asked to produce
# (it is bound to the LLM via `with_structured_output(RouteQuery)` below).
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # Exactly one of the four route names listed in the Literal.
    # NOTE(review): the class docstring and the Field description are presumably
    # forwarded to the model as part of the schema, so treat both as prompt text
    # rather than plain documentation — confirm before rewording them.
    datasource: Literal["similarity_search", "vectorstore", "web_search", "database"] = Field(
        ...,  # Ellipsis as the default marks the field as required
        description="Given a user question choose to route it to web search or a vectorstore or a similarity or a database.",
    )

# Prompt
# `docs` is built from metadata labels and ends up inside a prompt *template*.
# Double any braces so ChatPromptTemplate keeps them as literal text instead of
# parsing them as template variables (a label such as "Spec {A}" would
# otherwise break the template or add a spurious input variable).
docs_for_template = docs.replace("{", "{{").replace("}", "}}")

# The route names mentioned here must match the Literal values of
# RouteQuery.datasource exactly; the fallback is therefore spelled
# "web_search" (underscore), the only spelling the schema accepts.
system = f"""You are an expert at routing a user question to a vectorstore, web search or database.
The vectorstore contains documents related to {docs_for_template}, Use the vectorstore for questions on these topics. 
The question contains words of similarity or sim search, Use similarity_search for the question.
The question contains words related to database, Use the database for the question. 
Otherwise, use web_search."""

# System instructions first, then the user's question as the only input variable.
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

In [None]:
# Chat model used for routing, created with temperature=0 and a fixed Groq model name.
llm = ChatGroq(temperature=0, model_name= "llama-3.2-11b-text-preview")
# Disabled alternative: the same role served by an Ollama server on localhost.
# llm = ChatOllama(base_url="http://localhost:11434", model="llama3.2:latest")
# Bind the RouteQuery schema to the model so replies follow that structure.
structured_llm_router = llm.with_structured_output(RouteQuery)
# Compose the chain: the routing prompt feeds the structured-output model.
question_router = route_prompt | structured_llm_router
# Bare expression so the notebook shows the composed chain's repr.
question_router

ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are an expert at routing a user question to a vectorstore, web search or database.\nThe vectorstore contains documents related to vectordb, vectorstore, MANUAL, PORT, Rules, ABS, Cryostar, Win GD, KR, Port Regulation, Integrated Smart Ship(ISS), BV, DNV, LR, MARPOL, NK, SOLAS, Common, Use the vectorstore for questions on these topics. \nThe question contains words of similarity or sim search, Use similarity_search for the question.\nThe question contains words related to database, Use the database for the question. \nOtherwise, use web-search.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})])
| RunnableBinding(bound=ChatGroq(client=

In [15]:
# Smoke-test the router with one question per phrasing of interest; results
# are printed in the same order as the questions are listed.
sample_questions = (
    "who is Son Heung-min",
    "according to database, who is Son Heung-min",
    "according to vectordb, who is Son Heung-min",
    "check the recent performance data of Son Heung min",
    "sim search for this sentence. who is Son Heung-min",
    "find similar sentences for this sentence. who is Son Heung-min",
)
for sample_question in sample_questions:
    print(question_router.invoke({"question": sample_question}))

datasource='web_search'
datasource='database'
datasource='vectorstore'
datasource='web_search'
datasource='similarity_search'
datasource='similarity_search'


In [13]:
# Routing check for a question that names "lr", which appears in the topic
# list injected into the system prompt ("LR").
query = 'with reference to "lr" rule, explain the measurement procedure of "noise"'
# Print the routing decision returned by the chain for this question.
print(question_router.invoke({"question": query}))

datasource='vectorstore'


In [14]:
# Same "lr" question, now prefixed with an explicit similarity-search request,
# to see which of the two routing cues the router follows.
query = 'similarity search for this. with reference to "lr" rule, explain the measurement procedure of "noise"'
# Print the routing decision returned by the chain for this question.
print(question_router.invoke({"question": query}))

datasource='similarity_search'
