In [2]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import OllamaEmbeddings 
from langchain_chroma import Chroma 
from langchain_community.chat_models import ChatOllama 
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser



In [3]:
# Create csv loader and vector retriever
loader_4g = CSVLoader(file_path="./documents/kpi_4g.csv")
data_4g = loader_4g.load() 
vector_store_4g = Chroma.from_documents(documents=data_4g , embedding=OllamaEmbeddings(model="llama3") , collection_name="4gkpis")

loader_2g = CSVLoader(file_path="./documents/kpi_2g.csv")
data_2g = loader_2g.load() 
vector_store_2g = Chroma.from_documents(documents=data_2g , embedding=OllamaEmbeddings(model="llama3") , collection_name="2gkpis")

In [5]:
vector_store_4g.similarity_search("DROP")

[Document(metadata={'row': 32, 'source': './documents/kpi_4g.csv'}, page_content='KPI Name: MAX_MAX_RRC_USER\nKPI Formula: max(MAX_RRC_CONN_USER)'),
 Document(metadata={'row': 34, 'source': './documents/kpi_4g.csv'}, page_content='KPI Name: AVG_RRC_CONN_USER\nKPI Formula: avg(AVG_RRC_CONN_USER)'),
 Document(metadata={'row': 30, 'source': './documents/kpi_4g.csv'}, page_content='KPI Name: AVG_MAX_RRC_USER\nKPI Formula: avg(MAX_RRC_CONN_USER)'),
 Document(metadata={'row': 31, 'source': './documents/kpi_4g.csv'}, page_content='KPI Name: SUM_MAX_RRC_USER\nKPI Formula: sum(MAX_RRC_CONN_USER)')]

In [3]:
llm = ChatOllama(model="llama3")
message = """
You are specialize agent in telecommunication field. Answer the question based on provided context and do not format the answer in multiple line. 

{question}

Context: 
{context}
"""
prompt = ChatPromptTemplate.from_template(message)
parser = StrOutputParser()
retriver = Chroma(persist_directory="./db4g" , embedding_function=OllamaEmbeddings(model="llama3")).as_retriever()

chain = {
    "context": retriver , 
    "question": RunnablePassthrough()
} | prompt | llm | parser


print(chain.invoke({"question": "What is the formula for 4G RRC Setup success rate"}))
print(chain.invoke({"question": "What is the formula for 2G TCH Drop?"}))

The formula for 4G RRC Setup success rate is 100*sum(RRC_SETUP_NUM)/sum(RRC_SETUP_DENOM).
The formula for 2G TCH Drop is not provided in the given context. The provided documents are related to 4G KPIs, and there is no mention of 2G or TCH (Traffic Channel).


In [4]:
from langchain_core.runnables import RunnableLambda
from langchain.globals import set_debug , set_verbose
llm = ChatOllama(model="llama3")
chain_technology = (
    ChatPromptTemplate.from_template(
        """You are telecommunication field specialist agent. Given user question below, classify the question is related to which radio field topic, either '4G', '3G', '2G' or 'unclear'
        Do not response other word
        
        User: {question}
        Answer: 
        """
    ) | llm | StrOutputParser()
)

def route(info):
    print(info)
    retriever = Chroma(persist_directory="./db4g" if "4G" in info else "./db2g" ,  embedding_function=OllamaEmbeddings(model="llama3")).as_retriever(
        
    )
    return {
        "context": retriever  , 
        "question": RunnablePassthrough(),
    } | (
        ChatPromptTemplate.from_template("""
        You are specialize agent in telecommunication field. Answer the question based on only provided context and do not format the answer in multiple line.
        If provided context does not related to the question, just mention you don't know

        {question}

        Context: 
        {context}
        """)
    ) | llm | StrOutputParser()
        
full_chain = {"technology":chain_technology, "question": lambda x: x['question']} | RunnableLambda(route)
print(full_chain.invoke({"question": "What is the formula for LTE RRC Setup success rate"}))
print(full_chain.invoke({"question": "What is the formula for 2G TCH Drop?"}))

{'technology': '4G', 'question': 'What is the formula for LTE RRC Setup success rate'}
I don't know. The provided context is related to 2G KPIs, but it doesn't mention LTE or RRC Setup success rate, so I'm not able to provide an answer for that specific question.
{'technology': '2G', 'question': 'What is the formula for 2G TCH Drop?'}
The formula for 2G TCH Drop is 100*sum(TCH_DROP_NUM)/sum(TCH_DROP_DENOM).


In [5]:
Chroma(persist_directory="./db4g" ,  embedding_function=OllamaEmbeddings(model="llama3")).search("What is the formula for 4G RRC Setup success rate", search_type="similarity" )

[Document(metadata={'row': 19, 'source': './documents/kpi_4g.csv'}, page_content='KPI Name: SRVCC_SETUP_SUCCESS_RATE\nKPI Formula: 100*sum(SRVCC_NUM)/sum(SRVCC_DENOM)'),
 Document(metadata={'row': 27, 'source': './documents/kpi_4g.csv'}, page_content='KPI Name: DL_PDCP_SDU_LOSS_RATE\nKPI Formula: 100*sum(DL_PDCP_LOSS_NUM)/sum(DL_PDCP_LOSS_DENOM)'),
 Document(metadata={'row': 28, 'source': './documents/kpi_4g.csv'}, page_content='KPI Name: UL_PDCP_SDU_LOSS_RATE\nKPI Formula: 100*sum(UL_PDCP_LOSS_NUM)/sum(UL_PDCP_LOSS_DENOM)'),
 Document(metadata={'row': 1, 'source': './documents/kpi_4g.csv'}, page_content='KPI Name: RRC_SETUP_SUCCESS_RATE\nKPI Formula: 100*sum(RRC_SETUP_NUM)/sum(RRC_SETUP_DENOM)')]

In [6]:
from langchain_core.pydantic_v1 import BaseModel , Field  
from langchain_experimental.llms.ollama_functions import OllamaFunctions # ChatOllama does not support schema json output
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

class QueryKPI(BaseModel): 
    """Extract user question regarding KPI in telecommunication field."""
    
    kpi_name : str = Field(description="KPI name for telecommunication field")
    technology: str = Field(description="Technology of radio communication")

model = OllamaFunctions(model="llama3")
# model = ChatOpenAI(
#     model='gpt-3.5-turbo',
#     api_key="API_KEY",
# )
structure_model = model.with_structured_output(QueryKPI)

structure_chain = (
    ChatPromptTemplate.from_template(
        """You are telecommunication field specialist agent. Given user question below, classify the question is related to which radio field topic, output in '4G', '3G', '2G' or 'unclear' only. 
        You should also identify the kpi name related to telecommunication that user is asking and kpi name shall not contain any technology| word such as LTE, 4G, etc.
        
        User: {question}
        """
    ) | structure_model 
)
#print(structure_chain.invoke({"question": "What is the formula for LTE RRC Setup success rate"}).kpi_name)

def _get_kpiname(input)->str: 
    return input['kpi_extraction'].kpi_name

def route(info):
    retriever = Chroma(persist_directory="./db4g" if ("4G" in info['kpi_extraction'].technology or "LTE" in info['kpi_extraction'] ) else "./db2g" ,  embedding_function=OllamaEmbeddings(model="llama3")).as_retriever(search_kwargs={"k":5})
    
    
    return {
        "context": _get_kpiname | retriever , 
        "question": RunnablePassthrough(),
    } | (
        ChatPromptTemplate.from_template("""
        You are specialize agent in telecommunication field. Answer the question based on only provided context and do not format the answer in multiple line.
        If provided context does not related to the question, just mention you don't know

        {question}

        Context: 
        {context}
        
        Return the formula only with kpi name as follow: 
        KPI_NAME=KPI_FORMULA
        """)
    ) | llm | StrOutputParser()
        
full_chain = {"kpi_extraction": structure_chain , "question": lambda x: x['question']} | RunnableLambda(route)
#print(full_chain.invoke({"question": "What is the formula for LTE RRC Setup success rate"}))
#print(full_chain.invoke({"question": "What is the formula for 2G TCH Drop?"}))

In [7]:
full_chain.invoke({"question": "What is the formula for 2G TCH Drop"})

'KPI_NAME=TCH Drop, KPI_FORMULA=100*sum(TCH_DROP_NUM)/sum(TCH_DROP_DENOM)'

In [8]:
full_chain.invoke({"question":"What is the formula for RRC Setup Success Rate?"})

'RRC Setup Success Rate=100*sum(RRC_SETUP_NUM)/sum(RRC_SETUP_DENOM)'

In [9]:
# TO-DO 
# 1. Try dynamic load vector ? 
# 2. Output schema 
# 3. CSV RAG Agent
# 4. LangGraph
# 5. non support LLM
# 6. 