Config the model


In [2]:
from dotenv import load_dotenv
load_dotenv()
import os
from langchain_google_genai import ChatGoogleGenerativeAI

google_api_key=os.environ['Gemini_API_key']


In [3]:

model=ChatGoogleGenerativeAI(model='gemini-1.5-flash',google_api_key=google_api_key)
output=model.invoke("hi")
print(output.content)

Hi there! How can I help you today?


Config the embedding model


In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
len(embeddings.embed_query("hi"))

  from .autonotebook import tqdm as notebook_tqdm


384

In [5]:

from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

In [6]:
loader=DirectoryLoader("data",loader_cls=TextLoader)


In [7]:
docs=loader.load()
docs

[Document(metadata={'source': 'data\\usa.txt'}, page_content="🇺🇸 Overview of the U.S. Economy\nThe United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.\n\nU.S. GDP – Size, Composition, and Global Share\nAs of 2024, the United States’ nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in the world by nominal GDP, far ahead of China (which ranks 2

In [8]:

text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50
)
new_docs=text_splitter.split_documents(documents=docs)
doc_string=[doc.page_content for doc in new_docs]

len(doc_string),len(new_docs)

doc_string,new_docs

(['🇺🇸 Overview of the U.S. Economy',
  'The United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy,',
  'It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a',
  'a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services.',
  'innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.',
  'U.S. GDP – Size, Composition, and Global Share',
  'As of 2024, the United States’ nominal GDP is estimated to be around $28 trillion USD, accounting for approxi

In [9]:
index = faiss.IndexFlatL2(384)


In [10]:
vectordb = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)
retriever = vectordb.as_retriever(search_kwargs={"k": 3})


## creation of pydantic class


In [11]:
from pydantic import BaseModel , Field
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
import operator
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser







In [12]:
class TopicSelectionParser(BaseModel):#structure of output of supervisor
    Topic:str=Field(description="selected topic")
    Reasoning:str=Field(description="reasoning behind topic selection")
    

In [13]:
from langchain.output_parsers import PydanticOutputParser


In [14]:
parser=PydanticOutputParser(pydantic_object=TopicSelectionParser)

In [15]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Topic": {"description": "selected topic", "title": "Topic", "type": "string"}, "Reasoning": {"description": "reasoning behind topic selection", "title": "Reasoning", "type": "string"}}, "required": ["Topic", "Reasoning"]}\n```'

In [16]:
Agentstate={}

In [17]:
Agentstate["message"]=[]

In [18]:
Agentstate

{'message': []}

In [91]:
Agentstate['message'].append("hi how r u?")

In [92]:
Agentstate['message'].append("Iam fine")

In [93]:
class Agentstate(TypedDict):
    message: Annotated[Sequence[BaseMessage],operator.add]

In [None]:
def function_1(state:Agentstate):
    question=state["message"][-1] # recent msg
    print('question',question)

    template="""
    Your task is to classify the given user query into one of the following categories: [USA,Not Related,]. 
    Only respond with the category name and nothing else.

    User query: {question}
    {format_instructions}
    """
    prompt= PromptTemplate(
        template=template,
        input_variable=["question"], #user query
        partial_variables={"format_instructions": parser.get_format_instructions()}# format of output
        )

    chain=prompt | model| parser
    response = chain.invoke({"question":question})
    print("Parsed response:", response)
    
    return {"message": [response.Topic]}





    


NameError: name 'Agentstate' is not defined

In [172]:
state={'message':['what is 1 USA DOLLAR TO INR']}
state
function_1(state=state)

question what is 1 USA DOLLAR TO INR
Parsed response: Topic='USA' Reasoning='The query asks for the conversion of the US dollar (USD) to Indian rupees (INR), directly referencing the USA through its currency.'


{'message': ['USA']}

In [173]:
def router(state:Agentstate):
    print("-> ROUTER ->")
    last_message=state['message'][-1]
    if "usa" in last_message.lower():
        return "RAG CALL"
    else:
        return "LLM CALL"

In [174]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [175]:

# RAG Function
def function_2(state:Agentstate):
    print("-> RAG Call ->")
    
    question = state["message"][0]
    
    prompt=PromptTemplate(
        template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:""",
        
        input_variables=['context', 'question']
    )
    
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )
    result = rag_chain.invoke(question)
    return  {"message": [result]}


In [176]:
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000017CC99E0980>, search_kwargs={'k': 3})

In [189]:
def function_3(state:Agentstate):
    print("-> LLM Call ->")
    question = state["message"][0]
    
    # Normal LLM call
    complete_query = "Anwer the follow question with you knowledge of the real world. Following is the user question: " + question
    response = model.invoke(complete_query)
    return {"message": [response.content]}

In [190]:
from langgraph.graph import StateGraph,END

In [191]:
workflow=StateGraph(Agentstate)

In [192]:
workflow.add_node("supervisor",function_1)

<langgraph.graph.state.StateGraph at 0x17cc9971590>

In [193]:
workflow.add_node("Rag",function_2)

<langgraph.graph.state.StateGraph at 0x17cc9971590>

In [194]:
workflow.add_node('llm',function_3)

<langgraph.graph.state.StateGraph at 0x17cc9971590>

In [195]:
workflow.set_entry_point("supervisor")

<langgraph.graph.state.StateGraph at 0x17cc9971590>

In [196]:
workflow.add_conditional_edges(
    "supervisor",
    router,{
        "RAG CALL":'Rag',
        "LLM CALL":'llm'
    }
)

<langgraph.graph.state.StateGraph at 0x17cc9971590>

In [197]:
workflow.add_edge("Rag",END)
workflow.add_edge("llm",END)

<langgraph.graph.state.StateGraph at 0x17cc9971590>

In [198]:
app=workflow.compile()


In [199]:
state={'message':['time now']}
app.invoke(state)

question time now
Parsed response: Topic='Not Related' Reasoning="The query 'time now' is a request for the current time and is not related to the USA."
-> ROUTER ->
-> LLM Call ->


{'message': ['time now',
  'Not Related',
  'I do not have access to real-time information, including the current time.  To get the current time, please check a clock, your computer or phone, or an online time service.']}

In [200]:
state={"message":["what is a gdp of usa?"]}
app.invoke(state)



question what is a gdp of usa?
Parsed response: Topic='USA' Reasoning='The query explicitly asks for the GDP of the USA.'
-> ROUTER ->
-> RAG Call ->


{'message': ['what is a gdp of usa?',
  'USA',
  "I don't know the exact current GDP of the USA.  The GDP is a constantly changing figure.  To find the most up-to-date information, you should consult a reliable economic data source."]}

In [201]:
state['message']

['what is a gdp of usa?']