In [1]:
from dotenv import load_dotenv
import os
import json
from pprint import pprint, pp
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters   import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import  HuggingFaceEmbeddings
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

from langchain_community.document_loaders import PyPDFLoader,TextLoader
from typing import IO, BinaryIO
from langchain_openai import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA
from uuid import uuid4
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from typing import Literal

# for Chroma 
import chromadb
from langchain_chroma import Chroma
# for FAISS
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore


In [2]:
%config IPCompleter.greedy=True

In [3]:
# Load API keys from the shared env file; the keys are then read back via os.getenv below.
load_dotenv("../../../../../tokens/token_env")
#load_dotenv("langsmith_env")
#os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

# Keys this cell expects to find after loading the env file.
# NOTE(review): OPENAI_API_KEY sat on a commented-out line in the original cell, so it is
# not enforced here even though OpenAIEmbeddings presumably needs it — confirm.
_required_env_keys = (
    "LANGCHAIN_API_KEY",
    "GROQ_API_KEY",
    "HF_TOKEN",
    "GEMINI_API_KEY",
    "TAVILY_API_KEY",
)
_missing_env_keys = [name for name in _required_env_keys if os.getenv(name) is None]
if _missing_env_keys:
    # Assigning None into os.environ raises a TypeError that does not say which key
    # is absent; report the missing names explicitly instead.
    raise RuntimeError(
        "Missing environment variables: " + ", ".join(_missing_env_keys)
    )

#os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")
#os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Expose the Gemini key under GOOGLE_API_KEY as well.
os.environ["GOOGLE_API_KEY"] = os.environ["GEMINI_API_KEY"]

In [4]:
# Embedding model; used below for embed_query and passed to the Chroma vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
#llm = ChatOpenAI(model='gpt-4o', temperature=0.2)

https://python.langchain.com/docs/integrations/tools/
https://github.com/sunnysavita10/Agentic-2.0/blob/main/langgraph/tools.ipynb

In [5]:
from langgraph.graph import Graph,START,END
from IPython.display import Image,display

In [6]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model; function_1 pipes its classification prompt into this object.
model = ChatGoogleGenerativeAI(model= "gemini-2.0-flash")

In [7]:
model.invoke("hi")

AIMessage(content='Hi there! How can I help you today?', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--70de458e-71dc-4a5f-81f0-d0041c5159ec-0', usage_metadata={'input_tokens': 1, 'output_tokens': 11, 'total_tokens': 12, 'input_token_details': {'cache_read': 0}})

In [8]:
# Create the vector DB

In [9]:
embeddings.embed_query("Hi")

[-0.006920331157743931,
 -0.03531143441796303,
 0.0015979236923158169,
 0.06535868346691132,
 0.03294550999999046,
 -0.024221159517765045,
 -0.026173047721385956,
 0.049418263137340546,
 0.01622137427330017,
 -0.05166589096188545,
 -0.013382264412939548,
 -0.014528259634971619,
 -0.026054751127958298,
 -0.0032069378066807985,
 0.024590834975242615,
 0.0011219660518690944,
 -0.053469911217689514,
 0.015090166591107845,
 0.011445162817835808,
 0.03395102545619011,
 0.0492703914642334,
 0.02033217065036297,
 -0.013981139287352562,
 0.018897827714681625,
 0.017123384401202202,
 0.02417679876089096,
 0.018276771530508995,
 -0.0011820384534075856,
 0.019578030332922935,
 -0.036849286407232285,
 0.027666538953781128,
 -0.028243232518434525,
 0.027592603117227554,
 -0.016280522570014,
 -0.011755690909922123,
 -0.01602914370596409,
 -0.014077255502343178,
 0.037499915808439255,
 0.01888304017484188,
 -0.037647783756256104,
 0.043473877012729645,
 -0.012413714081048965,
 0.020968012511730194,
 0

In [10]:
from langchain_community.document_loaders import TextLoader,DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [11]:
# Loader for files matching *.txt in the "data" directory, each read with TextLoader.
loader = DirectoryLoader("data",glob="*.txt", loader_cls=TextLoader)

In [12]:
doc = loader.load()

In [13]:
doc

[Document(metadata={'source': 'data/usa.txt'}, page_content="🇺🇸 Overview of the U.S. Economy\nThe United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.\n\nU.S. GDP – Size, Composition, and Global Share\nAs of 2024, the United States’ nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in the world by nominal GDP, far ahead of China (which ranks 2n

In [14]:
# Splitter configured for chunks of at most 200 with an overlap of 10
# (presumably measured in characters, the splitter's default length function — confirm).
TextSplitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=10)

In [15]:
data = TextSplitter.split_documents(doc)

In [16]:
data

[Document(metadata={'source': 'data/usa.txt'}, page_content='🇺🇸 Overview of the U.S. Economy'),
 Document(metadata={'source': 'data/usa.txt'}, page_content='The United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy,'),
 Document(metadata={'source': 'data/usa.txt'}, page_content='economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological'),
 Document(metadata={'source': 'data/usa.txt'}, page_content='advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology,'),
 Document(metadata={'source': 'data/usa.txt'}, page_content='healthcare, finance, real estate, defense, and agriculture.'),
 D

In [17]:
# Plain-text view of the split chunks: one string per Document in `data`.
docs = [chunk.page_content for chunk in data]

In [18]:
docs

['🇺🇸 Overview of the U.S. Economy',
 'The United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy,',
 'economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological',
 'advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology,',
 'healthcare, finance, real estate, defense, and agriculture.',
 'U.S. GDP – Size, Composition, and Global Share',
 'As of 2024, the United States’ nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in the world by nominal GDP, far ahead of',
 'ahead of China (which ranks 2nd). The U.S. GDP p

In [19]:
# Chunk texts, in split order; this list is what gets loaded into the vector store.
doc_string = [piece.page_content for piece in data]

In [20]:
len(doc_string)

48

In [21]:
# create DB

In [22]:
# Build (or reopen) the persisted Chroma collection and load the chunk texts into it.
#
# Stable ids make this cell safe to re-run: without explicit ids each run stored the
# same texts again under fresh ids in the on-disk collection, which shows up as the
# same passage repeated several times in similarity-search results.
# NOTE(review): relies on the LangChain Chroma wrapper upserting by id — confirm for the
# installed version. Entries written by earlier runs under random ids stay in
# "chromadb/speech.db" until that directory (or the collection) is cleared.
vectordb = Chroma.from_texts(
    collection_name="speech_collection",
    embedding=embeddings,
    texts=doc_string,
    ids=[f"chunk-{position}" for position in range(len(doc_string))],
    persist_directory="chromadb/speech.db",
)

In [23]:
vectordb.search(search_type='similarity', query="The world must be made safe for democracy. Its")

[Document(metadata={}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no')]

In [24]:
# Retriever view over the vector store, limited to the 3 closest matches per query.
retriever = vectordb.as_retriever(search_kwargs={'k':3 })

In [25]:
retriever.invoke("The world must be made safe for democracy. Its")

[Document(metadata={}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no')]

### Generation

In [26]:
from pydantic import BaseModel, Field

In [27]:
# Structured reply expected from the classifier call: the chosen topic plus the
# model's justification. function_1 reads only the `Topic` field.
# NOTE(review): documented with comments rather than a class docstring on purpose —
# pydantic copies a model docstring into the generated JSON schema, which would
# presumably alter the format instructions sent to the model. Confirm before converting.
class TopicSelectionParser(BaseModel):
    Topic: str = Field(description="Selected Topic")
    Reasoning: str = Field(description="Reasoning behind topic selection")


In [28]:
from langchain.output_parsers import PydanticOutputParser

In [29]:
# Output parser bound to TopicSelectionParser; supplies the format instructions used in
# the classification prompt and is the last step of the chain in function_1.
parser = PydanticOutputParser(pydantic_object=TopicSelectionParser)

In [30]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Topic": {"description": "Selected Topic", "title": "Topic", "type": "string"}, "Reasoning": {"description": "Reasoning behind topic selection", "title": "Reasoning", "type": "string"}}, "required": ["Topic", "Reasoning"]}\n```'

In [31]:
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_core.messages import BaseMessage

In [32]:
class AgentState(TypedDict):
    """State dictionary handed to each graph node; holds a sequence of messages."""
    # operator.add is attached as Annotated metadata; presumably LangGraph uses it as the
    # reducer, so a node's returned "messages" list is appended to the existing one
    # instead of replacing it — confirm against the LangGraph docs.
    messages: Annotated[Sequence[BaseMessage], operator.add]

In [33]:
# Illustration only (not used by the graph code): the state is a plain dict whose
# "messages" entry holds the conversation so far, newest message last.
Agentstate = {
    "messages": ["Hi How are you", "what the speech about"],
}


In [34]:
from langchain.prompts import PromptTemplate

In [58]:
def function_1(state: AgentState):
    """Supervisor node: classify the latest message as "USA" or "Not Related".

    Args:
        state: Graph state; only the last entry of ``state["messages"]`` is read.

    Returns:
        dict: ``{"messages": [topic]}`` where ``topic`` is the ``Topic`` field of the
        parsed model reply, in the same shape as ``AgentState``.

    Raises:
        IndexError: If ``state["messages"]`` is empty.
    """
    question = state["messages"][-1]     # the most recent message is the user question
    print(question)
    # Message objects carry their text in `.content`; plain strings pass through
    # unchanged, so the prompt never receives a message object's repr.
    question_text = getattr(question, "content", question)
    # The earlier wording told the model to answer with the bare category name and
    # nothing else, which contradicts the JSON format instructions appended below and
    # invites replies the parser rejects. The instruction now targets the "Topic" field.
    template = """
        Your task is to classify the given user query into one of the following categories ["USA","Not Related"]
        Put the chosen category name, and nothing else, in the "Topic" field.
        user_query: {question}
        {format_instructions}
        """
    prompt = PromptTemplate(
        template=template,
        input_variables=["question"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    chain = prompt | model | parser
    response = chain.invoke({"question": question_text})    # the chain takes a dict input
    return {"messages": [response.Topic]}  # same shape as AgentState

In [56]:
# Try the classifier node by hand on a question that is not about the USA.
state={"messages":["what is the weather today"]}
function_1(state)


what is the weather today


{'messages': ['Not Related']}

what is the weather today


{'messages': ['Not Related']}

In [None]:
def function_2(state: AgentState):
    """Placeholder for the graph node registered under the name "RAG".

    Not implemented yet: ignores ``state`` and returns ``None``.
    """
    return None

In [None]:
function_1(state)

In [None]:
def function_3(state: AgentState):
    """Placeholder for the graph node registered under the name "LLM".

    Not implemented yet: ignores ``state`` and returns ``None``.
    """
    return None

In [None]:
def router(state: AgentState):
    """Pick the branch to follow after the Supervisor node.

    Reads the last entry of ``state["messages"]`` — the topic label the supervisor
    returns ("USA" or "Not Related") — and maps it to one of the keys used in the
    conditional-edge table.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        str: ``"RAG Call"`` when the label is "USA" (ignoring case and surrounding
        whitespace), otherwise ``"LLM Call"``. An empty message list also yields
        ``"LLM Call"``.
    """
    messages = state["messages"]
    if not messages:
        # Nothing to classify: fall back to the general LLM branch.
        return "LLM Call"
    latest = messages[-1]
    # Accept either a plain string or a message object exposing `.content`.
    label = str(getattr(latest, "content", latest)).strip().lower()
    if label == "usa":
        return "RAG Call"
    return "LLM Call"

In [None]:
from langgraph.graph import StateGraph,START,END

In [None]:
workflow=StateGraph(AgentState)

In [None]:
workflow.add_node("Supervisor", function_1)

In [None]:
workflow.add_node("RAG", function_2)

In [None]:
workflow.add_node("LLM", function_3)

In [None]:
workflow.set_entry_point("Supervisor")  # you can use START instead of set_entry_point

In [None]:
# After the Supervisor node runs, `router` returns one of the keys below and the graph
# continues at the node that key maps to.
workflow.add_conditional_edges(
    "Supervisor",
    router,
    {
        "RAG Call": "RAG",
        "LLM Call": "LLM",
    },
)

# Both branches are terminal. Without an explicit edge to END the graph has no declared
# finish point. NOTE(review): some LangGraph releases presumably reject such nodes as
# dead-ends at compile time — confirm for the installed version.
workflow.add_edge("RAG", END)
workflow.add_edge("LLM", END)

In [None]:
app = workflow.compile()

In [None]:
from IPython.display import Image, display

In [None]:
png=app.get_graph().draw_mermaid_png()

In [None]:
display(Image(png))

In [None]:
app.validate()

In [None]:
state["messages"].append("what is the weather in USA")

In [None]:
retriever.invoke("what is the gdp of usa")