## **Installing Essential Packages**

In [None]:
!pip install langchain-google-genai

In [None]:
!pip install langchain

In [None]:
# Core LangChain stack plus packages imported directly by later cells:
# langchain-classic (RetrievalQA, ConversationChain, memory) and
# beautifulsoup4 / requests (the Online RAG and Graph RAG cells).
!pip install -U langchain langchain-core langchain-community langchain-text-splitters langchain-classic beautifulsoup4 requests


In [None]:
!pip install sentence-transformers  # required for HuggingFace embeddings
!pip install faiss-cpu

In [None]:
!pip install pypdf

In [None]:
from google.colab import userdata
# Look up the value stored under the name 'GAPI' via google.colab.userdata.
# A later cell assigns this value to the GOOGLE_API_KEY environment variable.
gkey=userdata.get('GAPI')

# **PDF Loader**

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Point the loader at the PDF on the Colab filesystem and read it into `docs`.
loader = PyPDFLoader("/content/LoveStories.pdf")
docs = loader.load()

# Quick sanity check: number of loaded documents and the first one's metadata.
doc_count = len(docs)
first_metadata = docs[0].metadata
print(f"{doc_count} {first_metadata}")

# **Text Splitter (Chunking)**

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Tunable chunking parameters (passed straight to the splitter below).
# NOTE(review): presumably measured in characters, the splitter's default — confirm.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Split the loaded documents into overlapping chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
chunks = splitter.split_documents(docs)

print("chunks:", len(chunks))

# Fail with an actionable message instead of a bare IndexError on chunks[0];
# an empty result also makes the later vector-index cell unusable.
if not chunks:
    raise ValueError(
        "No text chunks were produced from the loaded documents; "
        "check that the PDF contains extractable text."
    )

# each element in `chunks` is a Document with .page_content and .metadata
print("first chunk content preview:", chunks[0].page_content[:300])


# **Embedding**

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Name of the HuggingFace embedding model; swap in any other model id here.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Embedding object handed to the FAISS index in the next cell.
emb = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# **VectorDB and Retrieval**

In [None]:
# Embed every chunk and store the vectors in a FAISS index.
index = FAISS.from_documents(chunks, emb)

# Retriever view over the same index, returning 5 results per query.
retriever_options = {"k": 5}
retriever = index.as_retriever(search_kwargs=retriever_options)

# Probe the index directly so the raw scores are visible.
# NOTE(review): score semantics (distance vs. similarity) depend on the FAISS
# index configuration — confirm before interpreting the numbers.
results_with_scores = index.similarity_search_with_score(
    "A STRANGE TWIST OF FATE ",
    k=5,
)
for hit, hit_score in results_with_scores:
    source_name = hit.metadata.get("source")
    print(source_name, hit_score)


# **Gemini API Integration**

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
import os

# The Gemini client reads its credentials from this environment variable.
os.environ["GOOGLE_API_KEY"] = gkey

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  # or gemini-1.5-pro
    temperature=0,
)
prompt = "Create a JSON query for: 'movies about aliens in 1980'"

# modern invoke
response = llm.invoke(prompt)

# --- Text extraction ---
# A chat message's `content` is either a plain string or a list of parts,
# where each part is a string or a dict that may carry a "text" entry.
# The previous fallback indexed response_metadata["candidate_texts"], which is
# not a key the response is known to provide and would raise KeyError.
content = response.content
if isinstance(content, str):
    query_spec = content
else:
    text_parts = []
    for part in content:
        if isinstance(part, str):
            text_parts.append(part)
        elif isinstance(part, dict):
            text_parts.append(part.get("text", ""))
    query_spec = "".join(text_parts)

print(query_spec)

# **LCEL Implementation**

In [None]:
# minimal LCEL pipeline using index.similarity_search_with_score directly
import itertools
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)


def _build_rag_inputs(inp: dict) -> dict:
    """Look up context for a question and return the prompt variables.

    Args:
        inp: Mapping with a "question" key holding the user's question.

    Returns:
        A dict with "context" (the top-5 hits from the FAISS index, each
        prefixed with its source and truncated to 2000 characters, joined by
        blank lines) and "question" (passed through unchanged).
    """
    question = inp["question"]
    formatted_hits = []
    for doc, _score in index.similarity_search_with_score(question, k=5):
        # Fall back to an empty label so a missing source never renders as "None".
        source = (doc.metadata or {}).get("source") or ""
        formatted_hits.append(f"[source:{source}] {doc.page_content[:2000]}")
    return {"context": "\n\n".join(formatted_hits), "question": question}


# Runnable that calls the FAISS index directly and returns a mapping {context, question}
search_runnable = RunnableLambda(_build_rag_inputs)

prompt = ChatPromptTemplate.from_template(
    """You are a helpful assistant. Use ONLY the CONTEXT to answer the QUESTION.
If the answer is not in the context, reply "I don't know".

CONTEXT:
{context}

QUESTION: {question}

Answer concisely."""
)

# StrOutputParser turns the model's message into plain text, replacing the
# hand-rolled getattr/str fallback chain that could print a message repr.
pipeline = search_runnable | prompt | llm | StrOutputParser()

resp = pipeline.invoke({"question": "Title of the chapters"})

out = resp.strip()
print(out)


# **RetrievalQA**

In [None]:
from langchain_classic.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
# Gemini LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",    # or gemini-1.5-pro
    temperature=0
)
# "stuff" chain: all retrieved chunks are placed into a single prompt.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever
)
# Query via invoke(); Chain.run() is deprecated. RetrievalQA takes its input
# under "query" and returns the answer under "result".
stuff_answer = qa_stuff.invoke({"query": 'Title of the chapters'})
print(stuff_answer["result"])

In [None]:
from langchain_classic.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
# Gemini LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",    # or gemini-1.5-pro
    temperature=0
)
# "map_reduce" chain: each retrieved chunk is processed separately and the
# partial answers are then combined.
# NOTE: the variable name is kept as `qa_stuff` for compatibility even though
# this rebinds it to the map_reduce variant.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='map_reduce',
    retriever=retriever
)
# Query via invoke(); Chain.run() is deprecated. RetrievalQA takes its input
# under "query" and returns the answer under "result".
map_reduce_answer = qa_stuff.invoke({"query": 'Title of the chapters'})
print(map_reduce_answer["result"])

# **ConversationChain and ConversationBufferMemory**

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_classic.chains import ConversationChain
from langchain_classic.memory import ConversationBufferMemory

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)

# ConversationChain is used here with its default prompt, which fills a
# {history} slot in a text template. The memory is therefore left in its
# string mode (no return_messages=True); returning message objects would put
# their list repr into the prompt instead of a readable transcript.
memory = ConversationBufferMemory(memory_key="history")

chat = ConversationChain(llm=llm, memory=memory, verbose=False)

# Scripted three-turn conversation; the last turn checks that earlier turns
# were remembered.
user_turns = [
    "Hi, my name is Shailesh.",
    "Remember that I like sci-fi movies.",
    "What is my name and what do I like?",
]

for user_turn in user_turns:
    print("Me   : " + user_turn)
    # `answer` ends up holding the reply to the final question.
    answer = chat.predict(input=user_turn)
    print("Bot  : " + answer)


# **Tool Calling**

In [None]:
from langchain.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI

@tool
def add(a: int, b: int) -> int:
    """Return a + b."""
    return a + b

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
)

# Tools must be attached with bind_tools(); passing them through model_kwargs
# does not register them with the chat model.
llm_with_tools = llm.bind_tools([add])

response = llm_with_tools.invoke("What is 12 + 9?")

# When the model decides to call a tool, the request appears in
# response.tool_calls and response.content is typically empty, so the tool
# is executed here and its result printed.
if response.tool_calls:
    for call in response.tool_calls:
        if call["name"] == add.name:
            result = add.invoke(call["args"])
            print(f"{call['name']}({call['args']}) = {result}")
        else:
            print(f"Unhandled tool call: {call['name']}")
else:
    # The model answered directly without requesting a tool.
    print(response.content)


# **Online RAG**

In [None]:
from langchain_classic.schema import BaseRetriever, Document   # BaseRetriever is the expected type
from langchain_classic.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
import requests
from bs4 import BeautifulSoup

def fetch_text(url: str) -> str:
    """Download a web page and return its visible text, one non-blank line per row.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text with <script>/<style> elements removed, each line
        stripped of surrounding whitespace, blank lines dropped, joined by
        newlines.

    Raises:
        Whatever `requests` raises for a failed request or an error HTTP
        status (via raise_for_status).
    """
    # Browser-like User-Agent sent with the request.
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    response = requests.get(url, headers=browser_headers, timeout=10)
    response.raise_for_status()

    parsed = BeautifulSoup(response.text, "html.parser")

    # Remove script and style elements so their contents are not in the text.
    for unwanted in parsed(["script", "style"]):
        unwanted.decompose()

    kept_lines = []
    for raw_line in parsed.get_text().splitlines():
        cleaned = raw_line.strip()
        if cleaned:
            kept_lines.append(cleaned)
    return "\n".join(kept_lines)

# Fetch a live page and wrap its cleaned text in a single Document.
# The URL is recorded under the "source" metadata key.
url = "https://en.wikipedia.org/wiki/Artificial_intelligence"
page_text = fetch_text(url)
web_doc = Document(page_content=page_text, metadata={"source": url})

# Subclass BaseRetriever so RetrievalQA accepts it
class WebOnlyRetriever(BaseRetriever):
    """Retriever that returns one fixed Document for every query.

    The query text is ignored: both retrieval methods return a
    single-element list containing `doc`.
    """

    # Fields are declared as annotated class attributes instead of being set
    # in a custom __init__.
    # NOTE(review): per the original author's note, the base class is a
    # Pydantic model that turns these into keyword constructor arguments.
    doc: Document  # the document returned for every query
    k: int = 4  # NOTE(review): declared but never read by the methods below

    # No __init__ is defined here; construction relies on the base class
    # accepting the fields above as keyword arguments.

    def _get_relevant_documents(self, query: str) -> list[Document]: # Implemented abstract method
        """Return the stored document in a one-element list; `query` is unused."""
        # No ranking or filtering is applied (ranking logic could be added here).
        return [self.doc]

    async def _aget_relevant_documents(self, query: str) -> list[Document]: # Implemented abstract method
        """Async variant; delegates to the synchronous method."""
        return self._get_relevant_documents(query)

# Instantiate the retriever, passing its fields as keyword arguments.
combined = WebOnlyRetriever(doc=web_doc, k=4)

# build LLM + classic RetrievalQA (legacy API)
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="map_reduce", retriever=combined)

# Query via invoke(); Chain.run() is deprecated. RetrievalQA takes its input
# under "query" and returns the answer under "result".
web_answer = qa.invoke({"query": "What is artificial Intelligence"})
print(web_answer["result"])

# **Graph RAG**

In [None]:
import requests
from langchain_google_genai import ChatGoogleGenerativeAI

def sparql(query, timeout=30):
    """Run a SPARQL query against the Wikidata query service and return the parsed JSON.

    Args:
        query: SPARQL query text.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response body (requested in the
        application/sparql-results+json format).

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: For connection problems or timeouts.
    """
    url = "https://query.wikidata.org/sparql"
    headers = {
        "Accept": "application/sparql-results+json",
        # Identify the client with a descriptive User-Agent; generic library
        # defaults are commonly throttled or rejected by Wikimedia services.
        "User-Agent": "GraphRAG-Notebook/0.1 (educational demo; python-requests)",
    }
    # Bound the wait so a stalled request cannot hang the notebook.
    r = requests.get(url, params={"query": query}, headers=headers, timeout=timeout)
    # Surface HTTP errors here instead of failing later while decoding JSON.
    r.raise_for_status()
    return r.json()

entity = "Q42"   # Douglas Adams

# Retrieve graph neighbors of the entity.
# The label service only resolves labels for entities, not for raw predicates,
# so each predicate (?claim) is mapped back to its property entity (?prop)
# through wikibase:directClaim. ?propLabel then carries a readable name such
# as "occupation" instead of a predicate URI. The selected variable names are
# unchanged, so code reading 'propLabel' / 'valueLabel' keeps working.
q = f"""
SELECT ?propLabel ?valueLabel WHERE {{
  wd:{entity} ?claim ?value .
  ?prop wikibase:directClaim ?claim .
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
}}
LIMIT 30
"""

graph = sparql(q)
print(graph)



In [None]:

# Flatten the SPARQL bindings into "property: value" lines for the prompt.
context_lines = [
    f"{binding['propLabel']['value']}: {binding['valueLabel']['value']}"
    for binding in graph["results"]["bindings"]
]
context = "\n".join(context_lines)

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

# Assemble the prompt: instruction, graph context, then the question.
prompt = (
    "Use the following graph context to answer:\n\n"
    f"{context}"
    "\n\nQ: Who is Douglas Adams?"
)

graph_answer = llm.invoke(prompt)
print(graph_answer.content)