<a href="https://colab.research.google.com/github/khajum/python/blob/main/rag/rag-application-2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the LangChain packages, the Chroma vector store, and the OpenAI /
# Google GenAI integrations that the cells below import.
!pip install langchain_community langchainhub chromadb langchain langchain-openai langchain-google-genai

In [None]:
import os

from google.colab import userdata

# Copy the 'openai-api-key' Colab secret into the OPENAI_API_KEY
# environment variable.
os.environ.update({'OPENAI_API_KEY': userdata.get('openai-api-key')})

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Load the course page through WebBaseLoader and show the raw result.
loader = WebBaseLoader(
    web_path=["https://www.educosys.com/course/genai"],
)
docs = loader.load()
print(docs)

In [None]:
# Install the text-splitter package used to chunk the loaded documents.
!pip install langchain-text-splitters

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of up to 1000 characters, with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [None]:
# Preview the first and third chunks. An index is skipped when the page
# produced too few chunks, instead of failing the cell with an IndexError.
for _preview_index in (0, 2):
    if _preview_index < len(splits):
        print(splits[_preview_index])

In [None]:
# Total number of chunks produced by the splitter.
print(len(splits))

In [None]:
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# The GEMINI_API_KEY secret must be defined in Colab secrets; it is copied
# into the process environment here so it can be read back below.
os.environ['GEMINI_API_KEY'] = userdata.get('GEMINI_API_KEY')

# 3) Create embeddings (Gemini embeddings) + vector store
embedding_model = GoogleGenerativeAIEmbeddings(
    model="gemini-embedding-001",
    google_api_key=os.environ['GEMINI_API_KEY'],
)
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
)

In [None]:
# Number of records in the underlying Chroma collection (read through the
# private `_collection` attribute; presumably one record per chunk — verify).
print(vectorstore._collection.count())

In [None]:
# Dump the collection's stored records for inspection, again via the private
# `_collection` attribute (no filters are passed, so everything is returned).
print(vectorstore._collection.get())

In [None]:
# The vector store was built without explicit ids, so the ids differ between
# runs. Read two ids from the live collection rather than hard-coding values
# copied from an earlier session (those would simply match nothing).
sample_ids = vectorstore._collection.get(limit=2)["ids"]
for position, doc_id in enumerate(sample_ids, start=1):
    # Show the stored text together with its embedding vector.
    print(
        f"\n Collection {position} - ",
        vectorstore._collection.get(ids=[doc_id], include=["embeddings", "documents"]),
    )

In [None]:
# 4) Build a retriever
# Wrap the vector store in a retriever, using the default search settings.
retriever = vectorstore.as_retriever()

In [None]:
# 5) Pull a RAG prompt from LangChain Hub
# `hub` is not part of langchain_community. It lives in `langchain` on 0.x
# releases and in `langchain_classic` once langchain 1.x is installed.
try:
    from langchain import hub
except ImportError:
    from langchain_classic import hub

prompt = hub.pull("rlm/rag-prompt")

In [None]:
# Upgrade langchain and langchainhub to their latest releases.
# NOTE(review): this runs after earlier cells already imported LangChain
# modules; a runtime restart may be needed for the upgrade to take effect.
!pip install --upgrade langchain langchainhub

In [None]:
import os

from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI

# 6) Initialize Gemini LLM
# The key is passed explicitly (as for the embedding model) because this
# notebook stores it under GEMINI_API_KEY.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.2,
    google_api_key=os.environ.get('GEMINI_API_KEY'),
)
# Converts the chat model's message output into a plain string.
parser = StrOutputParser()

In [None]:
# Compose: (retriever -> format context -> prompt -> llm -> parse)
def format_docs(docs):
    """Concatenate the documents' text into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined by a blank line; an empty string
        when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

def answer_question(question: str) -> str:
    """Answer a question using retrieval-augmented generation.

    Relies on the notebook-level ``retriever``, ``prompt``, ``llm`` and
    ``parser`` objects created in earlier cells.

    Args:
        question: The user's natural-language question.

    Returns:
        The result of running the prompt -> llm -> parser chain on the
        retrieved context and the question.
    """
    # Retrieve relevant docs (``invoke`` replaces the deprecated
    # ``get_relevant_documents``).
    docs = retriever.invoke(question)
    context = format_docs(docs)

    # Run chain. The context is already retrieved and formatted above, so the
    # chain only needs to fill the prompt, call the model, and parse the reply.
    rag_chain = prompt | llm | parser
    return rag_chain.invoke({"context": context, "question": question})


In [None]:
# 7) Ask a sample question and print the generated answer
result = answer_question("What is GenAI?")
print(result)