<a href="https://colab.research.google.com/github/parthag1201/Authentication_google/blob/main/rag_from_scratch_P1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Implementing a RAG Pipeline with LangChain, LangSmith (monitoring dashboard), and the Google Gemini API

In [None]:
# (1) Install required packages (if missing)
! pip install google-generativeai langchain_google_genai chromadb langchain
! pip install langchain_community tiktoken langchain-openai langchainhub


In [2]:
# (2) Import Gemini components
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings


In [3]:
from google.colab import userdata # For API Secret

In [24]:
# LangSmith Configuration
# Sets the tracing-related environment variables for this session. The API key
# is read through `userdata` (imported from google.colab in an earlier cell),
# so that cell must have run first and the key is never hard-coded here.
import os
from langsmith import traceable   # makes the @traceable decorator available; not used in the cells visible here
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = userdata.get('LANGCHAIN_API_KEY')
# NOTE(review): the three variables above use the LANGCHAIN_ prefix while the
# project name below uses LANGSMITH_ — confirm the installed SDK reads both.
os.environ['LANGSMITH_PROJECT']='Rag-from-scratch_P1'

In [25]:
# Sanity check: ask the LangSmith SDK whether tracing is enabled after the
# configuration cell above (the saved output of this cell is True).
from langsmith import utils
utils.tracing_is_enabled()


True

In [26]:
# LangChain Libraries
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [27]:
# Authenticate the current Colab user.
# NOTE(review): nothing in the visible cells appears to consume these
# credentials (the Gemini and LangSmith keys are read from userdata secrets) —
# confirm this step is still required, since it may prompt for interaction
# during an unattended "Run all".
from google.colab import auth
auth.authenticate_user()

In [28]:
# (3) Configure API keys
# The Gemini key is stored as the Colab secret 'Gemini_API' and exported under
# the GOOGLE_API_KEY environment variable before the Gemini clients are built.
import os
os.environ['GOOGLE_API_KEY'] = userdata.get('Gemini_API')

# (4) Initialize Gemini components: one embedding model, one chat model.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)

# Load Documents
# The SoupStrainer is handed to BeautifulSoup via bs_kwargs and names the CSS
# classes (post content, title, header) the loader should parse from the page.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)
docs = loader.load()

In [None]:
# Summarise what was loaded instead of dumping the full page text into the
# cell output: report the document count and preview the first document.
print(f"Loaded {len(docs)} document(s)")
if docs:  # guard against an empty load so the preview cannot raise IndexError
    print(docs[0].metadata)
    print(docs[0].page_content[:500])

In [None]:
# Split
# Break the loaded documents into overlapping chunks (chunk_size=1000,
# chunk_overlap=200) so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Report the chunk count and preview one chunk rather than printing every
# chunk, which floods the cell output.
print(f"Created {len(splits)} chunk(s) from {len(docs)} document(s)")
if splits:  # nothing to preview when the source produced no chunks
    print(splits[0].page_content[:300])

In [31]:
# Build the vector store: embed every chunk in `splits` with the Gemini
# embedding model and index the result in Chroma.
# NOTE(review): persist_directory is commented out, so the index presumably
# lives only for this session, and re-running this cell in the same kernel may
# index the same chunks again — confirm before relying on retrieval results.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,  # Using Gemini embeddings
    # persist_directory="./chrome_db"
)

# Expose the store through the retriever interface used by the RAG chain.
retriever = vectorstore.as_retriever()

In [34]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate  # Import from correct submodule
from langchain import LLMChain

In [33]:
# (5) Prompt template for the Gemini chat model.
# The template has two placeholders: {context} (the retrieved text) and
# {question} (the user's query).
prompt_template = (
    "Answer the question based only on the context:\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(prompt_template)
def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents in input order, separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

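# (6) Create RAG chain with Gemini
# Disabled draft below: a function-wrapped version of the chain (presumably
# meant to be decorated with @traceable). The live chain is built in the next cell.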
# def rag(context,question,prompt,llm):
#   rag_chain = (
#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
#   )

# # Query execution remains same
#   rag_chain.invoke("What is a Task?")

In [None]:
# Assemble the RAG pipeline. The first stage maps the incoming question to
#   context  -> retriever output passed through format_docs
#   question -> the question itself, passed through unchanged
# and that mapping is then piped through the prompt, the Gemini chat model and
# a string output parser.
rag_chain = (
    dict(context=retriever | format_docs, question=RunnablePassthrough())
    | prompt
    | llm
    | StrOutputParser()
)

# Run one query; as the last expression, the answer is shown as the cell output.
rag_chain.invoke("What is a Task?")