<a href="https://colab.research.google.com/github/parthag1201/RAG-ify/blob/main/rag_from_scratch_P1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Implementing a RAG Pipeline with LangChain, LangSmith (Monitoring Dashboard), and the Google Gemini API

In [None]:
# (1) Install required packages (if missing)
! pip install google-generativeai langchain_google_genai chromadb langchain
! pip install langchain_community tiktoken langchain-openai langchainhub


Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.2-py3-none-any.whl.metadata (4.7 kB)
Collecting chromadb
  Downloading chromadb-1.0.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain_google_genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
INFO: pip is looking at multiple versions of langchain-google-genai to determine which version is compatible with other requirements. This could take a while.
Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.1-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain_google_genai-2.1.0-py3-none-any.whl.metadata (3.6 kB)
  Downloading langchain_google_genai-2.0.11-py3-none-any.whl.metadata (3.6 kB)
  Downloading langchain_google_genai-2.0.10-py3-none-any.whl.metadata (3.6 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-

In [None]:
# (2) Import Gemini components
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings


In [19]:
from google.colab import userdata # For API Secret

In [33]:
# LangSmith configuration: tracing is driven entirely by environment variables.
import os
from langsmith import traceable   # provides the @traceable decorator for LLM calls

# Enable tracing. 'LANGCHAIN_TRACING' is kept exactly as originally written;
# 'LANGCHAIN_TRACING_V2' is the flag name documented by LangSmith.
os.environ['LANGCHAIN_TRACING'] = 'true'
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# Read the key from the Colab secret store and export it. Holding it only in
# a Python variable is not enough: the tracer looks the key up in the
# environment, so without the export no runs can be authenticated.
LANGCHAIN_API_KEY = userdata.get('Langchain_api')
os.environ['LANGCHAIN_API_KEY'] = LANGCHAIN_API_KEY

# Project under which the traces are grouped in the LangSmith dashboard.
os.environ['LANGSMITH_PROJECT']='Rag_from_scratch'

In [34]:
# LangChain Libraries
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [35]:
# (3) Configure API keys
import os
# The key is read from the Colab secret named 'Gemini_API'.
GOOGLE_API_KEY = userdata.get('Gemini_API')
# Export it as well so that any Gemini client created later can find it.
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

# (4) Initialize Gemini components
# The key is passed explicitly: previously it was fetched but never handed to
# either client, so both relied on a GOOGLE_API_KEY that was never set.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)
# temperature=0 requests the least random output the model offers.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    google_api_key=GOOGLE_API_KEY,
)

# Load the source article. Parsing is limited to HTML elements whose CSS
# class is "post-content", "post-title" or "post-header".
article_urls = ("https://lilianweng.github.io/posts/2023-06-23-agent/",)
content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=article_urls,
    bs_kwargs={"parse_only": content_filter},
)
docs = loader.load()

# Break the loaded documents into overlapping chunks
# (chunk size 1000, overlap 200, as measured by the splitter's length function).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)

# print(splits)

# Embed every chunk with the Gemini embedding model and index the results
# in a Chroma vector store.
# NOTE(review): re-running this cell in the same kernel may add the same
# chunks to the store a second time — confirm before relying on re-runs.
vectorstore = Chroma.from_documents(splits, embedding=embeddings)

In [36]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate  # Import from correct submodule
from langchain import LLMChain

In [39]:
# Expose the vector store through the retriever interface.
retriever = vectorstore.as_retriever()

# (5) Prompt that tells the model to answer from the supplied context only.
# The {context} and {question} placeholders are filled in when the chain runs.
prompt_template = (
    "Answer the question based only on the context:\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(prompt_template)
def format_docs(docs):
    """Merge the text of several documents into a single string.

    Args:
        docs: Iterable of objects that expose a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined by a blank line (``"\\n\\n"``);
        an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

# (6) RAG chain with Gemini
@traceable
def rag(context, question, prompt, llm):
    """Answer ``question`` with a retrieval-augmented chain and return the text.

    The chain sends the question to the module-level ``retriever``, formats
    the retrieved documents with ``format_docs``, fills ``prompt`` with that
    context and the question, calls ``llm`` and parses the reply to a string.

    Args:
        context: Currently unused — the context is always obtained from
            ``retriever``. Kept so the signature stays compatible with
            existing callers. NOTE(review): decide whether a caller-supplied
            context should override retrieval.
        question: The question to answer. It is used both as the retrieval
            query and as the ``{question}`` value of the prompt.
        prompt: Prompt template with ``{context}`` and ``{question}`` slots.
        llm: Chat model that receives the filled-in prompt.

    Returns:
        The model's answer as produced by ``StrOutputParser``. The answer is
        also printed, as before.
    """
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    # Use the caller's question; previously a fixed query was hard-coded here
    # and the `question` argument was silently ignored.
    answer = rag_chain.invoke(question)
    print(answer)
    # Returning the answer lets callers reuse it instead of only seeing it printed.
    return answer