### **1. Install Dependencies**

In [22]:
# Install the essential libraries from langchain and google.
!pip install langchain langchain-google-genai chromadb requests -q

print("Libraries installed successfully.")

Libraries installed successfully.


### **2. Configure API Key**

In [14]:
import os
from getpass import getpass

# `google.colab` only exists inside Colab. Importing it unconditionally makes
# the notebook unusable anywhere else, so treat it as optional.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

if userdata is not None:
    # Running in Colab: read the key through the `userdata` helper.
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
else:
    # Elsewhere: reuse an already-exported key, otherwise prompt for it
    # without echoing it into the notebook output.
    GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY') or getpass("Enter your Google API key: ")

# Fail here with a clear message; assigning None to os.environ would otherwise
# raise an opaque TypeError, and an empty key would only fail at the first API call.
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY is empty or missing; add it to your secrets or environment.")

os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
print("Google API Key has been configured.")

Google API Key has been configured.


### **3. Load Data Source**

In [15]:
import requests
# `Document` is imported from langchain_core (already a dependency of this
# notebook) rather than the legacy `langchain.schema` path.
from langchain_core.documents import Document

# URL of the text document we want the RAG system to know about.
url = 'https://langchain-ai.github.io/langgraph/llms.txt'

# A timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() stops an HTTP error page from being indexed as if it
# were the real document.
response = requests.get(url, timeout=30)
response.raise_for_status()
text_content = response.text

# An empty body would produce zero useful chunks downstream; fail early instead.
if not text_content.strip():
    raise ValueError(f"Downloaded document from {url} is empty.")

# We wrap the text in a LangChain `Document` object for compatibility.
docs = [Document(page_content=text_content)]

print("Document loaded successfully.")

Document loaded successfully.


### **4. Split Document into Chunks**
LLMs have a limited context window, so we split the large document into smaller, overlapping chunks. This allows the model to process relevant information efficiently.

In [23]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings kept together so they are easy to spot and tune.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded document(s) into smaller, overlapping chunks.
chunks = text_splitter.split_documents(docs)

print(f" Document was split into {len(chunks)} chunks.")

 Document was split into 103 chunks.


### **5. Create Embeddings and Vector Store**
*We convert our text chunks into numerical vectors (embeddings) and store them in a searchable Chroma vector database.*

In [7]:
!pip install langchain-community -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.5/2.5 MB[0m [31m174.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m68.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [17]:
import langchain_community
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma

# Name of the embedding model used to vectorise every chunk.
embedding_model_name = "models/embedding-001"
embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model_name)

# Build a Chroma vector store from the chunks using that embedding model.
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    documents=chunks,
)
print("Vector store created.")

Vector store created.


### **6. Create the Complete RAG Chain**

In [20]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# Imported from langchain_core (like the two imports above) instead of the
# legacy `langchain.prompts` path.
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# Retriever: looks up the chunks in the vector store that best match a query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5}) # Retrieve top 5 chunks

# LLM that writes the final answer; a low temperature is requested.
# NOTE(review): confirm this model ID is still available for your API key.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.05)

# Prompt template: `{context}` receives the retrieved text and `{question}`
# receives the user's query.
template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.

Context:
{context}

Question:
{question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Helper function to combine our retrieved documents into a single string.
def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line (``"\\n\\n"``);
        an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Assemble the chain in named stages so each piece can be inspected on its own.
# 1) Retrieve documents for the query and flatten them into one context string.
context_pipeline = retriever | format_docs
# 2) Build the prompt inputs: retrieved context plus the untouched question.
chain_inputs = {"context": context_pipeline, "question": RunnablePassthrough()}
# 3) Fill the prompt, call the LLM, and parse the reply into a plain string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

print("RAG chain created successfully.")

RAG chain created successfully.


### **7. Ask a Question**

In [21]:
import textwrap

query = "What is the main difference between LangGraph and LangChain?"
response = rag_chain.invoke(query)

# Wrap the answer to 80 columns, then emit the question and answer sections
# with a single print call (one item per line).
wrapped_answer = textwrap.fill(response, width=80)
print("--- Question ---", query, "\n--- Answer ---", wrapped_answer, sep="\n")

--- Question ---
What is the main difference between LangGraph and LangChain?

--- Answer ---
Based on the provided text, the FAQ for LangGraph mentions that it covers the
differences between LangGraph and LangChain, but the specific differences are
not detailed in these snippets.  Therefore, I don't know the answer.
