Setting up the user agent and loading the Google API key

In [None]:
import os
from google.colab import userdata

# Export a user-agent string for this notebook.
# NOTE(review): presumably read by LangChain's web utilities, which warn when
# USER_AGENT is unset — confirm; `requests` itself does not read this variable.
os.environ.update({"USER_AGENT": "my-RAG-bot/1.0"})

# Pull the Gemini API key from Colab's secret store instead of hardcoding it.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")

We download the LangGraph `llms.txt` documentation index with `requests` and keep its raw text

In [None]:
import requests
from langchain.schema import Document

# Source document: the llms.txt file published alongside the LangGraph docs.
url = 'https://langchain-ai.github.io/langgraph/llms.txt'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() aborts on a 4xx/5xx reply so that an HTTP error page is
# never chunked and embedded as if it were the real document.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text

# `response.text` is already a single string; the previous "".join(text) only
# rebuilt that same string character by character.
docs = text

Now we split the document into chunks of size 500 with a chunk overlap of 50

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the downloaded text into chunks of size 500 with an overlap of 50
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
texts = text_splitter.split_text(docs)

# Report how many chunks will be embedded.
print(f"Number of chunks: {len(texts)}")

Number of chunks: 207


Now we set up the Gemini chat model and the embedding model; the chunks themselves are embedded and stored in a Chroma vector store in the next step

In [None]:
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# Chat model that writes the final answer; a low temperature is used.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY,
    temperature=0.1,
    convert_system_message_to_human=True,
)

# Embedding model used to vectorise the text chunks.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

# NOTE(review): this cell only constructs the model clients; no text is passed
# to `embeddings` here, so the message below is printed before any embedding
# call made by this notebook.
print("The data has been embedded")

The data has been embedded


Setting up the retriever

In [None]:
# Build a Chroma vector store from the text chunks using the embedding model,
# then expose it as a retriever configured with k=5.
vector_store = Chroma.from_texts(texts, embeddings)
retriever = vector_store.as_retriever(search_kwargs={"k": 5})


We set up the prompt template that will be sent to the model

In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt text with two placeholders: {question} for the user query and
# {context} for the retrieved chunks. Answers are capped at three sentences.
template = (
    "You are an assistant for question-answering tasks.\n"
    "Use the following pieces of retrieved context to answer the question.\n"
    "If you don't know the answer, just say that you don't know.\n"
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question}\n"
    "Context: {context}\n"
    "Answer:\n"
)
prompt = ChatPromptTemplate.from_template(template)

# Show the parsed template so its input variables can be checked.
print(prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks.\nUse the following pieces of retrieved context to answer the question.\nIf you don't know the answer, just say that you don't know.\nUse three sentences maximum and keep the answer concise.\nQuestion: {question}\nContext: {context}\nAnswer:\n"), additional_kwargs={})]


Now we create the RAG pipeline: it sends the formatted prompt (query, retrieved context and instructions) to the model and returns the response to the query

In [None]:
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.runnables import RunnableLambda
from langchain.schema.output_parser import StrOutputParser
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Wrap the template defined above in a plain PromptTemplate for the QA chain.
RAG_prompt = PromptTemplate.from_template(template)

# Retrieval QA chain: uses `retriever` to fetch context for the query and
# `llm` to answer with the custom prompt; source documents are returned too.
RAG_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type_kwargs={"prompt": RAG_prompt},
    return_source_documents=True,
    retriever=retriever,
)

# Ask a question and print only the generated answer text.
query = "Explain parts of LLMs"
response = RAG_chain.invoke({"query": query})
print(response["result"])



LLMs consist of components like agent architectures (including routers, tool-calling agents, and memory management)  and methods for handling streaming outputs.  The provided text focuses on documentation and usage of LLMs, not their internal structure.


In [None]:
# Second run of the same question, with the prompt now capping the answer at
# one sentence instead of three.
#
# Only the prompt differs from the previous run, so the `llm` and `retriever`
# built in the earlier cells are reused. The previous version of this cell
# downloaded, split and embedded the same document a second time, which costs
# extra embedding API calls for an identical index.
# NOTE(review): a second Chroma.from_texts call in the same session may also
# add duplicate chunks to the default in-memory collection — confirm.
print("The data has been embedded")  # refers to the index built in the earlier cell

template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use one sentence maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""
RAG_prompt = PromptTemplate.from_template(template)

# Rebuild only the QA chain so that it picks up the new prompt.
RAG_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": RAG_prompt}
)

# `query` and `response` are read by the formatting cell that follows.
query = "Explain parts of LLMs"
response = RAG_chain.invoke({'query' : query})
print(response['result'])

The data has been embedded




LLMs include components like routers, tool-calling agents, memory management, and planning.


In [None]:
import textwrap

# Echo the question, then print the answer wrapped to 30-character lines.
print(f"User: {query}")
wrapped = textwrap.wrap(response["result"], width=30)
print(f"Bot:")
for line in wrapped:
    print(line)

User: Explain parts of LLMs
Bot:
LLMs include components like
routers, tool-calling agents,
memory management, and
planning.
