<a href="https://colab.research.google.com/github/solomontessema/building-ai-agents/blob/main/notebooks/5.1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<table>
  <tr>
    <td><img src="https://ionnova.com/img/ionnova_logo_name_2.png" width="120px"></td>
    <td><h1>Minimal RAG Pipeline with LangChain</h1></td>
  </tr>
</table>

In [None]:
!pip install -qU langchain==1.1.0 langchain-openai==1.1.0 langchain-community==0.4.1 faiss-cpu==1.13.2 python-dotenv==1.1.1

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
import os
import requests
from dotenv import load_dotenv
load_dotenv()

# Fetch the sample document from GitHub and save it under docs/ so it can be
# loaded from disk.
os.makedirs("docs", exist_ok=True)
file_path = "docs/langgraph_intro.txt"

GITHUB_RAW_URL = (
    "https://raw.githubusercontent.com/"
    "solomontessema/building-ai-agents/main/assets/langgraph_intro.txt"
)

# requests applies no timeout by default; without one a stalled connection
# would block this cell indefinitely.
response = requests.get(GITHUB_RAW_URL, timeout=30)
# Raise on 4xx/5xx so an error page is never written to disk as the document.
response.raise_for_status()

with open(file_path, "w", encoding="utf-8") as f:
    f.write(response.text)

# Load and chunk documents
# The file at file_path is written as UTF-8, so read it back with the same
# encoding. Without an explicit encoding TextLoader falls back to the platform
# default, which is not UTF-8 on every system.
loader = TextLoader(file_path, encoding="utf-8")
documents = loader.load()

# Aim for chunks of roughly 300 characters that share up to 50 characters with
# their neighbour, so a sentence cut at a boundary still appears whole in one
# of the two chunks.
splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks = splitter.split_documents(documents)

# Embed every chunk with OpenAI and index the vectors in an in-memory FAISS store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)

# Expose the index as a retriever that returns the 2 closest chunks per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=2))

# Prompt with two slots: the retrieved context and the user's question.
prompt_template = PromptTemplate(
    template=(
        "Use the context below to answer the question:\n\n"
        "{context}\n\n"
        "Q: {question}"
    ),
    input_variables=["context", "question"],
)

# Build the RAG pipeline
llm = ChatOpenAI(model="gpt-4o", temperature=0)


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    The retriever yields a list of Document objects. Passing that list straight
    into the prompt would interpolate its string form (object repr including
    metadata) rather than the passage text, so only ``page_content`` is kept.

    Args:
        docs: Iterable of documents exposing a ``page_content`` attribute.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The input question fans out to both keys: it is sent to the retriever (whose
# results are flattened to text by format_docs) and passed through unchanged
# as the "question" value.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | llm
    | StrOutputParser()
)

# Run the pipeline
result = rag_chain.invoke("What is LangGraph?")
print(result)
