<a href="https://colab.research.google.com/github/solomontessema/building-ai-agents/blob/main/notebooks/5.3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU langchain==1.1.0  langchain-openai==1.1.0  langchain-community==0.4.1  faiss-cpu==1.13.2 "unstructured[all-docs]"


In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from dotenv import load_dotenv
import os
import requests

# Load variables from a local .env file (if one exists) into the process
# environment; presumably this supplies the OpenAI API key used by the
# embeddings step later in this cell.
load_dotenv()

# Local destination for the sample document.
os.makedirs("docs", exist_ok=True)
file_path = "docs/langgraph_intro.txt"

GITHUB_RAW_URL = (
    "https://raw.githubusercontent.com/"
    "solomontessema/building-ai-agents/main/assets/langgraph_intro.txt"
)

# Download the sample text. requests has no default timeout, so without one a
# stalled connection would hang the cell indefinitely.
response = requests.get(GITHUB_RAW_URL, timeout=30)
# Fail loudly on 4xx/5xx instead of saving an error page as the document.
response.raise_for_status()

with open(file_path, "w", encoding="utf-8") as f:
    f.write(response.text)

# Load and chunk documents.
# The file was written as UTF-8, so read it back as UTF-8 explicitly; relying
# on the platform default encoding can fail or garble text on non-UTF-8 locales.
loader = TextLoader(file_path, encoding="utf-8")
documents = loader.load()

# Split into overlapping chunks (sizes are measured in characters by default)
# so that each embedded piece stays small while keeping some shared context.
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks = splitter.split_documents(documents)

# Create embeddings
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Build the FAISS index from the chunks and persist it to the "faiss_index" folder.
vectorstore = FAISS.from_documents(chunks, embeddings)
vectorstore.save_local("faiss_index")


# Reload the persisted index from disk. Loading goes through pickle, which is
# why the explicit allow_dangerous_deserialization opt-in is required. That is
# acceptable here only because the index was created locally by this notebook;
# never enable it for index files obtained from an untrusted source.
vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Run a sample similarity query against the reloaded index.
retriever = vectorstore.as_retriever()
results = retriever.invoke("What is LangGraph?")

# Guard against an empty result list so the cell reports it clearly instead of
# failing with an uninformative IndexError.
if results:
    print(results[0].page_content)
else:
    print("No matching documents were retrieved.")


