In [None]:
# Install required packages
# pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain langgraph tavily-python python-dotenv beautifulsoup4

In [None]:
import os

# `dotenv` must be imported here: the call below raises NameError on a fresh
# kernel otherwise (provided by the `python-dotenv` package).
import dotenv

# Load environment variables from a .env file into the process environment
dotenv.load_dotenv()

# Tracing configuration: turn on LangChain tracing (V2), point it at the
# LangSmith endpoint, and group runs under the "CRAG-LangGraph" project.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "CRAG-LangGraph"

In [None]:
# Import required modules
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Web pages that make up the corpus to index
urls = [
    "https://div.beehiiv.com/p/advanced-rag-series-indexing",
    "https://div.beehiiv.com/p/advanced-rag-series-retrieval",
    "https://div.beehiiv.com/p/advanced-rag-series-generation-evaluation",
]

# Fetch each page with its own loader; `docs` holds one result list per URL
docs = [WebBaseLoader(page_url).load() for page_url in urls]

# Merge the per-URL result lists into one flat list, preserving order
docs_list = [document for page_docs in docs for document in page_docs]

# Token-based splitter: chunks of 500 tokens with a 50-token overlap
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=50,
)

# Chunk every loaded document
doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks with OpenAI embeddings and store them in the
# "rag-chroma" Chroma collection
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=OpenAIEmbeddings(),
)

# Expose the vector store through the retriever interface (default settings)
retriever = vectorstore.as_retriever()