In [None]:
%pip install langchain langchain-openai faiss-cpu tiktoken

In [None]:
import os
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA



# --- 1. Setup Environment ---
# Make sure your OpenAI API key is set in your environment variables
# before running, for example:
#     os.environ["OPENAI_API_KEY"] = "your_api_key_here"
#
# Abort early when the key is missing. SystemExit is raised directly instead
# of calling exit(): that helper is injected by the `site` module for
# interactive sessions and is not guaranteed to exist in every interpreter.
# Passing the message to SystemExit prints it to stderr and yields a non-zero
# exit status, so callers can tell the run did not succeed.
if "OPENAI_API_KEY" not in os.environ:
    raise SystemExit("Please set your OPENAI_API_KEY environment variable.")

# --- 2. Load Documents ---
# For this example, create a small dummy text file on disk and load it back
# through TextLoader so the rest of the pipeline works on Document objects.
documents_content = """
The Eiffel Tower, located in Paris, France, is one of the most famous landmarks in the world.
It was designed by Gustave Eiffel and completed in 1889. It stands at 330 meters tall.

The Great Wall of China is a series of fortifications made of stone, brick, tamped earth, wood, and other materials.
It was built to protect the Chinese states and empires against the raids and invasions of the various nomadic groups of the Eurasian Steppe.
Several walls were being built as early as the 7th century BC.
"""
# Write and read with the same explicit encoding so the round trip does not
# depend on the platform's default locale encoding.
with open("sample_doc.txt", "w", encoding="utf-8") as f:
    f.write(documents_content)

loader = TextLoader("./sample_doc.txt", encoding="utf-8")
docs = loader.load()

# --- 3. Split Documents into Chunks ---
# A recursive character splitter keeps this example simple; a semantic
# chunker would be a more advanced alternative.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
)
split_docs = text_splitter.split_documents(docs)

# Show every chunk, numbered from 1, so the split can be inspected by eye.
print(f"Split into {len(split_docs)} chunks.")
for chunk_number, chunk in enumerate(split_docs, start=1):
    print(f"--- Chunk {chunk_number} ---\n{chunk.page_content}\n")

# --- 4. Create Embeddings and Store in FAISS ---
# OpenAI embeddings are used here; any LangChain-compatible embedding model
# can be substituted.
embeddings = OpenAIEmbeddings()

print("Creating FAISS vector store...")
# Build the FAISS vector store from the chunks using the embedding model above.
vector_store = FAISS.from_documents(documents=split_docs, embedding=embeddings)
print("FAISS vector store created.")

# --- 5. Perform a Similarity Search ---
# Query the vector store directly (no LLM involved) and print what it returns.
query = "How tall is the Eiffel Tower?"
search_results = vector_store.similarity_search(query)

print("\n--- Similarity Search Results ---")
divider = "-" * 20
for hit in search_results:
    # One call emits the content line followed by the divider line.
    print(f"Content: {hit.page_content}", divider, sep="\n")

# --- 6. Create a RAG Chain ---
# Prompt for the answering step. With chain_type="stuff", RetrievalQA fills
# {context} with the retrieved chunks and {question} with the user's query,
# so the template must use exactly those two variable names.
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {question}""")

llm = ChatOpenAI()

# Create a retriever from our vector store
retriever = vector_store.as_retriever()

# Create the final RetrievalQA chain that gets documents, then passes them to the LLM.
# Use the "stuff" chain type and supply our prompt template via chain_type_kwargs.
retrieval_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
)

print("\n--- Invoking RAG Chain ---")
# RetrievalQA takes its input under the "query" key and returns a dict whose
# "result" entry holds the generated answer.
rag_question = "How tall is the Eiffel Tower?"
response = retrieval_chain.invoke({"query": rag_question})
answer = response["result"]

print("\nAnswer:")
print(answer)

# Clean up the dummy file. A missing file is tolerated so that re-running
# this step (for example, re-executing a notebook cell) does not raise.
try:
    os.remove("sample_doc.txt")
except FileNotFoundError:
    pass

