In [None]:
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# 🔧 Chat model used to generate answers (constructed with library defaults)
llm = ChatOpenAI()

# 📁 File-backed store rooted at ./.cache/; used further down to cache embeddings
cache_dir = LocalFileStore("./.cache/")

# 📄 Step 1: Read the source text and cut it into overlapping chunks.
# The splitter breaks on newlines; chunk_size / chunk_overlap are counted
# with the tiktoken encoder rather than in raw characters.
loader = UnstructuredFileLoader("./files/chapter_one.txt")
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)
# Load first, then split explicitly (same steps load_and_split performs).
docs = splitter.split_documents(loader.load())

# 🧠 Step 2: Initialize OpenAI embeddings with caching for efficiency.
# The cache wrapper stores each chunk's vector in `cache_dir`, so re-running
# this cell does not re-embed text that has already been embedded.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    # Scope cache keys to the embedding model. Without a namespace, vectors
    # cached by one model would be returned for the same text after switching
    # to a different model. NOTE: entries cached before this namespace was
    # added use different keys and will be recomputed once.
    namespace=embeddings.model,
)

# 🗂️ Step 3: Build a FAISS vectorstore from the split documents
vectorstore = FAISS.from_documents(docs, cached_embeddings)

print(f"✅ Vector store created with {len(docs)} chunks")

# 🤖 Step 4: Create a RetrievalQA chain to answer questions from the vectorstore.
# The retriever fetches relevant chunks from the FAISS index; `chain_type`
# selects how those chunks are combined into the LLM prompt(s).
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_rerank",  # one of: "stuff", "map_reduce", "refine", "map_rerank"
    retriever=vectorstore.as_retriever(),
)

# ❓ Step 5: Ask a sample question to test it all.
# `Chain.run` is deprecated; `invoke` takes the inputs as a dict and returns a
# dict of outputs. RetrievalQA reads the question from "query" and puts the
# answer text under "result".
response = chain.invoke({"query": "Describe Victory Mansions"})["result"]
print("🔍 Answer:", response)


✅ Vector store created with 16 chunks




🔍 Answer: Victory Mansions is a building with glass doors that Winston Smith entered on a cold day in April. The hallway smelled of boiled cabbage and old rag mats. There was a large colored poster of a man's face on the wall. The flat was on the seventh floor, and Winston, who was thirty-nine, had a varicose ulcer above his right ankle. Inside the flat, there was a telescreen that could not be completely shut off and a voice reading out figures related to pig-iron production.


In [5]:
# Query the FAISS index directly (no LLM involved) to inspect which chunks
# are considered closest to the question.
results = vectorstore.similarity_search(
    query="where does winston live",
)

# Bare expression as the last line so the notebook displays the documents.
results

[Document(id='70e4748e-01c1-40a1-a447-a6a3570ed11b', metadata={'source': './files/chapter_one.txt'}, page_content="The Ministry of Love was the really frightening one. There were no windows in it at all. Winston had never been inside the Ministry of Love, nor within half a kilometre of it. It was a place impossible to enter except on official business, and then only by penetrating through a maze of barbed-wire entanglements, steel doors, and hidden machine-gun nests. Even the streets leading up to its outer barriers were roamed by gorilla-faced guards in black uniforms, armed with jointed truncheons.\nWinston turned round abruptly. He had set his features into the expression of quiet optimism which it was advisable to wear when facing the telescreen. He crossed the room into the tiny kitchen. By leaving the Ministry at this time of day he had sacrificed his lunch in the canteen, and he was aware that there was no food in the kitchen except a hunk of dark-coloured bread which had got to