In [4]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    CharacterTextSplitter,
)

# Loader for the source document; the splitter below is configured
# independently of it.
loader = UnstructuredFileLoader("./files/1984.docx")

# Splitter built via the tiktoken-based constructor: chunk_size and
# chunk_overlap are 600 and 100, and text is only broken on newlines.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

# Load the file and cut it into chunks in a single call; `docs` is what
# the later cells index.
docs = loader.load_and_split(text_splitter=splitter)

In [5]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore

# File-backed byte store under ./.cache/ used to persist embeddings.
cache_dir = LocalFileStore("./.cache/")

embedder = OpenAIEmbeddings()

# Wrap the embedder so that texts which were already embedded are served from
# the cache instead of being embedded again.
# The namespace ties every cache entry to the embedding model that produced
# it: without it the key depends on the text only, so switching models would
# silently reuse vectors from the old model. Entries written before this
# namespace was added are not reused and will be recomputed once.
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embedder,
    cache_dir,
    namespace=embedder.model,
)

# Build the FAISS index over the chunked documents using the cached embedder.
vectorstore = FAISS.from_documents(docs, cache_embeddings)

In [8]:
from langchain.chains import RetrievalQA


# NOTE(review): no temperature is passed, so the library default applies;
# consider pinning it if the recorded answer should be reproducible.
llm = ChatOpenAI()

# Retrieval QA chain using the "map_rerank" strategy over the FAISS index
# built earlier, queried through its default retriever.
chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type="map_rerank",
)

# Kept as the last expression so the answer is rendered as the cell output.
chain.run("Describe Victory Mansions")



'Victory Mansions is described as having a hallway that smells of boiled cabbage and old rag mats, with a large colored poster of a man\'s face about forty-five years old with a heavy black mustache and ruggedly handsome features. The flat itself is seven flights up, with a telescreen on one wall that cannot be completely turned off. The building is run-down, with an unreliable lift and the presence of propaganda posters, such as the one depicting Big Brother with the caption "BIG BROTHER IS WATCHING YOU."'