In [10]:
from dotenv import dotenv_values
import os

# Parse the local .env file into a plain mapping of variable name -> value.
env_vars = dotenv_values('.env')

# Prefer the key from .env; fall back to one already exported in the process
# environment so the notebook still runs where .env has no such entry.
openai_api_key = env_vars.get('OPENAI_API_KEY') or os.environ.get('OPENAI_API_KEY')

if not openai_api_key:
    # os.environ only accepts str values: assigning None would fail with an
    # opaque "str expected, not NoneType" TypeError, so fail with a clear message.
    raise RuntimeError(
        "OPENAI_API_KEY is not set: add it to the .env file or export it "
        "in the environment before running this notebook."
    )

# Expose the key through the process environment for the cells that follow.
os.environ['OPENAI_API_KEY'] = openai_api_key

In [15]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Chat model that will answer the questions (library defaults for model/temperature).
llm = ChatOpenAI()

# On-disk byte store that persists computed embeddings between notebook runs.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines; chunk_size / chunk_overlap are measured with the tiktoken
# encoder, i.e. in tokens rather than characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/chapter_one.txt")

# Load the source file and cut it into overlapping chunks in one step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Wrap the embedder so each text is embedded only once and then read from cache_dir.
# The namespace keys cache entries by embedding model, so vectors produced by a
# different model are never served from the same store by mistake.
# NOTE: entries cached earlier without a namespace are not reused and will be
# re-embedded once.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Build the in-memory FAISS index over the chunks using the cached embedder.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

In [22]:
# Retriever view over the FAISS index built in the previous cell.
retriever = vectorstore.as_retriever()

# "map_rerank" chain type: see the LangChain docs for how retrieved chunks are
# combined into the final answer.
chain = RetrievalQA.from_chain_type(llm=llm, chain_type="map_rerank", retriever=retriever)

# The bare last expression is kept so the answer renders as the cell output.
question = "Describe Victory Mansions"
chain.run(question)



'Victory Mansions is a building with glass doors and a hallway that smells like boiled cabbage and old rag mats. There is a large poster of a man with a black mustache and ruggedly handsome features on one wall. The building has stairs because the lift is often not working. The flat that Winston Smith lives in is on the seventh floor. The poster with the face of the man has the caption "BIG BROTHER IS WATCHING YOU." Inside the flat, there is a fruity voice reading out a list of figures related to the production of pig-iron.'