In [14]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

# Chat model used to answer questions (library-default settings).
llm = ChatOpenAI()

# NLTK resources fetched up front; presumably required by the
# unstructured file loader used later in the notebook -- TODO confirm.
import nltk

for _nltk_resource in ("punkt", "averaged_perceptron_tagger"):
    nltk.download(_nltk_resource)

# On-disk byte store under ./.cache/ that backs the embedding cache.
cache_dir = LocalFileStore("./.cache/")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Yes\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Yes\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [16]:
# Build a retrieval-QA pipeline over chapter one: split -> embed -> index -> ask.

# Split on newlines, measuring chunk length in tiktoken tokens: target 600
# tokens per chunk with a 100-token overlap between neighbouring chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/chapter_one.txt")
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Namespace the cache by embedding model name. Without a namespace the cache
# key is derived from the text alone, so switching embedding models later
# would silently reuse vectors produced by the old model.
# NOTE: entries cached before this change (empty namespace) are not reused;
# the documents are re-embedded once and stored under the new namespace.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# In-memory FAISS index built from the (cached) document embeddings.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# "refine" chain type: per LangChain docs, the answer is built up iteratively
# over the retrieved documents rather than stuffing them into one prompt.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=vectorstore.as_retriever(),
)

# Last expression of the cell, so the answer string is displayed as output.
chain.run("Describe Victory Mansions")

'Victory Mansions, where Winston Smith resides in George Orwell\'s novel "1984," is a run-down and dilapidated apartment building located in London, the chief city of Airstrip One. The building is shabby and poorly maintained, with narrow corridors, dingy staircases, and overcrowded, grim living conditions for its residents. Victory Mansions is surrounded by a landscape of rotting nineteenth-century houses, bombed sites, and sordid wooden dwellings, reflecting the bleak and impoverished state of society in Oceania. The constant surveillance and propaganda emanating from the nearby Ministry of Truth contribute to the oppressive and hopeless atmosphere within Victory Mansions, reinforcing the themes of control and manipulation present throughout the novel. In this setting, Winston finds a small alcove within his apartment where he can briefly escape the watchful eye of the telescreen and engage in forbidden activities, such as reading rare and prohibited books, like the one he acquired f