In [14]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Retrieval-augmented QA over a single document:
# load -> split -> embed (with an on-disk cache) -> index in FAISS -> answer.

# Chat model used by the QA chain to produce the final answer.
llm = ChatOpenAI()

# Byte store on local disk that backs the embedding cache.
# The path is relative, so it resolves against the current working directory.
cache_dir = LocalFileStore("../../.cache/")

# Split on newlines; the splitter is built via the tiktoken-based constructor
# with a chunk size of 600 and an overlap of 100 between neighbouring chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("../../files/chapter_one.docx")

# Load the file and split it into chunks in one step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Wrap the embedder so previously embedded texts are read from `cache_dir`.
# The namespace is set to the embedding model name so that vectors produced by
# different models never collide in the same cache directory.
# NOTE: entries cached earlier without a namespace are not reused, so the
# documents are embedded once more the first time this runs.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    # chain_type options:
    #   stuff      - bundle all retrieved documents together and pass them to
    #                the LLM in one go.
    #   refine     - split the documents into batches, pass a batch to the LLM,
    #                then pass the previous answer along with the next batch so
    #                the answer is improved step by step.
    #   map_reduce - split the documents into batches and aggregate the LLM
    #                responses obtained for each batch.
    chain_type="stuff",
    # The vector store is exposed to the chain through the retriever interface.
    retriever=vectorstore.as_retriever(),
)

chain.run("Describe Victory Mansions")

'Victory Mansions is a building where Winston Smith lives. The hallway of the building smelt of boiled cabbage and old rag mats. The flat is seven flights up, and Winston\'s flat is described as small, with a fruity voice reading out figures related to pig-iron production. The building is run-down and has issues like the lift not working and the electricity being cut off during daylight hours. Additionally, there is a large colored poster of an enormous face with the caption "BIG BROTHER IS WATCHING YOU" that is prominently displayed inside the building.'