In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore

# On-disk byte store that CacheBackedEmbeddings (below) uses as its cache.
cache_dir = LocalFileStore("./.cache/")


# Token-based splitting via tiktoken: chunk_size and chunk_overlap are token
# counts, and chunk boundaries fall on newline characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/chapter_one.docx")

# Load the file and split it into chunks in a single step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Scope cache keys to the embedding model. With the default (empty) namespace
# a vector cached for one model would be returned for any other model that
# shares ./.cache/, silently mixing incompatible embeddings.
# NOTE: entries cached before this change were written without a namespace and
# will not be reused, so the first run after this edit re-embeds the chunks.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Embed every chunk (through the cache) and index the vectors in Chroma.
vectorstore = Chroma.from_documents(docs, cached_embeddings)

In [4]:
# Query the vector store directly (no chat model is involved in this cell) for
# the chunks most similar to the question.
results = vectorstore.similarity_search("where does Miss Austen live")

# Bare last expression so the notebook displays the returned Document list.
results

[Document(page_content='anybody born in her decade--that of the eighteenth-century\nseventies--independently exhibited the full romantic quality. Even Scott\nrequired hill and mountain and ballad, even Coleridge metaphysics and\nGerman to enable them to chip the classical shell. Miss Austen was an\nEnglish girl, brought up in a country retirement, at the time when\nladies went back into the house if there was a white frost which might\npierce their kid shoes, when a sudden cold was the subject of the\ngravest fears, when their studies, their ways, their conduct were\nsubject to all those fantastic limits and restrictions against which\nMary Wollstonecraft protested with better general sense than particular\ntaste or judgment. Miss Austen, too, drew back when the white frost\ntouched her shoes; but I think she would have made a pretty good journey\neven in a black one._\n_For if her knowledge was not very extended, she knew two things which\nonly genius knows. The one was humanity, and 

In [5]:
from langchain.chains import RetrievalQA

# Chat model that writes the final answer from the retrieved chunks.
llm = ChatOpenAI()

# Retrieval QA over the vector store built earlier in the notebook, using the
# "stuff" strategy for combining the retrieved documents.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

# Last expression of the cell, so the answer string is displayed as output.
chain.run("where does Miss Austen live")

'Miss Austen lived in a country retirement, but the specific location is not mentioned in the provided context.'

In [6]:
# Ask a second question through the same `chain` object built in the previous
# cell; the returned answer string is displayed as the cell output.
chain.run("Describe country retirement")

'Country retirement refers to living in a rural or countryside setting, away from the hustle and bustle of city life. It typically involves residing in a small town or village, surrounded by nature and open spaces. In a country retirement, individuals often enjoy a slower pace of life, with a focus on tranquility, peace, and a connection to the natural environment. It may involve engaging in activities such as gardening, hiking, exploring nature, or participating in community events. Country retirement can offer a sense of community, a closer connection to the land, and a break from the noise and congestion of urban areas.'

In [9]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Chat model that answers over the retrieved chunks.
llm = ChatOpenAI()

# On-disk byte store that CacheBackedEmbeddings (below) uses as its cache.
cache_dir = LocalFileStore("./.cache/")

# Token-based splitting via tiktoken: chunk_size and chunk_overlap are token
# counts, and chunk boundaries fall on newline characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/chapter_one.txt")

# Load the file and split it into chunks in a single step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Scope cache keys to the embedding model. With the default (empty) namespace
# a vector cached for one model would be returned for any other model that
# shares ./.cache/, silently mixing incompatible embeddings.
# NOTE: entries cached before this change were written without a namespace and
# will not be reused, so the first run after this edit re-embeds the chunks.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Embed every chunk (through the cache) and index the vectors in FAISS.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# Retrieval QA using the "map_rerank" strategy for combining the retrieved
# documents (instead of "stuff").
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_rerank",
    retriever=vectorstore.as_retriever(),
)

# Last expression of the cell, so the answer string is displayed as output.
chain.run("where does Miss Austen live")



'In a country retirement in England'