In [4]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader

In [5]:
# Load every Markdown file in the selected-data folder as a plain-text document.
loader = DirectoryLoader('../data/selected', glob="./*.md", loader_cls=TextLoader)
documents = loader.load()

# Split the loaded documents into chunks that will be embedded individually.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
texts = text_splitter.split_documents(documents)

# Stop with a clear message when nothing was loaded, instead of failing later
# inside the embedding / vector-store call.
if not texts:
    raise ValueError("No text chunks were produced from '../data/selected' (glob './*.md').")

# Embed the chunks and store them in a Chroma collection backed by a directory on disk.
persist_directory = '../data/chromadb'
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

# Persist the db to disk. persist must be *called*: a bare `vectordb.persist`
# is only an attribute lookup and writes nothing.
vectordb.persist()
vectordb = None

# Re-open the store from disk so the following cells use the persisted data.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

In [6]:
# Wrap the vector store in a retriever so the chain can fetch relevant chunks.
retriever = vectordb.as_retriever()

# Build the question-answering chain. return_source_documents=True makes the
# response carry a "source_documents" entry alongside "result", which
# process_llm_response reads to list the sources.
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

## Cite sources
def process_llm_response(llm_response):
    """Print the answer, then the source of every supporting document.

    Args:
        llm_response: Mapping with a 'result' entry (the answer to print) and a
            "source_documents" entry (an iterable of objects whose ``metadata``
            mapping has a 'source' key).

    Returns:
        None. Everything is written to standard output; one line is printed per
        supporting document, so repeated sources appear repeatedly.
    """
    answer = llm_response['result']
    print(answer)

    # Two leading newlines leave a visual gap between the answer and the list.
    print('\n\nSources:')

    supporting_docs = llm_response["source_documents"]
    for doc in supporting_docs:
        origin = doc.metadata['source']
        print(origin)
        
        
# full example
# qa_chain already holds the retriever it was constructed with, so the
# retriever is not re-created here: rebinding the name would not change
# what the chain uses.
query = "What is pcare?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 pCare is a company that specializes in patient engagement solutions and collaborates with industry leaders to provide patients with education, entertainment, and engagement. They are located in Lake Success, NY and use cookies on their website to enhance user experience.


Sources:
../data/selected/pcare_about-us.md
../data/selected/pcare_about-us.md
../data/selected/pcare_about-us.md
../data/selected/pcare_about-us.md
