In [1]:
import getpass
import os

# Do not hardcode the key (or a placeholder that silently overrides a real one).
# Reuse a key already exported in the environment; otherwise prompt for it
# without echoing, so the secret never ends up saved in the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [2]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader

In [3]:
# Load the files matching ./*.csv under ./data/, reading each one with TextLoader.
loader = DirectoryLoader(
    './data/',
    glob="./*.csv",
    loader_cls=TextLoader,
)
documents = loader.load()

In [4]:
# Split the loaded documents into overlapping chunks before embedding them.
chunk_size, chunk_overlap = 1000, 200
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
texts = text_splitter.split_documents(documents)


In [None]:
persist_directory = 'db'

# swap to local embeddings later on
embedding = OpenAIEmbeddings()

# Make this cell safe to re-run: calling Chroma.from_documents against a
# directory that already holds the collection adds the same chunks again
# (duplicate search hits and repeated embedding cost). Reuse the persisted
# index when one exists; delete `persist_directory` to force a rebuild after
# the source data changes.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectordb = Chroma(persist_directory=persist_directory,
                      embedding_function=embedding)
else:
    vectordb = Chroma.from_documents(documents=texts,
                                     embedding=embedding,
                                     persist_directory=persist_directory)

In [None]:
# Persist the db to disk.
vectordb.persist()
# Drop the in-memory handle; the next cell re-creates it from persist_directory.
vectordb = None

In [None]:
# Re-open the vector store from the on-disk directory, passing the same
# embedding object that was used when the store was built.
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

In [None]:
# Retriever with default settings (the probe query below returned 4 documents).
retriever = vectordb.as_retriever()

In [None]:
# Query the retriever directly (no LLM involved) with a sample question.
probe_question = "I am 25 years old, and my weight is 110, what should I eat for breakfast for weight loss"
docs = retriever.get_relevant_documents(probe_question)

In [None]:
# Number of documents the retriever returned for the probe question.
len(docs)

4

In [None]:
# Rebuild the retriever with k=2 passed through search_kwargs
# (presumably the number of documents returned per query — confirm in the docs).
top_k = 2
retriever = vectordb.as_retriever(search_kwargs={"k": top_k})

In [None]:
# Inspect which search strategy the retriever is configured with.
retriever.search_type

'similarity'

In [None]:
# Build the question-answering chain on top of the retriever.
# return_source_documents=True is requested so the response also carries the
# retrieved documents, which process_llm_response reads to list sources.
llm = OpenAI()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
# citing sources
def process_llm_response(llm_response):
    """Print the chain's answer followed by the source of each returned document.

    Args:
        llm_response: Mapping returned by the QA chain. Must contain
            ``'result'``. ``'source_documents'`` is optional; when present it
            is an iterable of objects exposing a ``metadata`` dict.

    Returns:
        None. Everything is written to stdout.

    Raises:
        KeyError: If ``llm_response`` has no ``'result'`` entry.
    """
    print(llm_response['result'])
    print('\n\nSources:')
    # 'source_documents' may be absent (e.g. a chain built without
    # return_source_documents=True); treat that as "no sources" rather than
    # failing with KeyError after the answer was already printed.
    for source in llm_response.get("source_documents") or []:
        # Fall back to a placeholder when a document's metadata has no 'source'.
        print(source.metadata.get('source', '<unknown source>'))

In [None]:
# Run a question through the full retrieval + LLM chain and print the answer
# together with the sources of the retrieved documents.
# NOTE(review): the recorded output below talks about weight loss, which this
# query does not mention — it appears to be left over from a different query;
# re-run the cell to refresh it.
query = "What is a healthy breakfast?"
llm_response = qa_chain(query)
process_llm_response(llm_response)



I'm sorry, I cannot provide specific dietary recommendations for weight loss as it is important to consult a licensed nutritionist or doctor for personalized advice. However, generally speaking, a balanced breakfast that includes protein, fiber, and healthy fats can help with weight loss. Examples could include a breakfast bar with whole grain crust and fruit, a bowl of whole grain cereal with nuts and fruit, or a baby food juice with apple and sweet potato. It is important to also consider portion sizes and overall calorie intake for weight loss.


Sources:
data/food.csv
data/food.csv
