In [19]:
import os
import warnings
import langchain
from langchain_ollama import ChatOllama
# Silence every warning for the rest of the session: the "ignore" action is
# installed without any message, category, or module restriction.
warnings.filterwarnings("ignore")

In [9]:
from dotenv import load_dotenv

# Load environment variables from the .env file in the working directory.
# The call is the last expression, so its return value is the cell output.
env_file = "./.env"
load_dotenv(env_file)

True

### Initializing the LLM

In [12]:
from langchain_ollama import ChatOllama

# Connection settings for the Ollama server and the chat model to use.
base_url = "http://localhost:11434"
model = "llama3.2:latest"

llm = ChatOllama(base_url=base_url, model=model)

# Smoke test: send a short greeting and display only the text of the reply.
greeting = llm.invoke("hi")
greeting.content

'How can I assist you today?'

In [21]:
# Display the LANGSMITH_ENDPOINT environment variable to confirm it is set.
# The subscript lookup raises KeyError when the variable is missing.
os.environ['LANGSMITH_ENDPOINT']

'https://api.smith.langchain.com'

### Identifying the PDFs in the Directory

In [28]:
## List of all the pdf documents
def find_pdfs(root_dir):
    """Return the sorted paths of every PDF file found under ``root_dir``.

    The directory tree is walked recursively. The extension check is
    case-insensitive, so files named ``*.PDF`` are not skipped, and the result
    is sorted so the document order does not depend on the order in which the
    file system lists its entries.

    Args:
        root_dir: Directory to search.

    Returns:
        A sorted list of path strings. The list is empty when ``root_dir``
        does not exist or contains no PDF files.
    """
    return sorted(
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root_dir)
        for name in names
        if name.lower().endswith(".pdf")
    )


# Default dataset location. Set the RAG_PDF_DIR environment variable to point
# the notebook at another folder without editing this cell.
PDF_DIR = os.environ.get("RAG_PDF_DIR", r"C:\My Projects\rag-dataset\gym supplements")

pdfs = find_pdfs(PDF_DIR)


In [29]:
# Display the PDF paths collected by the directory walk.
pdfs

['C:\\My Projects\\rag-dataset\\gym supplements\\1. Analysis of Actual Fitness Supplement.pdf',
 'C:\\My Projects\\rag-dataset\\gym supplements\\2. High Prevalence of Supplement Intake.pdf']

### Loading all the PDFs

In [30]:
### Load all the pdfs

from langchain_community.document_loaders import PyMuPDFLoader

# Run every PDF through PyMuPDFLoader and flatten the loaded documents into a
# single list, keeping the order of `pdfs`.
docs = [document for pdf in pdfs for document in PyMuPDFLoader(pdf).load()]

In [38]:
# Number of documents produced by the loaders across all PDFs.
len(docs)

26

In [33]:
# Print the raw extracted text of the first loaded document to eyeball extraction quality.
print(docs[0].page_content)

Citation: Espeño, P.R.; Ong, A.K.S.;
German, J.D.; Gumasing, M.J.J.; Casas,
E.S. Analysis of Actual Fitness
Supplement Consumption among
Health and Fitness Enthusiasts. Foods
2024, 13, 1424. https://doi.org/
10.3390/foods13091424
Academic Editors: Ilija Djekic
and Nada Smigic
Received: 30 March 2024
Revised: 15 April 2024
Accepted: 18 April 2024
Published: 6 May 2024
Copyright: © 2024 by the authors.
Licensee MDPI, Basel, Switzerland.
This article is an open access article
distributed
under
the
terms
and
conditions of the Creative Commons
Attribution (CC BY) license (https://
creativecommons.org/licenses/by/
4.0/).
foods
Article
Analysis of Actual Fitness Supplement Consumption among
Health and Fitness Enthusiasts
Paolo Renzo Espeño 1, Ardvin Kester S. Ong 1,2,*
, Josephine D. German 1
, Ma. Janice J. Gumasing 3
and Ethan S. Casas 1
1
School of Industrial Engineering and Engineering Management, Mapúa University, 658 Muralla St.,
Intramuros, Manila 1002, Philippines
2
E.T. Yuchengo Scho

### Chunking

In [39]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into overlapping chunks: a size limit of 1000 per
# chunk, with 200 carried over between neighbouring chunks.
chunk_size = 1000
chunk_overlap = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)
chunks = text_splitter.split_documents(docs)

In [40]:
import tiktoken

In [42]:
# Show the chunk count plus a small sample. Displaying the whole list would
# write every chunk into the cell output.
len(chunks), chunks[:2]

(125,
 [Document(metadata={'producer': 'iLovePDF', 'creator': '', 'creationdate': '', 'source': 'C:\\My Projects\\rag-dataset\\gym supplements\\1. Analysis of Actual Fitness Supplement.pdf', 'file_path': 'C:\\My Projects\\rag-dataset\\gym supplements\\1. Analysis of Actual Fitness Supplement.pdf', 'total_pages': 15, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2024-10-21T11:38:50+00:00', 'trapped': '', 'page': 0}, page_content='Citation: Espeño, P.R.; Ong, A.K.S.;\nGerman, J.D.; Gumasing, M.J.J.; Casas,\nE.S. Analysis of Actual Fitness\nSupplement Consumption among\nHealth and Fitness Enthusiasts. Foods\n2024, 13, 1424. https://doi.org/\n10.3390/foods13091424\nAcademic Editors: Ilija Djekic\nand Nada Smigic\nReceived: 30 March 2024\nRevised: 15 April 2024\nAccepted: 18 April 2024\nPublished: 6 May 2024\nCopyright: © 2024 by the authors.\nLicensee MDPI, Basel, Switzerland.\nThis article is an open access article\ndistributed\nunder\nthe\nter

In [46]:
# Total number of chunks produced by the splitter.
len(chunks)

125

In [56]:
# Inspect the final chunk. A negative index stays valid if the number of
# chunks changes, unlike a hard-coded position.
chunks[-1]

Document(metadata={'producer': 'iLovePDF', 'creator': '', 'creationdate': '', 'source': 'C:\\My Projects\\rag-dataset\\gym supplements\\2. High Prevalence of Supplement Intake.pdf', 'file_path': 'C:\\My Projects\\rag-dataset\\gym supplements\\2. High Prevalence of Supplement Intake.pdf', 'total_pages': 11, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2024-10-21T11:39:04+00:00', 'trapped': '', 'page': 10}, page_content='answers to some degree.\n5. Conclusions\nIn conclusion, this study indicates a high prevalence of supplement use among Swiss ﬁtness center\nusers. The high use was associated with a low level of information quality. We detected a striking\ndiscrepancy between an obvious desire for high quality evidence-based information and a blatant')

In [49]:
# Text of the first chunk, shown as a repr so the embedded newlines are visible.
chunks[0].page_content

'Citation: Espeño, P.R.; Ong, A.K.S.;\nGerman, J.D.; Gumasing, M.J.J.; Casas,\nE.S. Analysis of Actual Fitness\nSupplement Consumption among\nHealth and Fitness Enthusiasts. Foods\n2024, 13, 1424. https://doi.org/\n10.3390/foods13091424\nAcademic Editors: Ilija Djekic\nand Nada Smigic\nReceived: 30 March 2024\nRevised: 15 April 2024\nAccepted: 18 April 2024\nPublished: 6 May 2024\nCopyright: © 2024 by the authors.\nLicensee MDPI, Basel, Switzerland.\nThis article is an open access article\ndistributed\nunder\nthe\nterms\nand\nconditions of the Creative Commons\nAttribution (CC BY) license (https://\ncreativecommons.org/licenses/by/\n4.0/).\nfoods\nArticle\nAnalysis of Actual Fitness Supplement Consumption among\nHealth and Fitness Enthusiasts\nPaolo Renzo Espeño 1, Ardvin Kester S. Ong 1,2,*\n, Josephine D. German 1\n, Ma. Janice J. Gumasing 3\nand Ethan S. Casas 1\n1\nSchool of Industrial Engineering and Engineering Management, Mapúa University, 658 Muralla St.,\nIntramuros, Manila 10

### Counting the number of tokens in each chunk

In [59]:
# take a look at the tokens in each chunk
# NOTE(review): the counts come from tiktoken's encoding for "gpt-3.5", while
# the chat and embedding models in this notebook are served by Ollama, so the
# numbers are presumably only an estimate for those models. Confirm if exact
# counts matter.
encoding = tiktoken.encoding_for_model("gpt-3.5")
num_tokens = [len(encoding.encode(chunk.page_content)) for chunk in chunks]


In [65]:
import numpy as np

# Average number of tokens per chunk.
token_counts = np.array(num_tokens)
token_counts.mean()

219.272

### Document Embeddings

In [73]:
from langchain_ollama import OllamaEmbeddings
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore  import InMemoryDocstore

In [77]:
# Embedding client for the "nomic-embed-text" model, using the same base_url as the chat model.
embedding = OllamaEmbeddings(model = "nomic-embed-text",base_url=base_url)

In [78]:
# Embed a throwaway string to learn the embedding width, then create a FAISS
# flat L2 index with that dimensionality and show its size and dimension.
vector = embedding.embed_query("testing")
embedding_dim = len(vector)
index = faiss.IndexFlatL2(embedding_dim)
index.ntotal, index.d

(0, 768)

### Create the Vector Store

In [80]:
# Wrap the FAISS index in a LangChain vector store. Documents are kept in an
# in-memory docstore and the index-position -> document-id mapping starts empty.
docstore = InMemoryDocstore()
vector_store = FAISS(
    index=index,
    docstore=docstore,
    embedding_function=embedding,
    index_to_docstore_id={},
)

In [81]:
# Number of vectors currently in the store's index, and the index dimensionality.
vector_store.index.ntotal, vector_store.index.d

(0, 768)

In [82]:
# Add every chunk to the vector store and keep the value returned by the call in `ids`.
# NOTE(review): re-running this cell presumably adds the same chunks a second
# time; confirm before re-executing on a populated store.
ids = vector_store.add_documents(documents=chunks)

In [83]:
# Compare the number of returned ids with the number of vectors now in the index.
len(ids), vector_store.index.ntotal

(125, 125)

In [87]:
# Persist the vector store under the name held in db_name. The name is a
# relative path, so it resolves against the current working directory.
db_name = "health_supplements"

vector_store.save_local(db_name)

### Load the Vector Store

In [88]:
# Reload the store from the same location it was saved to (db_name) rather than
# a machine-specific absolute path, so saving and loading cannot drift apart.
# NOTE: allow_dangerous_deserialization=True permits unpickling the stored
# data. Only use it on index folders you created yourself.
vector_store = FAISS.load_local(db_name, embedding, allow_dangerous_deserialization=True)

### Vector search

In [90]:
### Retrieval
# Fetch the 5 chunks most similar to the question.
# NOTE: `docs` is reused here; from this point it holds the retrieved chunks
# rather than the documents loaded from the PDFs.
question = "how to gain muscle mass?"
docs = vector_store.similarity_search(question, k=5)

In [91]:
# Display the documents returned by the similarity search.
docs

[Document(id='e1a61903-f8bc-42a0-ac44-b87600568c9e', metadata={'producer': 'iLovePDF', 'creator': '', 'creationdate': '', 'source': 'C:\\My Projects\\rag-dataset\\gym supplements\\1. Analysis of Actual Fitness Supplement.pdf', 'file_path': 'C:\\My Projects\\rag-dataset\\gym supplements\\1. Analysis of Actual Fitness Supplement.pdf', 'total_pages': 15, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2024-10-21T11:38:50+00:00', 'trapped': '', 'page': 0}, page_content='intensity activities, which is the choice of consumers [4].\nCreatine monohydrate is another well-known supplement used to gain muscle mass\nand support performance and recovery. It is known not to increase fat mass and remains\neffective even when taken in recommended doses [5]. Despite its popularity in the fitness\nFoods 2024, 13, 1424. https://doi.org/10.3390/foods13091424\nhttps://www.mdpi.com/journal/foods'),
 Document(id='d8ba7010-7e8c-41de-801e-c62820ae2ae2', metadata={'pro

### Turning the Vector Store into a Retriever

In [93]:
# Expose the vector store through the retriever interface, configured for
# similarity search with k=5, then try it on the current question.
retriever_config = {"search_type": "similarity", "search_kwargs": {"k": 5}}
retriever = vector_store.as_retriever(**retriever_config)

retriever.invoke(question)

[Document(id='e1a61903-f8bc-42a0-ac44-b87600568c9e', metadata={'producer': 'iLovePDF', 'creator': '', 'creationdate': '', 'source': 'C:\\My Projects\\rag-dataset\\gym supplements\\1. Analysis of Actual Fitness Supplement.pdf', 'file_path': 'C:\\My Projects\\rag-dataset\\gym supplements\\1. Analysis of Actual Fitness Supplement.pdf', 'total_pages': 15, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2024-10-21T11:38:50+00:00', 'trapped': '', 'page': 0}, page_content='intensity activities, which is the choice of consumers [4].\nCreatine monohydrate is another well-known supplement used to gain muscle mass\nand support performance and recovery. It is known not to increase fat mass and remains\neffective even when taken in recommended doses [5]. Despite its popularity in the fitness\nFoods 2024, 13, 1424. https://doi.org/10.3390/foods13091424\nhttps://www.mdpi.com/journal/foods'),
 Document(id='d8ba7010-7e8c-41de-801e-c62820ae2ae2', metadata={'pro

In [94]:
from langchain_ollama import ChatOllama 
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough 
from langchain_core.prompts import ChatPromptTemplate

from langchain import hub

### LangChain Runnables

In [95]:
from langchain_core.prompts import (SystemMessagePromptTemplate,
                                    HumanMessagePromptTemplate,
                                    ChatPromptTemplate)

# System message: fixes the assistant's role and tone.
system_text = "You are an assistant for question-answering tasks. Respond like a fitness specialist"

# Human message: carries the {question} and the retrieved {context} placeholders
# that are filled in when the prompt is formatted.
human_text = """ Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    Answer in bullet points. Make sure your answer is relevant to the question and it is answered from the context only.
    Question: {question} 
    Context: {context}
"""

system = SystemMessagePromptTemplate.from_template(system_text)
human_prompt = HumanMessagePromptTemplate.from_template(human_text)

# Combine both messages, system first, into the chat prompt.
prompts = [system, human_prompt]
template = ChatPromptTemplate.from_messages(prompts)

### Context Formatting

In [96]:
def format_docs(docs):
    """Concatenate the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in input order, separated by a blank
        line; an empty string when ``docs`` is empty.
    """
    page_texts = (doc.page_content for doc in docs)
    return '\n\n'.join(page_texts)

# Build a context string from the documents currently held in `docs`.
# NOTE(review): the RAG chain defined further down derives its own context from
# the retriever, so this variable looks unused by it. Confirm before relying on it.
context = format_docs(docs)

### Building the RAG Chain

In [97]:
# First stage: fan the incoming question out into the two prompt variables.
# "context" runs the retriever and joins the hits with format_docs, while
# "question" passes the input through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Remaining stages: fill the prompt, call the chat model, return plain text.
rag_chain = chain_inputs | template | llm | StrOutputParser()

### Query-1

In [98]:
# Run the RAG chain on the first question and print the model's answer as plain text.
question = "how to gain muscle mass?"
response = rag_chain.invoke(question)

print(response)

Here are the key points on how to gain muscle mass based on the provided context:

* Engage in intensity activities, which is a choice of consumers.
* Consider taking supplements such as:
	+ Creatine monohydrate to support muscle growth and recovery
	+ Protein supplements (e.g. whey protein) for essential amino acids and overall performance enhancement
	+ Caffeine to reduce perceived effort, minimize fatigue, and improve endurance


### Query-2

In [99]:
# Run the RAG chain on a second question and print the model's answer as plain text.
question = "how to lose weight?"
response = rag_chain.invoke(question)

print(response)

Here are some key points on how to lose weight based on the provided context:

* **Self-efficacy plays a significant role**: Studies found that self-efficacy has a positive correlation with vegetable consumption, which is linked to overall health goals and sports performance. Building self-efficacy can influence automatic and controlled precursors.
* **Fitness supplements contribute to confidence**: Using fitness supplements can boost confidence in exercise and achieving fitness goals, leading to greater adherence to routines and improved overall fitness outcomes.
* **Education matters**: The study found that educational interventions lowered behavioral intention, which ultimately decreased the intake of energy supplements. This suggests that educating individuals about nutrition and health choices can impact their decisions regarding fitness supplement use.

Unfortunately, there are no direct answers or specific tips on how to lose weight in the provided context. However, these points