In [1]:
import streamlit as st
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate
import os 
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Groq credential is read from the process environment (load_dotenv() was
# called in the import cell); this is None when the variable is not set.
groq_api_key = os.environ.get("GROQ_API_KEY")

# Chat model that writes the recommendation, followed by the embedding model
# that is later handed to the FAISS vector store.
model = ChatGroq(
    groq_api_key=groq_api_key,
    model="Meta-Llama/Llama-4-Scout-17b-16e-Instruct",
)
embedding = HuggingFaceEmbeddings(
    model_name="avsolatorio/GIST-small-Embedding-v0",
)


  embedding = HuggingFaceEmbeddings(model_name='avsolatorio/GIST-small-Embedding-v0')


In [9]:
# Directories (relative to the notebook's working directory) that the loading
# cell scans for *.txt files. Order here is the order in which they are loaded.
folder_paths = [
    "pubmed_abstracts", "harvard_articles",
    "fdc_data", "eatright_articles",
]

In [10]:
# Load every *.txt file found (recursively) under each configured directory
# and flatten the per-directory results into one list, preserving the
# directory order given in `folder_paths`. A progress bar is shown per directory.
all_documents = [
    document
    for corpus_dir in folder_paths
    for document in DirectoryLoader(
        corpus_dir,
        glob="**/*.txt",
        loader_cls=TextLoader,
        show_progress=True,
    ).load()
]
print(f"✅ Total documents loaded: {len(all_documents)}")


100%|██████████| 80/80 [00:01<00:00, 72.29it/s]
100%|██████████| 80/80 [00:01<00:00, 64.31it/s]
100%|██████████| 80/80 [00:01<00:00, 67.32it/s]
100%|██████████| 80/80 [00:00<00:00, 472.46it/s]

✅ Total documents loaded: 320





In [12]:
# Cut the loaded documents into chunks of at most 2500 characters, with
# consecutive chunks sharing up to 600 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=600, chunk_size=2500)
split_docs = text_splitter.split_documents(documents=all_documents)
print(f"🧩 Total chunks after splitting: {len(split_docs)}")

🧩 Total chunks after splitting: 9265


In [13]:
# Embed every chunk with `embedding` and build an in-memory FAISS index over them.
vectorstore = FAISS.from_documents(
    documents=split_docs,
    embedding=embedding,
)

In [14]:
# Write the index to the "faiss_index" folder, then confirm on stdout.
vectorstore.save_local(folder_path="faiss_index")
print("💾 FAISS index saved to ./faiss_index")

💾 FAISS index saved to ./faiss_index


In [21]:
# Wrap the FAISS store in a retriever. No arguments are passed, so the
# library's default search settings apply.
retriever = vectorstore.as_retriever()

In [26]:
# Prompt template for the recommendation step. It exposes three placeholders:
#   {condition} - the patient's health condition(s)
#   {allergies} - the patient's allergy profile
#   {context}   - the retrieved documents, supplied under the "context" key
#                 when the document chain is invoked
# NOTE(review): the chain object captures this template when it is created, so
# re-running this cell alone presumably does not update an existing
# `document_chain` - re-create the chain after editing the template.
prompt = ChatPromptTemplate.from_template("""
You are a nutrition AI assistant that gives dietary recommendations based on peer-reviewed research. 

Given a patient's health condition(s) and allergy profile, provide specific and research-backed nutrition advice.
Use only medically accurate, peer-reviewed information. Cite nutritional reasoning if available.

Patient Condition(s): {condition}
Allergies: {allergies}

Context from peer-reviewed articles:
{context}

What are the best dietary recommendations for this patient?
""")


In [20]:
# Chain that inserts the retrieved documents into the prompt's {context}
# slot and sends the filled prompt to the chat model.
document_chain = create_stuff_documents_chain(model, prompt)

In [25]:
# Collect the patient profile interactively. Surrounding whitespace is
# stripped so that blank answers are detected and stray spaces do not end up
# in the retrieval query or the prompt.
condition = input("Enter patient condition(s): ").strip()
allergies = input("Enter patient allergy profile: ").strip()

# A recommendation cannot be grounded without a condition: a blank value would
# produce a whitespace-only retrieval query and a prompt with an empty
# "Patient Condition(s)" field, so fail early with a clear message instead.
if not condition:
    raise ValueError("Patient condition(s) must not be empty.")

# Retrieval query: join only the non-empty fields, so a missing allergy
# profile does not append a trailing space to the search text.
query = " ".join(part for part in (condition, allergies) if part)

# Fetch the chunks most similar to the query from the vector store.
relevant_docs = retriever.invoke(query)

# Fill the prompt and call the model. When no allergies were entered the
# prompt states that explicitly rather than leaving the field blank.
response = document_chain.invoke({
    "condition": condition,
    "allergies": allergies or "None reported",
    "context": relevant_docs,
})

# Display result
print("\n🧠 Dietary Recommendation:\n")
print(response)



🧠 Dietary Recommendation:

Given the patient's high blood pressure with diabetic symptoms and dairy allergies, I will provide dietary recommendations based on peer-reviewed research.

Firstly, it's essential to note that the patient has dairy allergies, which means they need to avoid dairy products altogether. Fortunately, there are several non-dairy alternatives that can provide similar nutritional benefits.

**For High Blood Pressure:**

1. **Increase potassium intake**: Studies have shown that potassium can help lower blood pressure. Food sources rich in potassium include leafy greens (spinach, kale), fruits (bananas, berries), and legumes (lentils, chickpeas) (1).
2. **Focus on calcium and magnesium-rich foods**: Although the patient is allergic to dairy, they can consume calcium and magnesium-rich non-dairy foods such as fortified plant-based milk, dark leafy greens, nuts (almonds, cashews), and seeds (sesame, pumpkin) (2, 3).
3. **Incorporate omega-3 fatty acids**: Omega-3 fatty