In [1]:
import os
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings # <- FREE EMBEDDINGS
from langchain_community.chat_models import ChatOllama         # <- FREE LLM
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 1. Load Documents
# Collect the loader output of every PDF found directly inside the corpus folder.
corpus_dir = './corpus/'
documents = []
# os.listdir() returns entries in arbitrary order; sorting keeps the document
# (and therefore chunk) order identical across runs and machines.
for file_name in sorted(os.listdir(corpus_dir)):
    # Compare case-insensitively so files such as "Study.PDF" are not skipped.
    if file_name.lower().endswith(".pdf"):
        file_path = os.path.join(corpus_dir, file_name)
        loader = PyMuPDFLoader(file_path)
        documents.extend(loader.load())

# Fail here with a clear message instead of letting a later cell break on an
# empty document list.
if not documents:
    raise ValueError(f"No PDF content could be loaded from {corpus_dir!r}")

In [3]:
# 2. Split Documents into Chunks
# Splitter settings are gathered in one place so they are easy to tune.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
texts = text_splitter.split_documents(documents)

In [4]:
# 3. Create FREE Embeddings
# The embedding model comes from Hugging Face and runs on your machine;
# it is downloaded automatically the first time this cell is executed.
# (Storing the vectors in ChromaDB happens in the next cell.)
model_name = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
)

  embeddings = HuggingFaceEmbeddings(model_name=model_name)


In [6]:
# Create the vector store
# Embed every chunk in `texts` and keep the collection on disk under
# `persist_directory`, so it can be re-opened later without re-embedding.
# NOTE(review): re-running this cell against an already populated directory
# appears to add the same chunks a second time (duplicate search hits) —
# delete the folder first when rebuilding; confirm against the Chroma docs.
persist_directory = 'db'
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory=persist_directory
)
# No explicit vectordb.persist() call: the method is deprecated (the recorded
# output of this cell shows a warning pointing at it) and, per its deprecation
# notice, Chroma 0.4+ persists automatically when persist_directory is set.

  vectordb.persist()


In [7]:
# 4. Build the RAG Chain with a FREE LLM
# Chat model: Llama 3 served through Ollama
llm = ChatOllama(model="llama3")

# Re-open the on-disk vector store and wrap it in a retriever (k = 4)
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)
retriever = vectordb.as_retriever(search_kwargs=dict(k=4))

# Prompt = system instructions (with the {context} slot) + the user's {input}
system_prompt = "".join([
    "You are a knowledgeable and precise fitness and nutrition science assistant. ",
    "Answer the user's question based only on the context provided. ",
    "If the context doesn't contain the answer, state that you cannot answer from the provided documents. ",
    "Always include a disclaimer that you are not a medical doctor and this is for educational purposes.\n\n",
    "context : \n {context}",
])
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

  llm = ChatOllama(model="llama3")
  vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)


In [8]:
# Assemble the RAG chain (it is only built here; a later cell invokes it):
# the retriever supplies documents to the chain that feeds them to the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [None]:
# --- Test it! ---
# Ask a single question, then print the answer followed by the cited sources.
question = "Okay, I am just getting "
response = rag_chain.invoke({"input": question})

print("## Answer:", response["answer"], "\n## Sources Used:", sep="\n")
for doc in response["context"]:
    meta = doc.metadata
    source = meta.get('source', 'Unknown source')
    page = meta.get('page', 'N/A')
    print(f"- {source}, page {page}")

## Answer:
I'm not a medical doctor, but I can provide you with information based on the provided context.

According to the text, while a ketogenic diet has been shown to provide short-term benefits in some people, including weight loss and improvements in total cholesterol, blood sugar, and blood pressure, the evidence is limited due to small sample sizes, short durations (12 weeks or less), and lack of control groups. Additionally, long-term effects (beyond one year) are not significantly different from conventional weight loss diets.

It's also important to note that eliminating several food groups and potential unpleasant symptoms may make compliance difficult. Furthermore, an emphasis on foods high in saturated fat counters recommendations from the Dietary Guidelines for Americans and the American Heart Association, which may have adverse effects on blood LDL cholesterol.

The provided context also mentions a study comparing low-fat diets with very-low-carbohydrate ketogenic diet

In [None]:
# --- Interactive question/answer UI ---
# Run the cells above first: this cell relies on the `rag_chain` variable
# they create and does not repeat the imports, loading, or chain setup.

import ipywidgets as widgets
from IPython.display import display, HTML

# 1. Define the UI Widgets
# Multi-line box where the user types the question
question_box_layout = widgets.Layout(width='90%', height='100px')
question_input = widgets.Textarea(
    placeholder='Ask a question about weight loss...',
    description='Question:',
    layout=question_box_layout,
)

# Green button that submits the question
submit_button = widgets.Button(description='Get Answer', button_style='success', icon='question')

# Area that receives everything the click handler prints or displays
output_area = widgets.Output()

# 2. Create a function to handle the button click
def on_button_clicked(b):
    """Answer the question typed in the text area and render the result.

    Click callback for the submit button: reads ``question_input``, runs
    ``rag_chain`` on it, and writes the answer followed by the list of
    retrieved sources into ``output_area``.

    Args:
        b: The object passed by the button's ``on_click`` hook; unused.
    """
    # Local import keeps this cell runnable without touching the import cell.
    from html import escape

    with output_area:
        output_area.clear_output()  # Clear previous results
        # Strip so that a whitespace-only entry is rejected like an empty one
        # instead of being sent to the chain.
        question = question_input.value.strip()
        if not question:
            print("Please enter a question.")
            return

        print("Thinking...")

        # --- Run your RAG Chain ---
        response = rag_chain.invoke({"input": question})
        answer = response["answer"]
        sources = response["context"]

        # Clear the "Thinking..." message and display the final result
        output_area.clear_output()

        # The answer is model-generated text, not trusted HTML: escape it so
        # characters like "<" or "&" render literally. `pre-wrap` keeps the
        # model's line breaks, which plain HTML would otherwise collapse.
        display(HTML(
            '<h3>Answer:</h3>'
            f'<p style="white-space: pre-wrap;">{escape(str(answer))}</p>'
        ))

        # Sources: file paths and page values are escaped for the same reason.
        source_items = []
        for doc in sources:
            source_file = escape(str(doc.metadata.get('source', 'Unknown source')))
            page_num = escape(str(doc.metadata.get('page', 'N/A')))
            source_items.append(f"<li>{source_file}, page {page_num}</li>")
        display(HTML("<h3>Sources Used:</h3><ul>" + "".join(source_items) + "</ul>"))

# 3. Register the click handler on the button
submit_button.on_click(on_button_clicked)

# 4. Render the UI below this cell, one widget after another
print("Your Personal Learning Portal is ready!")
for ui_element in (question_input, submit_button, output_area):
    display(ui_element)

Your Personal Learning Portal is ready!


Textarea(value='', description='Question:', layout=Layout(height='100px', width='90%'), placeholder='Ask a que…

Button(button_style='success', description='Get Answer', icon='question', style=ButtonStyle())

Output()