<a href="https://colab.research.google.com/github/rhaveri/master-thesis/blob/main/4_ui_chatbot_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- 1. INSTALLATION ---
print("⏳ Installing libraries... (Takes ~2 mins)")
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes
!pip install langchain-community langchain-core chromadb langchain-huggingface gradio

import os
import json
import torch
from unsloth import FastLanguageModel
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
import gradio as gr

# --- 2. PREPARE MODEL (LOCAL UPLOAD) ---
print("\n🔍 Checking for model...")

# Case A: You uploaded a ZIP
if os.path.exists("lora_model.zip"):
    print("📦 Found zip file! Unzipping...")
    !unzip -o -q lora_model.zip -d .
    print("✅ Unzipped.")

# Case B: You uploaded a FOLDER
elif os.path.exists("lora_model"):
    print("✅ Found 'lora_model' folder directly.")

else:
    raise FileNotFoundError("❌ Please upload 'lora_model.zip' or drag the 'lora_model' folder into the files area on the left!")

# --- 3. LOAD MODEL ---
# Loads the fine-tuned model and tokenizer from the local 'lora_model'
# directory with 4-bit weights, then switches the model to inference mode.
print("\n🧠 Loading Fine-Tuned SFT Model...")
try:
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="lora_model",
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=True,
    )
    FastLanguageModel.for_inference(model)
except Exception:
    # Print a human-readable hint, then re-raise the original exception
    # unchanged (bare `raise` keeps the original traceback intact).
    print("❌ Failed to load model. Is the folder empty?")
    raise

# --- 4. PREPARE KNOWLEDGE BASE ---
# Reads the nutrition documents, splits them into overlapping chunks, embeds
# the chunks and stores them in a Chroma vector store (`vector_db`) that the
# chat cell queries.
print("\n📚 Building RAG Database...")
if not os.path.exists("nutrition_documents_v2.json"):
    raise FileNotFoundError("❌ Please upload 'nutrition_documents_v2.json' to the files area.")

with open("nutrition_documents_v2.json", "r", encoding="utf-8") as f:
    docs = json.load(f)

# Each JSON record must provide a "text" and a "source" key.
documents = [Document(page_content=d["text"], metadata={"source": d["source"]}) for d in docs]
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents)

# Fall back to CPU when no GPU is visible instead of failing on a hard-coded
# 'cuda' device.
embedding_device = 'cuda' if torch.cuda.is_available() else 'cpu'
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={'device': embedding_device},
    encode_kwargs={'batch_size': 32}
)
vector_db = Chroma.from_documents(documents=splits, embedding=embeddings)

print("\n✅ SYSTEM READY! Run the Chat App cell below.")

⏳ Installing libraries... (Takes ~2 mins)
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-fl92se1j/unsloth_4ea61cdf7b0c4409a795d908bdd6c343
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-fl92se1j/unsloth_4ea61cdf7b0c4409a795d908bdd6c343
  Resolved https://github.com/unslothai/unsloth.git to commit 010775fbdebecf3f413002e593161393c72c0a09
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done

🔍 Checking for model...
📦 Found zip file! Unzipping...
✅ Unzipped.

🧠 Loading Fine-Tuned SFT Model...
==((====))==  Unsloth 2026.1.2: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_

model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

Unsloth 2026.1.2 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.



📚 Building RAG Database...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]


✅ SYSTEM READY! Run the Chat App cell below.


In [None]:
def chat_logic(message, history):
    """Answer a user question with retrieval-augmented generation.

    The three most similar chunks are fetched from ``vector_db``, placed in a
    context-only prompt, and the fine-tuned model generates the reply.

    Args:
        message: The user's latest question as plain text.
        history: Earlier conversation turns supplied by the caller. Not used:
            every question is answered independently of previous turns.

    Returns:
        The generated answer, or a short fallback message when the question
        is blank or no documents were retrieved.
    """
    if not message or not message.strip():
        return "Please enter a question."

    # 1. Retrieve
    docs = vector_db.similarity_search(message, k=3)
    if not docs:
        return "I'm sorry, I couldn't find relevant information in my database."
    context_text = "\n\n".join(d.page_content for d in docs)

    # 2. Prompt (Strict Persona)
    prompt = f"""Context information is below.
---------------------
{context_text}
---------------------
Given the context information and not prior knowledge, answer the query.

Query: {message}"""

    messages = [
        {"role": "system", "content": "You are a professional AI health coach. Answer strictly based on the provided context."},
        {"role": "user", "content": prompt}
    ]

    # 3. Generate
    # return_dict=True also yields the attention mask, and moving the batch to
    # model.device avoids assuming the model lives on "cuda".
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # temperature only influences sampling, so do_sample is set explicitly
    # rather than depending on the model's stored generation config.
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        use_cache=True,
        do_sample=True,
        temperature=0.3,
    )

    # Decode only the newly generated tokens; this does not depend on the
    # literal header/end-of-turn strings of one specific chat template.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
    return response

# UI: expose chat_logic through a Gradio chat interface and publish it via a
# temporary public share link.
_example_questions = [
    "How can I eat healthy on a budget?",
    "What is the DASH diet?",
    "How much water do I need?",
]

chatbot = gr.ChatInterface(
    fn=chat_logic,
    theme=gr.themes.Soft(),
    examples=_example_questions,
    description="Ask me about nutrition, sleep, and exercise. I use RAG to find facts from medical journals.",
    title="🥗 AI Nutrition Health Coach (Thesis Demo)",
)

chatbot.launch(share=True)

  self.chatbot = Chatbot(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a02995ffa0a53fdc08.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# ============================================
# AI NUTRITION CHATBOT
# ============================================

# --- INSTALLATION ---
# The pip commands below sit inside a bare string literal, so they are NOT
# executed when this cell runs. To install the dependencies, copy the three
# lines into their own notebook cell (the leading "!" is notebook shell
# syntax, not Python).
"""
!pip install -q "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install -q --no-deps xformers trl peft accelerate bitsandbytes
!pip install -q langchain-community chromadb langchain-huggingface gradio
"""

import os
import json
import torch
from unsloth import FastLanguageModel
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
import gradio as gr

# ============================================
# SETUP
# ============================================

# Extract model if zipped
if os.path.exists("lora_model.zip"):
    # -o overwrites files from a previous extraction instead of prompting for
    # confirmation, which cannot be answered when run through os.system.
    unzip_status = os.system("unzip -o -q lora_model.zip")
    # Only fail when extraction went wrong AND no usable folder is present.
    if unzip_status != 0 and not os.path.isdir("lora_model"):
        raise RuntimeError(f"Failed to extract 'lora_model.zip' (unzip exit status {unzip_status}).")

# Stop early with a clear message instead of an obscure loader error later.
if not os.path.isdir("lora_model"):
    raise FileNotFoundError("❌ Please upload 'lora_model.zip' or the 'lora_model' folder before running this cell.")

# Load the fine-tuned model and its tokenizer from the local 'lora_model'
# directory, then switch the model to inference mode.
print("🧠 Loading model...")
_load_options = {
    "model_name": "lora_model",
    "max_seq_length": 2048,
    "dtype": None,
    "load_in_4bit": True,
}
model, tokenizer = FastLanguageModel.from_pretrained(**_load_options)
FastLanguageModel.for_inference(model)

# Load documents and build vector DB
# Reads the nutrition documents, splits them into overlapping chunks, embeds
# the chunks and stores them in a Chroma vector store (`vector_db`) used by
# the chat function.
print("📚 Building knowledge base...")
if not os.path.exists("nutrition_documents_v2.json"):
    raise FileNotFoundError("❌ Please upload 'nutrition_documents_v2.json' to the files area.")

# Explicit UTF-8 so the result does not depend on the platform's default
# text encoding.
with open("nutrition_documents_v2.json", "r", encoding="utf-8") as f:
    raw_docs = json.load(f)

# Each JSON record must provide a "text" and a "source" key.
docs = [Document(page_content=d["text"], metadata={"source": d["source"]}) for d in raw_docs]
chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)

embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
)

vector_db = Chroma.from_documents(chunks, embeddings)

# ============================================
# CHAT FUNCTION
# ============================================

def chat(message, history):
    """Answer a user question with retrieval-augmented generation.

    The three most similar chunks are fetched from ``vector_db``, placed in a
    context-only prompt, and the fine-tuned model generates the reply. A
    medical disclaimer is appended to every generated answer.

    Args:
        message: The user's latest question as plain text.
        history: Earlier conversation turns supplied by the caller. Not used:
            every question is answered independently of previous turns.

    Returns:
        The generated answer followed by the disclaimer, or a short fallback
        message when the question is blank or no documents were retrieved.
    """
    if not message or not message.strip():
        return "Please enter a question."

    # Retrieve context
    docs = vector_db.similarity_search(message, k=3)
    if not docs:
        return "I couldn't find relevant information. Try rephrasing your question."

    context = "\n\n".join(d.page_content for d in docs)

    # Format prompt
    prompt = f"""Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.

Query: {message}"""

    messages = [
        {"role": "system", "content": "You are a professional AI health coach. Answer strictly based on the provided context."},
        {"role": "user", "content": prompt}
    ]

    # Generate answer
    # return_dict=True also yields the attention mask, and moving the batch to
    # model.device avoids assuming the model lives on "cuda".
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # temperature only influences sampling, so do_sample is set explicitly
    # rather than depending on the model's stored generation config.
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.3,
        use_cache=True,
    )

    # Decode only the newly generated tokens; this does not depend on the
    # literal header/end-of-turn strings of one specific chat template.
    answer = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    return answer + "\n\n*Disclaimer: I'm an AI coach, not a doctor. Consult a professional for medical advice.*"

# ============================================
# LAUNCH INTERFACE
# ============================================

print("🚀 Launching chatbot...")

# Sample questions offered by the chat interface.
_example_prompts = [
    "How can I eat healthy on a budget?",
    "What is the DASH diet?",
    "How much water should I drink?",
    "What are good protein sources for vegetarians?",
]

# Build the interface around `chat`, then publish it with a public share link.
_demo = gr.ChatInterface(
    fn=chat,
    theme=gr.themes.Soft(primary_hue="green"),
    examples=_example_prompts,
    description="Ask about nutrition, diet, sleep, and exercise. Evidence-based answers from medical sources.",
    title="🥗 AI Nutrition Health Coach",
)
_demo.launch(share=True)

print("✅ Chatbot is live!")