<a href="https://colab.research.google.com/github/rhaveri/master-thesis/blob/main/4_ui_chatbot_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- 1. INSTALLATION ---
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes
!pip install langchain-community langchain-core chromadb langchain-huggingface gradio

import os
import json
import torch
from unsloth import FastLanguageModel
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
import gradio as gr


if os.path.exists("lora_model.zip"):
    !unzip -o -q lora_model.zip -d .
    print("Unzipped.")

elif os.path.exists("lora_model"):
    print("Found 'lora_model' folder.")

else:
    raise FileNotFoundError("Please upload 'lora_model.zip'")

try:
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "lora_model",
        max_seq_length = 2048,
        dtype = None,
        load_in_4bit = True,
    )
    FastLanguageModel.for_inference(model)
except Exception as e:
    print(" Failed to load model. ")
    raise e


# --- 3. KNOWLEDGE BASE (RAG index) ---
if not os.path.exists("nutrition_documents_v2.json"):
    raise FileNotFoundError(" Please upload 'nutrition_documents_v2.json' ")

with open("nutrition_documents_v2.json", "r", encoding="utf-8") as f:
    docs = json.load(f)

# Each JSON record must provide a "text" body and a "source" label.
documents = [Document(page_content=d["text"], metadata={"source": d["source"]}) for d in docs]

# Overlapping chunks so a passage cut at a chunk boundary is still retrievable.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents)

embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={'device': 'cuda'},
    encode_kwargs={'batch_size': 32}
)

# Without explicit ids every call generates fresh random ones, so re-running
# this cell in the same kernel can append a second copy of every chunk to the
# in-memory collection and crowd the top-k results with duplicates.
# Deterministic ids make a re-run overwrite the existing entries instead.
# NOTE(review): chunks left over from a previous, longer corpus are not
# removed -- restart the runtime after shrinking the JSON file.
chunk_ids = [f"chunk-{i}" for i in range(len(splits))]
vector_db = Chroma.from_documents(documents=splits, embedding=embeddings, ids=chunk_ids)

print("\n SYSTEM READY")

In [None]:
def chat_logic(message, history):
    """Answer one chat turn using retrieval-augmented generation.

    The three chunks most similar to ``message`` are fetched from the
    module-level ``vector_db``, placed into a context prompt, and passed to the
    module-level ``model``/``tokenizer`` for generation.

    Args:
        message: The user's question as plain text.
        history: Previous turns supplied by Gradio. Not used: every question
            is answered independently of the conversation so far.

    Returns:
        The assistant's reply as a string, or a fixed fallback message when
        the question is blank or nothing is retrieved.
    """
    # Gradio allows submitting an empty box; there is nothing to retrieve or
    # answer for a blank query, so reply without touching the model.
    if not message or not message.strip():
        return "Please enter a question about nutrition, sleep, or exercise."

    # 1. Retrieve
    retrieved = vector_db.similarity_search(message, k=3)
    if not retrieved:
        return "I'm sorry, I couldn't find relevant information in my database."
    context_text = "\n\n".join(doc.page_content for doc in retrieved)

    # 2. Prompt
    prompt = (
        "Context information is below.\n"
        "---------------------\n"
        f"{context_text}\n"
        "---------------------\n"
        "Given the context information and not prior knowledge, answer the query.\n"
        "\n"
        f"Query: {message}"
    )

    messages = [
        {"role": "system", "content": "You are a professional AI health coach. Answer strictly based on the provided context."},
        {"role": "user", "content": prompt}
    ]

    # 3. Generate
    # Follow the model's own device instead of hard-coding "cuda".
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=512,
        use_cache=True,
        # temperature only takes effect when sampling is enabled.
        do_sample=True,
        temperature=0.3,
    )

    # generate() returns the prompt followed by the completion; keep only the
    # newly generated tokens rather than splitting the decoded text on a
    # template-specific header string, and let the tokenizer drop all special
    # tokens.
    new_tokens = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# UI
# Chat front end: Gradio calls chat_logic(message, history) for every turn.
chatbot = gr.ChatInterface(
    fn=chat_logic,
    # The emoji is written as an escape (U+1F957 GREEN SALAD) so the title
    # survives any re-encoding of this file; it was previously stored as
    # mojibake.
    title="\U0001F957 AI Nutrition Health Coach (Thesis Demo)",
    description="Ask me about nutrition, sleep, and exercise. I use RAG to find facts from medical journals.",
    examples=["How can I eat healthy on a budget?", "What is the DASH diet?", "How much water do I need?"],
    theme=gr.themes.Soft()
)

# share=True requests a public share link in addition to the local server.
chatbot.launch(share=True)