In [None]:
import os
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import joblib
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM

In [None]:
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using " + device + " device")

Using cuda device


In [None]:
# Load the FAQ knowledge base. The file carries its saved DataFrame index as an
# unnamed first column (it surfaces as "Unnamed: 0" when read naively), so read
# that column as the index instead of keeping it around as a data column.
faq = pd.read_csv('faq.csv', index_col=0)

# The retrieval code reads these two columns; fail here with a clear message
# rather than with a KeyError further down the notebook.
assert {'question', 'answer'} <= set(faq.columns), "faq.csv must contain 'question' and 'answer' columns"
faq.head()

Unnamed: 0,id,question,answer
0,1,What is the best time to workout?,Morning workouts can boost energy and metaboli...
1,2,How much protein do I need daily?,0.8–1g protein per kg body weight is recommended.
2,3,Can I lose weight without cardio?,"Yes, strength training and calorie deficit can..."
3,4,What foods help muscle growth?,"Lean meats, fish, legumes, and protein-rich fo..."
4,5,How often should I work out weekly?,3–5 times per week is effective for most people.


In [None]:
# Sentence-embedding model used to vectorise the FAQ questions.
EMBEDDED_MODEL = "all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBEDDED_MODEL, device=device)

# Corpus = every FAQ question, coerced to str before encoding.
corpus = list(faq['question'].astype(str))
embeddings = embedder.encode(
    corpus,
    convert_to_numpy=True,
    show_progress_bar=True,
)

# Persist the embedding matrix and the name of the model that produced it,
# so the embedder can be re-created later from the stored name.
os.makedirs("models/chatbot", exist_ok=True)
np.save("models/chatbot/faq_embeddings.npy", embeddings)
with open(os.path.join("models/chatbot", 'embedder.txt'), 'w') as f:
    f.write(EMBEDDED_MODEL)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Cosine-metric neighbour index fitted on the FAQ question embeddings.
nn = NearestNeighbors(metric='cosine', n_neighbors=1)
nn.fit(embeddings)
def retrieve_nn(query, k=3):
    """Return the FAQ entries whose questions are closest to ``query``.

    Args:
        query: Free-text user question; embedded with the module-level ``embedder``.
        k: Number of neighbours requested. It is capped at the number of rows
            the index was fitted on, because ``kneighbors`` raises ``ValueError``
            when asked for more neighbours than there are indexed samples.

    Returns:
        A list of ``(row_position, similarity, question, answer)`` tuples in the
        order returned by ``nn.kneighbors``. ``row_position`` is a positional
        index into ``faq`` and ``similarity`` is ``1 - cosine distance``.
    """
    k = min(k, nn.n_samples_fit_)
    q_emb = embedder.encode([query], convert_to_numpy=True)
    dists, idxs = nn.kneighbors(q_emb, n_neighbors=k)

    hits = []
    for dist, idx in zip(dists[0], idxs[0]):
        row = faq.iloc[idx]  # one positional lookup per hit instead of two
        # cosine distance -> similarity = 1 - dist
        hits.append((idx, 1 - dist, row['question'], row['answer']))
    return hits

In [None]:
GEN_MODEL_NAME = "microsoft/DialoGPT-medium"  # or "gpt2-medium"
tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL_NAME)
gen_model = AutoModelForCausalLM.from_pretrained(GEN_MODEL_NAME).to(device)

# `tokenizer` and `gen_model` are shared names that other cells in this notebook
# also assign. Pin DialoGPT-specific references so the function below keeps
# using the DialoGPT pair even after those shared names are rebound.
dialogpt_tokenizer = tokenizer
dialogpt_model = gen_model


# simple generate function
def generate_response_generative(prompt, max_new_tokens=200, temperature=0.7, top_p=0.9, top_k=50):
    """Sample a continuation of ``prompt`` from the DialoGPT model loaded above.

    Args:
        prompt: User text; the tokenizer's EOS token is appended before encoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens kept at each step.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is not
        included), with special tokens removed.
    """
    enc = dialogpt_tokenizer(
        prompt + dialogpt_tokenizer.eos_token, return_tensors="pt"
    ).to(dialogpt_model.device)
    prompt_len = enc["input_ids"].shape[1]

    out = dialogpt_model.generate(
        input_ids=enc["input_ids"],
        # pad_token_id is set to EOS below, so the mask cannot be inferred from
        # the ids; pass it explicitly instead of triggering the library warning.
        attention_mask=enc["attention_mask"],
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        pad_token_id=dialogpt_tokenizer.eos_token_id,
        eos_token_id=dialogpt_tokenizer.eos_token_id,
        no_repeat_ngram_size=3,
    )
    # decode only the generated part
    return dialogpt_tokenizer.decode(out[0, prompt_len:], skip_special_tokens=True)

# test
print(generate_response_generative("Hello! How do I lose weight?"))

tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Eat less.


In [None]:
# ensure model is GODEL:
GEN_MODEL_NAME = "microsoft/GODEL-v1_1-base-seq2seq"
tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL_NAME)
gen_model = AutoModelForSeq2SeqLM.from_pretrained(GEN_MODEL_NAME).to(device)

# `tokenizer` and `gen_model` are shared names that other cells in this notebook
# also assign. Pin GODEL-specific references so the function below keeps using
# the GODEL pair even after those shared names are rebound.
godel_tokenizer = tokenizer
godel_model = gen_model


def generate_response_godel(query, history=None, context=None,
                           max_new_tokens=600,
                           do_sample=False, top_p=0.95, temperature=0.9,
                           num_beams=4):
    """
    Generate with GODEL using readable markers (no raw <|...|> tokens).

    Args:
        query: The current user message.
        history: Optional list of earlier turns, each a dict with 'user' and
            'bot' keys; only the last three turns are put into the prompt.
        context: Optional context text; a default assistant description is
            used when it is empty or None.
        max_new_tokens: Upper bound on the number of generated tokens.
        do_sample: Sample when True; otherwise decode without sampling.
        top_p: Nucleus-sampling mass, forwarded only when ``do_sample`` is True.
        temperature: Sampling temperature, forwarded only when ``do_sample`` is True.
        num_beams: Number of beams passed to ``generate``.

    Returns:
        Cleaned string (trims any accidental repetition of the prompt and a
        leading "User:" / "Assistant:" / "Context:" label).
    """
    history = history or []
    context = context or "You are an expert personal fitness and nutrition assistant."

    # Build short dialogue history string (last 3 turns)
    dialogue = "".join(
        f"User: {turn['user']}\nAssistant: {turn['bot']}\n" for turn in history[-3:]
    )

    # Detect plan-like queries to vary system instruction
    plan_keywords = ["plan", "schedule", "calendar", "routine", "program", "diet", "workout"]
    lowered_query = query.lower()  # lower-case once instead of once per keyword
    is_plan = any(k in lowered_query for k in plan_keywords)

    if is_plan:
        system_instr = (
            "Instruction: You are a professional fitness & nutrition coach. "
            "If asked to create a plan or schedule, produce a detailed, day-by-day "
            "plan (include exercises, reps/sets, duration, and optional notes). "
            "If not asked for a plan, answer concisely and accurately."
        )
    else:
        system_instr = (
            "Instruction: You are a knowledgeable fitness & nutrition assistant. "
            "Answer the user's question clearly and accurately."
        )

    # Compose the final human-readable prompt (no special token markers)
    prompt = (
        f"{system_instr}\n\n"
        f"Context: {context}\n\n"
        f"{dialogue}"
        f"User: {query}\n"
        f"Assistant:"
    )

    gen_kwargs = dict(
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        num_beams=num_beams,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        pad_token_id=godel_tokenizer.eos_token_id,
        eos_token_id=godel_tokenizer.eos_token_id,
        length_penalty=1.0,
    )
    if do_sample:
        # Sampling-only flags: passing them with do_sample=False makes the
        # library warn that they are not valid and may be ignored.
        gen_kwargs.update(top_p=top_p, temperature=temperature)

    # Tokenize and generate (the attention mask is forwarded with the ids)
    enc = godel_tokenizer(prompt, return_tensors="pt").to(godel_model.device)
    outputs = godel_model.generate(**enc, **gen_kwargs)

    raw = godel_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Postprocess: if the model repeats the exact prompt text at the start, strip it
    if raw.startswith(prompt):
        cleaned = raw[len(prompt):].strip()
    else:
        # Otherwise drop leading "User:" / "Assistant:" / "Context:" echoes.
        # Each label is checked once, in this order, against the text left
        # after the previous removal.
        cleaned = raw.strip()
        for prefix in ("User:", "Assistant:", "Context:"):
            if cleaned.startswith(prefix):
                cleaned = cleaned[len(prefix):].lstrip()

    return cleaned


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Gemma checkpoint behind generate_response_gemma.
model_id = "google/gemma-2b-it"
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Half precision on GPU, full precision on CPU.
# NOTE: the installed transformers release warns that `torch_dtype` is
# deprecated in favour of `dtype`; the old keyword is kept here unchanged.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=(torch.float16 if device == "cuda" else torch.float32),
)
model = model.to(device)

def generate_response_gemma(prompt, max_new_tokens=600):
    """Sample an answer to ``prompt`` from the Gemma model loaded above.

    Args:
        prompt: Full prompt text sent to the model.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        Only the newly generated text, stripped of surrounding whitespace.
        ``generate`` returns the prompt tokens followed by the continuation,
        so the prompt part is sliced off before decoding; decoding the whole
        sequence would echo the prompt back inside the answer.
    """
    # NOTE(review): the prompt is sent as raw text; if this checkpoint expects a
    # chat template (tokenizer.apply_chat_template), confirm and switch to it.
    enc = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_len = enc["input_ids"].shape[1]
    out = model.generate(
        **enc,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.2,
    )
    return tokenizer.decode(out[0, prompt_len:], skip_special_tokens=True).strip()


`torch_dtype` is deprecated! Use `dtype` instead!


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [None]:
# NOTE(review): `hybrid_retrieval_first` is first defined in a later cell of this
# notebook, so on a fresh kernel (Restart & Run All) this cell presumably raises
# NameError unless the function is also defined somewhere not shown here — confirm.
# The same query is run again in the cell that follows the definition.
query = "Create a 7-day workout calendar for weight loss for a person with BMI of 25."
response = hybrid_retrieval_first(query)
print("Mode:", response['mode'])
print("\nGenerated Answer:\n", response['answer'])

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Mode: rag

Generated Answer:
 If not asked for a plan, answer concisely and accurately. Context: A: 3–5 times per week is effective for most people. Q: How to track calorie intake effectively? A: Apps or journals help track calories accurately. A: Morning workouts can boost energy and metabolism throughout the day.


In [None]:
# Minimum cosine similarity at which the stored FAQ answer is returned verbatim.
RETRIEVAL_THRESHOLD = 0.65


def rag_generate(query, k=3, results=None):
    """Use retrieved FAQ context + generative reasoning.

    Args:
        query: The user's question.
        k: Number of FAQ entries to retrieve when ``results`` is not supplied.
        results: Optional pre-computed output of ``retrieve_nn`` for this query
            (tuples of ``(index, similarity, question, answer)``). Supplying it
            avoids embedding and searching the same query a second time.

    Returns:
        The text produced by ``generate_response_gemma`` for the built prompt.
    """
    if results is None:
        results = retrieve_nn(query, k=k)
    context = "\n\n".join(f"Q: {question}\nA: {answer}" for _, _, question, answer in results)

    prompt = f"""
You are a professional AI fitness and nutrition assistant.
Below are some FAQs that might help answer the user’s question.

Context:
{context}

User Question:
{query}

Please give a detailed, structured, and motivational answer.
If the user asks for a plan or schedule, format it day-by-day clearly.
"""
    return generate_response_gemma(prompt)


def hybrid_retrieval_first(query, k=3, threshold=RETRIEVAL_THRESHOLD):
    """Decide between retrieval-only or RAG generation.

    Args:
        query: The user's question.
        k: Number of FAQ entries to retrieve.
        threshold: Similarity at or above which the first retrieved FAQ answer
            is returned directly.

    Returns:
        A dict with ``mode`` ("retrieval" or "rag"), ``answer`` (text) and
        ``score`` (similarity of the first retrieved entry, as a float).
    """
    results = retrieve_nn(query, k=k)
    _, top_sim, _, ans_text = results[0]

    if top_sim >= threshold:
        return {"mode": "retrieval", "answer": ans_text, "score": float(top_sim)}

    # fallback to RAG + generation, reusing the hits fetched above
    gen = rag_generate(query, k=k, results=results)
    return {"mode": "rag", "answer": gen, "score": float(top_sim)}


In [None]:
# Demo: a plan-style request, run through the hybrid pipeline.
query = "Create a 7-day workout calendar for weight loss for a person with BMI of 25."
response = hybrid_retrieval_first(query)
print(f"Mode: {response['mode']}")
print(f"\nGenerated Answer:\n {response['answer']}")

Mode: rag

Generated Answer:
 
You are a professional AI fitness and nutrition assistant.
Below are some FAQs that might help answer the user’s question.

Context:
Q: How often should I work out weekly?
A: 3–5 times per week is effective for most people.

Q: How to track calorie intake effectively?
A: Apps or journals help track calories accurately.

Q: What is the best time to workout?
A: Morning workouts can boost energy and metabolism throughout the day.

User Question:
Create a 7-day workout calendar for weight loss for a person with BMI of 25.

Please give a detailed, structured, and motivational answer.
If the user asks for a plan or schedule, format it day-by-day clearly.
Include specific exercises, sets, reps, rest periods, and any additional details relevant to weight loss.

**Weekly Weight Loss Workout Calendar for BMI 25**

**Day 1:**
* Warm up with 5 minutes of light cardio and dynamic stretching.
* Bodyweight squats (3 sets x 8-10 reps)
* Dumbbell bicep curls (3 sets x 8-1

In [None]:
# Demo: a short nutrition question, run through the hybrid pipeline.
query = "What type of foods help muscle gain?	"
response = hybrid_retrieval_first(query)
print(f"Mode: {response['mode']}")
print(f"\nGenerated Answer:\n {response['answer']}")

Mode: retrieval

Generated Answer:
 Lean meats, fish, legumes, and protein-rich foods help muscle growth.
