In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the seq2seq checkpoint used to answer questions.
# Note: flan-t5-large is a multi-gigabyte download (about 3 GB of weights), not a small model.
# The id lives in one constant so the tokenizer and the model cannot drift apart.
MODEL_ID = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
qa_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)




In [12]:

# ------------------------
# Conversational Memory
# ------------------------
# Module-level log of completed turns, oldest first.
# generate_answer() appends to it after every reply; build_prompt() reads the tail of it.
chat_history = []  # stores (user, bot) pairs

def build_prompt(user_query, history=None, max_turns=3):
    """Build the model prompt: a fixed preamble, recent turns, then the new question.

    Args:
        user_query: The new user message to be answered.
        history: Optional sequence of (user, bot) pairs to render. When omitted,
            the module-level ``chat_history`` is used.
        max_turns: How many of the most recent turns to include (default 3).
            Zero or a negative value includes no history at all.

    Returns:
        The prompt text, ending with an open ``Bot:`` line for the model to complete.
    """
    turns = chat_history if history is None else history
    # Guard the slice: turns[-0:] would return the *whole* list, not an empty one.
    recent = turns[-max_turns:] if max_turns > 0 else []
    history_text = "".join(f"User: {u}\nBot: {b}\n" for u, b in recent)
    prompt = f"""
You are a helpful assistant.

Conversation so far:
{history_text}

Now answer the new user question as user is PHD in computer science:
User: {user_query}
Bot:
"""
    return prompt

def generate_answer(user_query, max_length=150):
    """Answer ``user_query`` with the loaded model and remember the exchange.

    Args:
        user_query: The new user message.
        max_length: Value forwarded to ``qa_model.generate`` as ``max_length``
            (default 150, the previously hard-coded limit).

    Returns:
        The decoded answer text. As a side effect the (question, answer) pair
        is appended to ``chat_history``.
    """
    # 1. Build prompt with history
    prompt = build_prompt(user_query)

    # 2. Generate answer
    # truncation=True asks the tokenizer to clip over-long prompts.
    # The tensors are moved to wherever the model lives so this keeps working
    # if qa_model is later placed on a GPU.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(qa_model.device)
    outputs = qa_model.generate(**inputs, max_length=max_length)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # 3. Save to memory
    chat_history.append((user_query, answer))

    return answer

In [13]:

# ------------------------
# Example Conversation
# ------------------------
# Start from an empty memory so re-running this cell rebuilds the same prompts
# instead of stacking the demo on top of turns left over from an earlier run.
chat_history.clear()

demo_questions = [
    "Hello, who are you?",
    "Can you tell me about Python?",
    "And what about Java?",
    "Compare Python and Java in short.",
]

for turn_number, question in enumerate(demo_questions):
    # Blank line between exchanges, but not before the first one.
    prefix = "\n" if turn_number else ""
    print(f"{prefix}User: {question}")
    print("Bot:", generate_answer(question))


User: Hello, who are you?
Bot: I am Bot.

User: Can you tell me about Python?
Bot: I am Bot. I am a computer science student. I am interested in learning about Python.

User: And what about Java?
Bot: I am interested in learning about Java.

User: Compare Python and Java in short.
Bot: I am Bot. I am a computer science student. I am interested in learning about Python and Java.


In [8]:
# pip install transformers accelerate torch --upgrade

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

MODEL_ID = "google/flan-t5-large"  # stronger than base; switch to base if needed

# Run on the GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Half precision is requested only on CUDA; on CPU no dtype override is passed (torch_dtype=None).
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_ID,
    torch_dtype=None if DEVICE != "cuda" else torch.float16,
)
model.to(DEVICE)

# ------------------------
# Minimal memory store
# ------------------------
# Shared conversation log, oldest turn first. add_turn() appends to it;
# last_k_history() and extract_relevant_facts() read from it.
# Assigning a fresh list here drops any turns stored under this name earlier.
chat_history = []  # list of (user, bot)

def add_turn(user_msg: str, bot_msg: str):
    """Record one exchange as a (user, bot) tuple at the end of ``chat_history``."""
    turn = (user_msg, bot_msg)
    chat_history.append(turn)

def last_k_history(k=6, history=None):
    """Return the ``k`` most recent turns as a new list, oldest first.

    Args:
        k: Maximum number of turns to return (default 6). Zero or a negative
            value yields an empty list.
        history: Optional sequence of (user, bot) pairs to slice. When omitted,
            the module-level ``chat_history`` is used.

    Returns:
        A list with at most ``k`` trailing turns.
    """
    turns = chat_history if history is None else history
    # Without this guard, k == 0 would slice as turns[-0:] and return everything,
    # and a negative k would drop the oldest turns instead of limiting the count.
    if k <= 0:
        return []
    return list(turns[-k:])

def extract_relevant_facts(keywords, history=None):
    """Return prior bot statements from turns that mention any keyword (case-insensitive).

    A turn matches when a keyword occurs as a substring of either the user
    message or the bot reply; the bot reply of each matching turn is reported.

    Args:
        keywords: An iterable of keyword strings, or a single string (treated
            as one keyword rather than as a sequence of characters). Blank
            keywords are ignored, because an empty string is a substring of
            every message and would match all turns.
        history: Optional sequence of (user, bot) pairs to search. When omitted,
            the module-level ``chat_history`` is used.

    Returns:
        One "- From earlier: ..." line per matching turn joined by newlines, or
        "- (no prior facts found)" when nothing matches.
    """
    if isinstance(keywords, str):
        keywords = [keywords]
    needles = [k.lower() for k in (keywords or []) if k and k.strip()]

    turns = chat_history if history is None else history
    facts = []
    for user_msg, bot_msg in turns:
        # Lower-case each message once per turn instead of once per keyword.
        user_lc, bot_lc = user_msg.lower(), bot_msg.lower()
        if any(n in user_lc or n in bot_lc for n in needles):
            facts.append(f"- From earlier: {bot_msg}")
    return "\n".join(facts) if facts else "- (no prior facts found)"

def build_prompt(user_query: str, force_keywords=None):
    """
    Assemble the grounded prompt for one question.

    The prompt has three parts: the most recent turns (up to six, via
    last_k_history), a 'Facts' block, and the question followed by fixed
    instructions telling the model to use ALL facts. The facts block is only
    populated when force_keywords is given (via extract_relevant_facts);
    otherwise a "- (none)" placeholder is shown.
    """
    # Recent conversation, one "User:/Bot:" pair per turn (for continuity).
    recent_lines = []
    for user_msg, bot_msg in last_k_history(6):
        recent_lines.append(f"User: {user_msg}\nBot: {bot_msg}")
    conversation = "\n".join(recent_lines) or "(no prior conversation)"

    # Targeted facts (so the model does not latch onto only one of them).
    facts = extract_relevant_facts(force_keywords) if force_keywords else ""
    facts = facts or "- (none)"

    return f"""You are a careful assistant. Always ground your answer in the provided facts and conversation.

Conversation (recent):
{conversation}

Facts extracted from conversation (use ALL of them; do not ignore or overwrite):
{facts}

User question: {user_query}

INSTRUCTIONS:
- Synthesize ALL relevant facts above.
- If comparing two items (e.g., Python vs Java), reference BOTH explicitly.
- Be concise and avoid repeating exact prior sentences verbatim.
- If facts are contradictory or unusual, still respect them (they come from the user's context).
Answer:
"""

@torch.inference_mode()
def generate(user_query: str, force_keywords=None, max_new_tokens=160, temperature=0.4, top_p=0.9, repetition_penalty=1.1):
    """Generate an answer for ``user_query`` and store the exchange in memory.

    Args:
        user_query: The new user message.
        force_keywords: Optional keywords forwarded to ``build_prompt`` to
            populate the facts block.
        max_new_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature. ``None`` or a value <= 0 selects
            greedy (deterministic) decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.
        repetition_penalty: Penalty forwarded to ``model.generate``.

    Returns:
        The decoded answer text. As a side effect the (question, answer) pair
        is recorded through ``add_turn``.
    """
    prompt = build_prompt(user_query, force_keywords=force_keywords)
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)

    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": repetition_penalty,
    }
    if temperature is not None and temperature > 0:
        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # Sampling needs a strictly positive temperature; fall back to greedy
        # decoding and leave out the sampling-only knobs.
        gen_kwargs["do_sample"] = False

    outputs = model.generate(**inputs, **gen_kwargs)
    ans = tokenizer.decode(outputs[0], skip_special_tokens=True)
    add_turn(user_query, ans)
    return ans

# ------------------------
# Demo 1: Fake-history proof (forces use of BOTH facts)
# ------------------------
if __name__ == "__main__":
    # Clear history
    chat_history.clear()

    # generate() samples by default (do_sample=True), so seed the RNG to make
    # the printed answer repeatable on the same setup.
    DEMO_SEED = 42
    torch.manual_seed(DEMO_SEED)

    # Inject fake knowledge (to prove history is read AND combined)
    add_turn("What is Python?", "Python is a fruit found in India.")
    add_turn("What is Java?", "Java is a traditional dance performed at festivals.")

    demo_question = "Compare Python and Java in short."
    print(f"User: {demo_question}")
    ans = generate(
        demo_question,
        force_keywords=["Python", "Java"]   # <- ensures both facts are extracted
    )
    print("Bot:", ans)

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

User: Compare Python and Java in short.
Bot: If comparing two items (e.g., Python vs Java), reference BOTH explicitly.
