In [1]:
%%capture
# Environment setup cell. %%capture swallows the installer output for the whole cell.
# Note: `-U` is pip's short form of `--upgrade`, so the upgrade flag is passed twice
# on each of the next five lines.
!pip3 install --upgrade -q -U bitsandbytes
!pip3 install --upgrade -q -U peft
!pip3 install --upgrade -q -U trl
!pip3 install --upgrade -q -U accelerate
!pip3 install --upgrade -q -U datasets
# Metric/evaluation packages; accelerate is requested a second time here.
!pip install evaluate rouge_score bert_score sacrebleu nltk sentencepiece accelerate # Need accelerate & sentencepiece for some BERTScore models
# Runs last: installs transformers from the v4.49.0-Gemma-3 tag of the GitHub repository.
!pip3 install git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3

In [None]:
import os
from google.colab import userdata

# Read the Hugging Face token from Colab's secret store (a secret named "HF_TOKEN")
# instead of hard-coding it in the notebook. `userdata.get` raises when the secret
# is missing or notebook access has not been granted, so a misconfigured setup
# fails in this cell rather than later during a model download.
os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")

In [36]:
import torch
import os
from transformers import (
    AutoTokenizer,
    BitsAndBytesConfig,
    Gemma3ForCausalLM, # Use the correct Gemma 3 class
    GenerationConfig
)
from peft import PeftModel
import readline # Optional: for better terminal input experience

# --- Configuration ---
# Hub ID of the PEFT adapter applied on top of BASE_MODEL_ID for summarization.
SUMMARIZER_ADAPTER_ID = "lalit-03/gemma3-1b-summarization-finetuned"
BASE_MODEL_ID = "google/gemma-3-1b-it" # Base for both summarizer and generator; also the summarizer's tokenizer source
GENERATOR_MODEL_ID = "google/gemma-3-1b-it" # Checkpoint for the local generator; can be the same as BASE_MODEL_ID or different

# Quantization config: 4-bit NF4 weights with double quantization. The same object
# is passed to both model loads to keep memory usage down.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    # Alternative compute dtype for hardware that supports bfloat16:
    # bnb_4bit_compute_dtype=torch.bfloat16
    bnb_4bit_compute_dtype=torch.float16 # float16 compute dtype, used as the fallback when bf16 is not supported
)

# --- Device Setup ---
# Printed for information; the model loads in this notebook pass device_map="auto"
# rather than this value.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# --- Summarizer: quantized base checkpoint wrapped with the fine-tuned PEFT adapter ---
print(f"Loading base model '{BASE_MODEL_ID}' for summarizer...")
base_model_summarizer = Gemma3ForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    attn_implementation="eager",  # "sdpa" is the alternative if available/preferred
    device_map="auto",  # leave device placement to HF
    quantization_config=quantization_config,
)

print(f"Loading PEFT adapter '{SUMMARIZER_ADAPTER_ID}' for summarizer...")
summarizer_model = PeftModel.from_pretrained(
    base_model_summarizer,
    SUMMARIZER_ADAPTER_ID,
)
summarizer_model.eval()  # inference only
print("Summarizer model loaded.")

# --- Summarizer tokenizer: taken from the adapter's base checkpoint ---
summarizer_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
summarizer_tokenizer.padding_side = "left"  # left padding for generation
if summarizer_tokenizer.pad_token is None:
    # No dedicated pad token: reuse EOS. If this branch is ever taken, the model
    # config may also need updating, e.g.
    # summarizer_model.config.pad_token_id = summarizer_tokenizer.pad_token_id
    summarizer_tokenizer.pad_token = summarizer_tokenizer.eos_token
print("Summarizer tokenizer loaded.")


# --- Response generator: the plain quantized checkpoint, no adapter ---
print(f"Loading generator model '{GENERATOR_MODEL_ID}'...")
generator_model = Gemma3ForCausalLM.from_pretrained(
    GENERATOR_MODEL_ID,
    attn_implementation="eager",  # or "sdpa"
    device_map="auto",  # leave device placement to HF
    quantization_config=quantization_config,
)
generator_model.eval()  # inference only
print("Generator model loaded.")

# --- Generator tokenizer ---
generator_tokenizer = AutoTokenizer.from_pretrained(GENERATOR_MODEL_ID)
generator_tokenizer.padding_side = "left"  # left padding for generation
if generator_tokenizer.pad_token is None:
    # No dedicated pad token: reuse EOS. If this branch is ever taken, consider
    # generator_model.config.pad_token_id = generator_tokenizer.pad_token_id
    generator_tokenizer.pad_token = generator_tokenizer.eos_token
print("Generator tokenizer loaded.")

Using device: cuda
Loading base model 'google/gemma-3-1b-it' for summarizer...
Loading PEFT adapter 'lalit-03/gemma3-1b-summarization-finetuned' for summarizer...
Summarizer model loaded.
Summarizer tokenizer loaded.
Loading generator model 'google/gemma-3-1b-it'...
Generator model loaded.
Generator tokenizer loaded.


In [None]:
# --- Helper Functions ---

def format_history_for_summarizer(history):
    """
    Render the conversation history as the summarizer's instruction prompt.

    Each turn becomes one ``>>> User: ...`` or ``>>> Bot: ...`` line (any role
    other than "user" is labelled "Bot"). The lines are embedded between the
    instruction header and the trailing ``### Summary:`` marker, after which
    the model is expected to write its summary. Change this function if the
    adapter was trained on a different layout.
    """
    speaker_lines = [
        f">>> {'User' if turn['role'] == 'user' else 'Bot'}: {turn['content']}"
        for turn in history
    ]
    # Surrounding whitespace of the whole transcript is trimmed before embedding.
    transcript = "\n".join(speaker_lines).strip()

    return (
        "Instruction: Please summarize the following empathetic dialogue conversation.\n"
        "\n"
        "### Conversation:\n"
        f"{transcript}\n"
        "\n"
        "### Summary:\n"
    )

@torch.no_grad()
def get_summary(conversation_history, max_input_tokens=512):
    """Generate a summary of the conversation history with the summarizer model.

    The history is rendered with ``format_history_for_summarizer`` and decoded
    with beam search (no sampling).

    When the rendered prompt exceeds ``max_input_tokens``, the oldest turns are
    dropped until it fits. Relying on tokenizer truncation alone clips one end
    of the prompt: either the instruction header or, with right-side
    truncation, the newest turns together with the trailing ``### Summary:``
    cue. Removing whole old turns keeps both ends intact.

    Args:
        conversation_history: list of ``{"role": ..., "content": ...}`` dicts,
            oldest first. The list itself is not modified.
        max_input_tokens: token budget for the prompt. Defaults to 512, the
            limit previously hard-coded in the tokenizer call.

    Returns:
        The generated summary text with surrounding whitespace removed.
    """
    # Work on a copy so the caller's history is never mutated.
    turns = list(conversation_history)
    prompt = format_history_for_summarizer(turns)

    # Shrink from the oldest end until the prompt fits. At least one turn is
    # always kept; the tokenizer-level truncation below remains as the last
    # resort for a single oversized turn.
    while len(turns) > 1 and len(summarizer_tokenizer(prompt).input_ids) > max_input_tokens:
        turns = turns[1:]
        prompt = format_history_for_summarizer(turns)

    inputs = summarizer_tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_input_tokens,
    ).to(summarizer_model.device)

    generation_config_summary = GenerationConfig(
        max_new_tokens=100,  # upper bound on summary length
        pad_token_id=summarizer_tokenizer.pad_token_id,
        eos_token_id=summarizer_tokenizer.eos_token_id,
        do_sample=False,  # no sampling; beam search below
        num_beams=3,
        early_stopping=True,
    )

    outputs = summarizer_model.generate(**inputs, generation_config=generation_config_summary)

    # Decode only the tokens that come after the prompt.
    input_length = inputs.input_ids.shape[1]
    generated_ids = outputs[:, input_length:]
    summary = summarizer_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Remove the marker in case the model echoes it.
    return summary.replace("### Summary:", "").strip()

from openai import OpenAI
# GROQ setup: Groq serves an OpenAI-compatible endpoint, so the OpenAI client is
# pointed at it. The key is read from the GROQ_API_KEY environment variable,
# falling back to the Colab secret of the same name.
# Never paste API keys into the notebook, not even in commented-out code: the
# comment is saved and shared with the file. Any key that was ever stored that way
# should be treated as exposed and revoked.
groq_client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.environ.get("GROQ_API_KEY") or userdata.get("GROQ_API_KEY"),
)

def get_empathetic_response(summary, conversation_history):
    """Produce an empathetic reply through the GROQ chat-completions API.

    Args:
        summary: summary of the conversation so far (may be an empty string).
        conversation_history: list of ``{"role": ..., "content": ...}`` dicts,
            oldest first.

    Returns:
        A fixed greeting when the history is empty; otherwise the model's reply
        with surrounding whitespace removed.
    """
    # Nothing has been said yet: canned greeting, no API call.
    if not conversation_history:
        return "Hello! How can I help you today?"

    # Newest user turn, scanning from the end; "" when no user turn exists.
    last_user_message = next(
        (
            turn["content"]
            for turn in reversed(conversation_history)
            if turn["role"] == "user"
        ),
        "",
    )
    # Fall back to whatever the most recent entry says.
    last_user_message = last_user_message or conversation_history[-1]['content']

    # Instruction-style prompt: the model sees only the summary and the last user message.
    prompt = f"""You are an empathetic chatbot. Your goal is to provide supportive and understanding responses, while matching the user's tone appropriately.
If the user's last message is a casual or friendly greeting (like "hi", "hello", or "how are you"), respond in a warm and casual way.
If the user's last message expresses emotional distress, respond with empathy, care, and understanding.
Base your reply *only* on the following summary of the conversation so far, and the user's very last message.
Do not refer to the summary directly in your response.

Conversation Summary:
{summary}

User's Last Message:
{last_user_message}

Your Response:
"""

    # Single-turn chat completion against the GROQ endpoint.
    completion = groq_client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        top_p=0.9,
        max_tokens=200,
    )

    reply_text = completion.choices[0].message.content
    return reply_text.strip()

In [40]:
# --- Main Chat Loop ---
# Reads user input until 'quit' (or EOF). From the second message onwards each turn
# first summarizes the history, then asks for an empathetic reply based on that
# summary and the latest user message.

# History cap: keep the last 10 pairs (user + bot) to bound memory use and prompt
# length. Defined once here rather than on every loop iteration.
MAX_HISTORY_TURNS = 10

conversation_history = []

print("\n--- Empathetic Chatbot ---")
print("Chatbot is ready. Type 'quit' to exit.")

# Optional: Initial bot message
# initial_bot_message = "Hello there! I'm here to listen. How are you feeling today?"
# print(f"Bot: {initial_bot_message}")
# conversation_history.append({"role": "assistant", "content": initial_bot_message})

while True:
    try:
        user_input = input("You: ")
    except EOFError: # Handle Ctrl+D
        print("\nExiting chatbot.")
        break

    # Compare against the trimmed text so that " quit" or "quit " also exits
    # instead of being sent to the model as a message.
    command = user_input.strip()

    if command.lower() == 'quit':
        print("Bot: Goodbye! Take care.")
        break

    if not command: # Skip empty input
        continue

    # Add user message to history (stored as typed)
    conversation_history.append({"role": "user", "content": user_input})

    # 1. Generate Summary (only once there is more than the first message)
    current_summary = ""
    if len(conversation_history) > 1:
        print("\nGenerating summary...")
        current_summary = get_summary(conversation_history)
        print(f"Summary generated: {current_summary[:150]}...") # Print snippet
    else:
        print("Skipping summary for the first turn.")

    # 2. Generate Empathetic Response
    print("Generating response...")
    bot_response = get_empathetic_response(current_summary, conversation_history)

    # Add bot response to history
    conversation_history.append({"role": "assistant", "content": bot_response})

    # Print bot response
    print(f"Bot: {bot_response}")

    # Trim to the most recent MAX_HISTORY_TURNS*2 entries (10 user, 10 bot)
    if len(conversation_history) > MAX_HISTORY_TURNS * 2:
        conversation_history = conversation_history[-(MAX_HISTORY_TURNS * 2):]
        print("(History trimmed)")


--- Empathetic Chatbot ---
Chatbot is ready. Type 'quit' to exit.
You: Hi
Skipping summary for the first turn.
Generating response...
Bot: Hey! It's great to chat with you! How's your day going so far?
You: my girlfriend left me

Generating summary...
Summary generated: In a moment of vulnerability, the user expresses feelings of abandonment and hurt after their girlfriend has broken up with them. The bot responds wit...
Generating response...
Bot: I'm so sorry to hear that. It can be really tough to go through a breakup, especially when it feels like someone you care about is walking away. Would you like to talk about what's been going on and how you're feeling? I'm here to listen and offer support.
You: her memories haunt me

Generating summary...
Summary generated: In a moment of vulnerability, the user expresses deep pain from a recent breakup, feeling that their ex-girlfriend has left them with painful memories...
Generating response...
Bot: I'm so sorry to hear that. It's like s

In [9]:
i