In [45]:
!pip install groq python-dotenv --quiet

In [46]:
!pip install bert-score --quiet

In [47]:
import os
import json
from groq import Groq
from dotenv import load_dotenv
from bert_score import score
import warnings
from transformers import logging as hf_logging

In [48]:
# Merge any local .env file into the process environment, then build the
# module-level Groq client from the GROQ_API_KEY variable.
load_dotenv()
api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=api_key)

In [49]:
# Load the user profile that personalizes every rewritten query.
# Decode as UTF-8 explicitly so non-ASCII profile text does not depend on the
# platform's default locale encoding.
with open("profile.json", "r", encoding="utf-8") as f:
    user_profile = json.load(f)

In [50]:
# Keep cell output readable: restrict transformers logging to errors and
# suppress every Python warning for the rest of the session.
hf_logging.set_verbosity_error()
warnings.filterwarnings("ignore")

In [51]:
def rewrite_query(query, profile):
    """Rewrite a search query using the user's profile via a chat completion.

    Args:
        query: The raw search query typed by the user.
        profile: JSON-serializable user profile; it is embedded in the prompt
            as pretty-printed JSON.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the reply carries no text content.
    """
    profile_text = json.dumps(profile, indent=2)
    prompt = (
        "You are a personalized query rewriting assistant.\n"
        "Use the user profile below to produce a rewritten search query.\n"
        "Only output the rewritten query (one line), nothing else.\n\n"
        f"User Profile:\n{profile_text}\n\n"
        f"User Query: {query}\n"
        "IMPORTANT: Output ONLY the rewritten query (single line). Do not explain.\n"
    )

    response = client.chat.completions.create(
        model="openai/gpt-oss-20B",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
    )
    # The message content is optional in the response schema; calling .strip()
    # on None would raise AttributeError, so fall back to an empty string.
    content = response.choices[0].message.content
    return (content or "").strip()

In [52]:
def evaluate_with_bertscore(original, rewritten):
    """Return the BERTScore F1 of the rewritten query against the original.

    The rewritten query is passed as the candidate and the original query as
    the reference; the F1 result is averaged and returned as a Python float.
    """
    candidates = [rewritten]
    references = [original]
    _precision, _recall, f1_scores = score(
        candidates, references, lang="en", verbose=False
    )
    mean_f1 = f1_scores.mean()
    return mean_f1.item()

In [57]:
def _iter_profile_terms(value):
    """Yield each scalar inside a possibly nested profile value as a lowercase string."""
    if value is None:
        # A missing value would otherwise become the keyword "none".
        return
    if isinstance(value, dict):
        for nested in value.values():
            yield from _iter_profile_terms(nested)
    elif isinstance(value, (list, tuple, set)):
        for nested in value:
            yield from _iter_profile_terms(nested)
    else:
        yield str(value).lower()


def heuristic_evaluation(rewritten, profile):
    """Check whether the rewritten query mentions anything from the user profile.

    Values of the selected profile fields are flattened (dicts and lists are
    walked recursively), lowercased, split on commas, and each non-empty piece
    is tested as a case-insensitive substring of the rewritten query.

    Args:
        rewritten: The rewritten query text.
        profile: Mapping of profile field names to values.

    Returns:
        1 if any profile term occurs in the rewritten query, otherwise 0.
    """
    fields_to_check = (
        "preferences",
        "interests",
        "movies",
        "lifestyle",
        "food_habits",
        "search_preferences",
        "demographics",
        "job_summary",
        "education_summary",
        "relationships",
    )

    rewritten_lower = rewritten.lower()
    for field in fields_to_check:
        if field not in profile:
            continue
        for term in _iter_profile_terms(profile[field]):
            # Split on a bare comma and strip each piece so that "a,b" and
            # "a , b" are handled the same way as "a, b".
            for part in term.split(","):
                part = part.strip()
                if part and part in rewritten_lower:
                    return 1  # personalized
    return 0  # not personalized

In [59]:
if __name__ == "__main__":
    # Interactive loop: read a query, rewrite it with the user profile, then
    # report the BERTScore similarity and the heuristic personalization flag.
    # "quite" is kept for backward compatibility; "quit" is the intended word.
    exit_commands = {"exit", "bye", "close", "quit", "quite"}

    while True:
        try:
            user_query = input("User: ").strip()
        except (EOFError, KeyboardInterrupt):
            # A closed stdin or an interrupt at the prompt ends the chat
            # cleanly instead of leaving a traceback.
            print("\nChat ended, Goodbye!")
            break

        if not user_query:
            # Nothing to rewrite; avoid spending an API call on blank input.
            continue

        if user_query.lower() in exit_commands:
            print("Chat ended, Goodbye!")
            break

        rewritten = rewrite_query(user_query, user_profile)
        print("Rewritten:", rewritten)

        metric_score = evaluate_with_bertscore(user_query, rewritten)
        print(f"BERTScore (semantic similarity): {metric_score:.4f}\n")

        heuristic_flag = heuristic_evaluation(rewritten, user_profile)
        print(f"Heuristic Evaluation: {'Personalized' if heuristic_flag==1 else 'Not Personalized'}")

User: exit
Chat ended, Goodbye!
