In [1]:
import json
import re
import torch
import unicodedata
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
from sentence_transformers import SentenceTransformer, util

# Load the sequence-classification model and its tokenizer from the local ./Model directory.
model = XLMRobertaForSequenceClassification.from_pretrained("./Model")
tokenizer = XLMRobertaTokenizer.from_pretrained("./Model")

# intent_mapping is looked up with the stringified predicted class index
# (see predict_intent) and yields the intent name.
with open("label-mapping.json", "r", encoding="utf-8") as f:
    intent_mapping = json.load(f)

# response_list is iterated as a sequence of entries that are read through the
# "keyword", "intent" and "answer" keys by the functions below.
with open("combine-data.json", "r", encoding="utf-8") as f:
    response_list = json.load(f)

# Sentence embedder used by get_response to compare the question with stored keywords.
embedder = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

def clean_text(text):
    """Normalize ``text`` for keyword matching and embedding.

    Steps: NFKC-normalize, lowercase, drop every character that is not a
    letter, combining mark, number or whitespace, then collapse whitespace
    runs to a single space and trim the ends.

    Unlike an ASCII-only ``[^a-z0-9\\s]`` filter, letters and combining marks
    from any script (accented Latin, Khmer, ...) are preserved, so
    non-English questions and keywords are not reduced to an empty string.
    For pure-ASCII input the result is the same as with the ASCII filter.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned, lowercased string (possibly empty).
    """
    text = unicodedata.normalize("NFKC", text).lower()
    # Keep Unicode categories L* (letters), M* (marks, needed for scripts that
    # write vowels as combining signs) and N* (numbers); punctuation, symbols
    # and control characters are removed.
    kept = "".join(
        ch for ch in text
        if ch.isspace() or unicodedata.category(ch)[0] in "LMN"
    )
    return re.sub(r"\s+", " ", kept).strip()

def predict_intent(question):
    """Classify ``question`` with the sequence-classification model.

    The question is tokenized, passed through ``model``, and the index of the
    highest logit is looked up (as a string key) in ``intent_mapping``.

    Args:
        question: The user's question as a string.

    Returns:
        The intent name from ``intent_mapping``, or ``"UnknownIntent"`` when
        the predicted index has no entry in the mapping.
    """
    inputs = tokenizer(question, return_tensors="pt", truncation=True, padding=True)
    # Inference only: disable autograd so no computation graph is built or
    # kept alive for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_label = torch.argmax(outputs.logits, dim=1).item()
    intent = intent_mapping.get(str(predicted_label), "UnknownIntent")
    print(f"Predicted Intent: {intent}")
    return intent

def fallback_intent(question):
    """Guess an intent by substring-matching stored keywords in ``question``.

    Both the question and each entry's keyword are passed through
    ``clean_text`` before comparison. The first entry of ``response_list``
    whose cleaned keyword occurs in the cleaned question wins.

    Args:
        question: The user's question as a string.

    Returns:
        The matching entry's ``"intent"`` value, or ``"UnknownIntent"`` when
        no keyword matches.
    """
    cleaned = clean_text(question)
    for item in response_list:
        keyword = clean_text(item["keyword"])
        # An empty cleaned keyword is a substring of every string; skip it so
        # it cannot hijack the match for unrelated questions.
        if keyword and keyword in cleaned:
            print(f"Fallback Intent Found: {item['intent']}")
            return item["intent"]
    return "UnknownIntent"

def get_response(intent, question, min_score=0.3):
    """Pick the best stored answer for ``question`` among entries of ``intent``.

    Entries of ``response_list`` whose ``"intent"`` equals ``intent``
    (case-insensitively) are ranked by cosine similarity between the embedded
    cleaned question and each embedded cleaned keyword. If the best similarity
    is below ``min_score``, a direct substring match of a keyword inside the
    question is tried instead.

    Args:
        intent: Intent name used to filter ``response_list``.
        question: The user's question as a string.
        min_score: Minimum cosine similarity for the semantic match to be
            accepted. Defaults to 0.3.

    Returns:
        The formatted answer string, or an apology message when no entry has
        the requested intent or nothing matches well enough.
    """
    cleaned_question = clean_text(question)
    wanted_intent = intent.lower()
    filtered_responses = [item for item in response_list if item["intent"].lower() == wanted_intent]

    print(f"Looking for responses with intent: {intent}")

    if not filtered_responses:
        print(f"No responses found for intent: {intent}")
        return "Sorry, I cannot answer that."

    print(f"Found {len(filtered_responses)} response(s) for intent '{intent}'")

    filtered_keywords = [clean_text(item["keyword"]) for item in filtered_responses]
    filtered_embeddings = embedder.encode(filtered_keywords, convert_to_tensor=True)
    question_embedding = embedder.encode(cleaned_question, convert_to_tensor=True)

    # The similarity result is a (1, N) matrix; take row 0 rather than
    # squeeze(). With a single candidate (N == 1) squeeze() would produce a
    # 0-dim tensor, and indexing it below would raise IndexError.
    similarities = util.pytorch_cos_sim(question_embedding, filtered_embeddings)[0]
    best_idx = int(torch.argmax(similarities))
    best_score = float(similarities[best_idx])
    best_item = filtered_responses[best_idx]

    print(f"Best Match: '{best_item['keyword']}' | Score: {best_score:.2f}")

    # Fallback: if similarity is too low, try keyword substring match
    if best_score < min_score:
        print("Low semantic score, trying direct keyword match...")
        for item, keyword in zip(filtered_responses, filtered_keywords):
            # Skip empty cleaned keywords: "" is a substring of everything.
            if keyword and keyword in cleaned_question:
                print(f"Found match by keyword: {item['keyword']}")
                return format_answer(item["answer"])
        return "Sorry, I don't have an answer for that."

    return format_answer(best_item["answer"])

def format_answer(answer):
    """Render a stored answer as text.

    A dict is serialized as 2-space-indented JSON with non-ASCII characters
    left unescaped; any other value is converted with ``str``.
    """
    if not isinstance(answer, dict):
        return str(answer)
    return json.dumps(answer, ensure_ascii=False, indent=2)

def chatbot_response(user_question):
    """Answer ``user_question`` end to end.

    The classifier's intent is used unless it is ``"UnknownIntent"``, in which
    case the keyword-based fallback supplies the intent. The chosen intent and
    the question are then handed to ``get_response``.
    """
    predicted = predict_intent(user_question)
    # Only consult the keyword fallback when the classifier gave no usable intent.
    resolved = fallback_intent(user_question) if predicted == "UnknownIntent" else predicted
    return get_response(resolved, user_question)


In [2]:
# Smoke test: send one sample question through chatbot_response and print the reply.
question = "visa card"
print("Bot:", chatbot_response(question))

Predicted Intent: Wing-Visa-Card-Virtual
Looking for responses with intent: Wing-Visa-Card-Virtual
Found 10 response(s) for intent 'Wing-Visa-Card-Virtual'
Best Match: 'wing visa card virtual' | Score: 0.72
Bot: Wing Visa Card Virtual is a digital version of a Visa card provided by Wing Bank that you can use for online shopping, travel booking, and digital subscriptions. The Wing Visa Card Virtual works just like a physical card but exists entirely on the Wing Bank App.
