In [3]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch

# --- Configuration ---
# Name of the sentence-transformers model used to turn text into embeddings.
MODEL_NAME = "all-MiniLM-L6-v2"
# Cutoff applied to the best match's similarity score: when the score falls
# below this value the bot says it doesn't know instead of answering.
# 0.65 is a reasonable starting point; tune it for your own FAQ data.
SIMILARITY_THRESHOLD = 0.65


def setup_data_and_embeddings():
    """Build the sample FAQ table and attach an embedding to every question.

    Loads the sentence-embedding model named by ``MODEL_NAME``, encodes each
    FAQ question with it, and stores the resulting vectors (as plain Python
    lists) in an ``embedding`` column. Progress messages are printed along
    the way.

    Returns:
        A ``(faq_df, model)`` tuple: the DataFrame with ``question``,
        ``answer`` and ``embedding`` columns, and the loaded model so callers
        can encode user queries with the same model.
    """
    # Step 1: the sample FAQ, one (question, answer) pair per entry.
    faq_pairs = [
        (
            "What are your shipping options?",
            "We offer standard (5-7 business days), expedited (2-3 business days), and overnight shipping.",
        ),
        (
            "How do I track my order?",
            "Once your order ships, you will receive an email with a tracking number. You can use this number on the carrier's website.",
        ),
        (
            "What is the return policy?",
            "We accept returns within 30 days of purchase. The item must be unused and in its original packaging. Please visit our returns page to start the process.",
        ),
        (
            "Can I change my shipping address?",
            "If your order has not yet shipped, you can update the address in your account settings. If it has already shipped, you'll need to contact the carrier directly.",
        ),
        (
            "How do I cancel an order?",
            "You can cancel your order within 2 hours of placing it through your order history page. After that, the order cannot be canceled.",
        ),
        (
            "Do you offer international shipping?",
            "Yes, we ship to over 100 countries worldwide. International shipping costs and times vary by destination.",
        ),
        (
            "My order arrived damaged, what should I do?",
            "We're sorry to hear that! Please contact our customer support with your order number and a photo of the damaged item, and we will send a replacement.",
        ),
    ]
    faq_df = pd.DataFrame(faq_pairs, columns=['question', 'answer'])

    # Step 2: load the embedding model.
    print(f"Loading embedding model '{MODEL_NAME}'...")
    model = SentenceTransformer(MODEL_NAME)

    # Step 3: encode all questions in one batch and keep the vectors next to
    # the text they belong to.
    print("Generating embeddings for the FAQ data...")
    questions = faq_df['question'].tolist()
    embedding_tensor = model.encode(questions, convert_to_tensor=True)
    faq_df['embedding'] = embedding_tensor.tolist()
    print("Embeddings are ready.\n")

    return faq_df, model


def find_best_answer(user_question, df, embedding_model):
    """Find the FAQ entry whose question is most similar to *user_question*.

    The user's text is embedded with ``embedding_model`` and compared, by
    cosine similarity, against every vector in ``df['embedding']``.

    Args:
        user_question: The text typed by the user.
        df: FAQ DataFrame with ``question``, ``answer`` and ``embedding``
            columns; each ``embedding`` cell is a list of floats. The index
            may be arbitrary (rows are addressed by position).
        embedding_model: Object exposing
            ``encode(text, convert_to_tensor=True)`` that returns the query
            embedding as a tensor.

    Returns:
        A tuple ``(answer, matched_question, best_score)`` where
        ``best_score`` is the cosine similarity of the best match as a
        Python float.

    Raises:
        ValueError: If ``df`` has no rows, so there is nothing to match.
    """
    if df.empty:
        raise ValueError("FAQ DataFrame is empty; there is nothing to match against.")

    query_embedding = embedding_model.encode(user_question, convert_to_tensor=True)
    query_vector = torch.as_tensor(query_embedding).reshape(1, -1)

    # Build the FAQ matrix on the same device and dtype as the query: the
    # model may live on a GPU, and mixing CPU and GPU tensors would fail.
    faq_embeddings = torch.tensor(
        df['embedding'].tolist(),
        dtype=query_vector.dtype,
        device=query_vector.device,
    )

    # One similarity value per FAQ row.
    cosine_scores = torch.nn.functional.cosine_similarity(
        query_vector.expand_as(faq_embeddings), faq_embeddings, dim=1
    )

    # Position and score of the best match.
    best_match_index = int(torch.argmax(cosine_scores).item())
    best_score = cosine_scores[best_match_index].item()

    # argmax yields a row *position*, so use .iloc rather than label-based
    # .loc; this stays correct when the DataFrame index is not 0..N-1.
    matched_question = df['question'].iloc[best_match_index]
    answer = df['answer'].iloc[best_match_index]

    return answer, matched_question, best_score


def conversation_loop(df, model):
    """Run the interactive chatbot loop on standard input/output.

    Repeatedly prompts the user, looks up the closest FAQ entry with
    ``find_best_answer`` and prints either the matched answer or an apology,
    depending on how the match score compares to ``SIMILARITY_THRESHOLD``.
    The loop ends when the user types ``quit``, ``exit`` or ``bye``
    (case-insensitive), when standard input is closed, or when the user
    interrupts the prompt.

    Args:
        df: FAQ DataFrame passed through to ``find_best_answer``.
        model: Embedding model passed through to ``find_best_answer``.
    """
    print("--- Chatbot is now active! ---")
    print("Ask me a question. Type 'quit' or 'exit' to end the chat.")
    print("-" * 30)

    exit_commands = {'quit', 'exit', 'bye'}
    farewell = "\nBot: Goodbye! Have a great day."

    while True:
        # 1. Get user input. A closed stdin (Ctrl-D, exhausted pipe) or an
        #    interrupt (Ctrl-C) ends the chat cleanly instead of crashing
        #    with a traceback.
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            print(farewell)
            break

        command = user_input.strip().lower()

        # 2. Check for exit command
        if command in exit_commands:
            print(farewell)
            break

        # Nothing was typed: re-prompt rather than embedding an empty string.
        if not command:
            continue

        # 3. Find the best possible answer for the input
        answer, matched_q, score = find_best_answer(user_input, df, model)

        # 4. Decide on a response based on the similarity score. Only scores
        #    strictly below the threshold are treated as "don't know".
        if score >= SIMILARITY_THRESHOLD:
            response = (f"\nBot: It looks like you're asking about '{matched_q}'.\n"
                        f"Here is the information I found: {answer}\n")
        else:
            response = ("\nBot: I'm sorry, I couldn't find a specific answer for that. "
                        "Could you please try rephrasing your question?\n")

        print(response)


# --- Main Execution Block ---
if __name__ == "__main__":
    # Build the FAQ table and load the embedding model exactly once; both are
    # kept as module-level names and reused for the whole session.
    (faq_dataframe, sentence_model) = setup_data_and_embeddings()

    # Hand them to the interactive chat loop, which runs until the user quits.
    conversation_loop(df=faq_dataframe, model=sentence_model)


Loading embedding model 'all-MiniLM-L6-v2'...
Generating embeddings for the FAQ data...
Embeddings are ready.

--- Chatbot is now active! ---
Ask me a question. Type 'quit' or 'exit' to end the chat.
------------------------------
You: What are your shipping options?

Bot: It looks like you're asking about 'What are your shipping options?'.
Here is the information I found: We offer standard (5-7 business days), expedited (2-3 business days), and overnight shipping.

You: quit

Bot: Goodbye! Have a great day.
