In [6]:

!pip install -q pandas sentence-transformers faiss-cpu gradio

import pandas as pd
import numpy as np
import faiss
import random
from sentence_transformers import SentenceTransformer
import gradio as gr


# Load and prepare the dataset

filename = "trivia.csv"

# Read the CSV and normalise every header (lower-case, no surrounding
# whitespace) so the column lookups below do not depend on header formatting.
df_full = pd.read_csv(filename).rename(
    columns=lambda name: name.lower().strip()
)

# Keep only the question/answer pair and discard rows missing either value.
df_full = (
    df_full.loc[:, ["question", "answers"]]
    .dropna()
    .reset_index(drop=True)
)

# Work with the first 1000 rows of the dataset only.
df = df_full.iloc[:1000].reset_index(drop=True)

# Plain Python string lists, index-aligned with the rows of `df`.
questions = list(df["question"].astype(str))
answers = list(df["answers"].astype(str))

print(f"Rows used: {len(df)}")


# Load the embedding model

model = SentenceTransformer("all-MiniLM-L6-v2")

print("Embedding questions...")
# Encode all questions in batches of 32, returning a NumPy array and showing
# a progress bar while the batches run.
embeddings = model.encode(
    questions, batch_size=32, convert_to_numpy=True, show_progress_bar=True
)
# Cast the embedding matrix to float32 before it is handed to the index.
embeddings = embeddings.astype("float32")

print(f"Embeddings shape: {embeddings.shape}")



# Build the FAISS index

# The second axis of the embedding matrix is the vector dimensionality.
d = embeddings.shape[1]

# Flat L2 index sized to that dimensionality; one entry per question embedding.
index = faiss.IndexFlatL2(d)
index.add(embeddings)

print(f"FAISS index ready. Items: {index.ntotal}")



# Turn short answers into full natural-language sentences

sentence_templates = [
    "The answer is {}.",
    "The correct answer is {}.",
    "It is {}.",
    "The right response is {}.",
    "The correct response would be {}."
]


def make_sentence(ans):
    """Wrap a short answer in a randomly chosen sentence template.

    Args:
        ans: The raw answer text; surrounding whitespace is stripped before
            it is substituted into the template.

    Returns:
        One of ``sentence_templates`` with the cleaned answer filled in.
    """
    # Draw the template first, then clean the answer (same order as the RNG
    # has always been consumed in).
    chosen = random.choice(sentence_templates)
    cleaned = ans.strip()
    return chosen.format(cleaned)



# Search function with a distance threshold

DEFAULT_MESSAGE = "I cannot help you with this question. Please contact via email or phone."
DISTANCE_THRESHOLD = 1.0


def search_answer(query, k=1):
    """Answer *query* from the nearest indexed question, or fall back.

    The query is embedded with ``model`` and looked up in ``index``. Only the
    closest hit is used; when its distance exceeds ``DISTANCE_THRESHOLD`` the
    match is considered too weak and ``DEFAULT_MESSAGE`` is returned instead.

    Args:
        query: The user's question. ``None``, empty, or whitespace-only
            input yields ``DEFAULT_MESSAGE`` without touching the model.
        k: Number of neighbours requested from the index. Only the first
            (closest) result is inspected.

    Returns:
        A full-sentence answer built by ``make_sentence``, or
        ``DEFAULT_MESSAGE`` when there is no usable match.
    """
    # A blank query has nothing to match on; embedding it would still return
    # some nearest neighbour, so short-circuit to the fallback instead.
    if not query or not query.strip():
        return DEFAULT_MESSAGE

    q_emb = model.encode([query], convert_to_numpy=True).astype("float32")
    distances, indices = index.search(q_emb, k)

    best_distance = float(distances[0][0])
    best_idx = int(indices[0][0])

    # FAISS reports a label of -1 when it has no result for a slot; treating
    # that as a list index would silently pick the last answer, so reject it
    # together with matches that are too far away.
    if best_idx < 0 or best_distance > DISTANCE_THRESHOLD:
        return DEFAULT_MESSAGE

    # Close enough: phrase the stored answer as a sentence.
    return make_sentence(answers[best_idx])



# Chat UI


def chatbot(user_input, history):
    """Append one question/answer exchange to the chat history.

    Args:
        user_input: The text the user submitted.
        history: List of ``(user_message, bot_message)`` tuples accumulated
            so far. ``None`` is treated as an empty conversation.

    Returns:
        The updated history twice, one value per output slot wired to this
        callback.
    """
    # NOTE(review): the clear button writes None to the chat component, so a
    # None history may reach this callback depending on the Gradio version.
    if history is None:
        history = []

    bot_answer = search_answer(user_input)
    history.append((user_input, bot_answer))
    return history, history


# Layout: title, chat window, input box, and a clear button.
with gr.Blocks() as ui:
    gr.Markdown("<h2 style='text-align:center;'>Trivia Chatbot</h2>")
    chat = gr.Chatbot()
    msg = gr.Textbox(label="Ask something…")
    clear = gr.Button("Clear Chat")

    def _submit_message(user_input, history):
        """Answer the submitted text; return a blank textbox value and the new history."""
        updated_history, _ = chatbot(user_input, history)
        return "", updated_history

    # Each output component is listed exactly once: the textbox is emptied
    # for the next question and the chat window receives the updated history.
    msg.submit(_submit_message, [msg, chat], [msg, chat])
    # The clear button writes None to the chat component.
    clear.click(lambda: None, None, chat, queue=False)

ui.launch()


Rows used: 1000
Embedding questions...


Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Embeddings shape: (1000, 384)
FAISS index ready. Items: 1000


  chat = gr.Chatbot()
  chat = gr.Chatbot()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0dd43519217898a86e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


