In [4]:
# --- Cell 1 ---
!pip install nltk==3.9.1 gradio==4.44.0 scikit-learn




In [5]:
# --- Cell 2 ---
# Download the NLTK resources used later in the notebook: the punkt / punkt_tab
# tokenizer data and the English stop-word corpus.

import nltk
nltk.download("punkt")
nltk.download("punkt_tab")   # added to fix a LookupError (presumably raised by word_tokenize; confirm)
nltk.download("stopwords")



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [7]:
import gradio as gr
import string
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# ✅ Step 1 — FAQ dataset
# Each entry is a (question, answer) pair. Questions are stored lower-case and
# without punctuation; the pair order is preserved in the resulting dict.
_FAQ_PAIRS = (
    (
        "what are your business hours",
        "Our business hours are Monday to Friday, 9 AM to 6 PM.",
    ),
    (
        "what is your return policy",
        "You can return items within 7 days with a valid receipt.",
    ),
    (
        "where is your office located",
        "Our office is located in Lahore, Pakistan.",
    ),
    (
        "how can i contact support",
        "You can contact support via email at support@company.com or call +92 300 1234567.",
    ),
    (
        "do you offer home delivery",
        "Yes, we offer home delivery services across Pakistan.",
    ),
)

# Mapping of FAQ question -> canned answer.
faq_data = dict(_FAQ_PAIRS)

# ✅ Step 2 — Preprocess
# Characters treated as punctuation when filtering tokens.
_PUNCTUATION_CHARS = frozenset(string.punctuation)

# English stop words, loaded from the NLTK corpus on first use and then reused.
_STOP_WORDS = None


def _get_stop_words():
    """Return the English stop-word set, reading the NLTK corpus only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words("english"))
    return _STOP_WORDS


def preprocess(text):
    """Lower-case and tokenize ``text``, dropping stop words and punctuation.

    Args:
        text: Raw input string. ``None`` or an empty string yields ``""``.

    Returns:
        The remaining tokens joined by single spaces.
    """
    if not text:
        return ""

    stop_words = _get_stop_words()
    tokens = word_tokenize(text.lower())
    kept = [
        tok
        for tok in tokens
        # A token counts as punctuation when *every* character is punctuation.
        # A plain `tok in string.punctuation` is a substring test and lets
        # multi-character tokens such as "``", "''" or "..." slip through.
        if tok not in stop_words
        and not all(ch in _PUNCTUATION_CHARS for ch in tok)
    ]
    return " ".join(kept)

# Parallel lists: answers[i] is the reply stored for questions[i].
questions = list(faq_data)
answers = [faq_data[question] for question in questions]

# Normalized question text that the vectorizer is fitted on.
processed_questions = list(map(preprocess, questions))

# ✅ Step 3 — TF-IDF Vectorization
# Fit the vocabulary on the FAQ questions and keep one TF-IDF row per question.
vectorizer = TfidfVectorizer()
question_vectors = vectorizer.fit_transform(processed_questions)

# ✅ Step 4 — Response Logic (fixed)
# Minimum cosine similarity a FAQ question must exceed to be considered a match.
SIMILARITY_THRESHOLD = 0.3

# Reply used whenever no FAQ entry matches the user's message.
FALLBACK_ANSWER = "Sorry, I don’t know the answer to that yet."


def get_answer(user_input, history=None):
    """Return the FAQ answer whose question is most similar to ``user_input``.

    Args:
        user_input: The user's message. Blank or non-string input gets the
            fallback reply without running the tokenizer or vectorizer.
        history: Chat history passed by the chat UI; accepted for interface
            compatibility and not used.

    Returns:
        The matching answer when the best cosine similarity exceeds
        ``SIMILARITY_THRESHOLD``; otherwise the fallback reply. If matching
        raises, a string describing the error is returned instead.
    """
    if not isinstance(user_input, str) or not user_input.strip():
        return FALLBACK_ANSWER

    try:
        query = preprocess(user_input)
        query_vec = vectorizer.transform([query])
        # One query row -> take row 0 so argmax indexes FAQ entries directly.
        scores = cosine_similarity(query_vec, question_vectors)[0]
        best = int(np.argmax(scores))
        if scores[best] > SIMILARITY_THRESHOLD:
            return answers[best]
        return FALLBACK_ANSWER
    except Exception as e:
        # Best-effort: surface the failure in the chat rather than crashing the UI.
        return f"⚠️ Internal error: {str(e)}"

# ✅ Step 5 — Gradio Chat Interface
# Sample prompts offered in the UI; each example is wrapped in its own list.
example_prompts = [
    [prompt]
    for prompt in (
        "What are your business hours?",
        "Do you offer home delivery?",
        "Where is your office located?",
    )
]

# Chat UI that routes every user message through get_answer.
demo = gr.ChatInterface(
    fn=get_answer,
    title="FAQ Chatbot 🤖",
    description="Ask me anything from our FAQs!",
    examples=example_prompts,
)

# share=True requests a public link; debug=True keeps the cell running so logs stay visible.
demo.launch(debug=True, share=True)




Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().


--------


Running on public URL: https://f0e336bbb95c6fa003.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://f0e336bbb95c6fa003.gradio.live


