In [None]:
# Mount Google Drive so the saved model files and the dataset CSV can be read
# from paths under the mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [None]:
#Install+Imports
!pip -q install gradio joblib scikit-learn

import gradio as gr
import pandas as pd
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer


In [None]:
#Load saved vectorizer + model
import joblib

# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load .pkl files that you created yourself or otherwise fully trust.
# NOTE(review): the saved output of this cell links to scikit-learn's
# model-persistence limitations page, which presumably comes from a
# version-mismatch warning raised while unpickling -- confirm, and pin
# scikit-learn to the version used when these files were saved.
VECTORIZER_PATH = "/content/drive/MyDrive/tfidf_vectorizer.pkl"
CLASSIFIER_PATH = "/content/drive/MyDrive/tfidf_classifier.pkl"

clf_vectorizer = joblib.load(VECTORIZER_PATH)
clf_model = joblib.load(CLASSIFIER_PATH)

print("✅ Vectorizer + Classifier loaded")


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


✅ Vectorizer + Classifier loaded


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [None]:
#Load Dataset
DATASET_PATH = "/content/drive/MyDrive/tourism_multilingual_1500_FINAL.csv"
df = pd.read_csv(DATASET_PATH)

# The retrieval helper filters on "category"/"language" and reads
# "question"/"answer"; fail here with a clear message rather than with a
# KeyError inside the chat handler.
REQUIRED_COLUMNS = {"category", "language", "question", "answer"}
missing_columns = REQUIRED_COLUMNS - set(df.columns)
if missing_columns:
    raise ValueError(f"Dataset is missing required columns: {sorted(missing_columns)}")

print("✅ Dataset loaded:", df.shape)
print("Columns:", list(df.columns))


✅ Dataset loaded: (1500, 5)
Columns: ['id', 'category', 'language', 'question', 'answer']


In [None]:
#Helper functions
def detect_language(text):
    """Guess the language of ``text`` from the script of its characters.

    The text is scanned left to right. The first character that falls in the
    Sinhala block (U+0D80..U+0DFF) or the Tamil block (U+0B80..U+0BFF) decides
    the result; when no such character is found, "English" is returned.

    Args:
        text: The text to inspect (iterated character by character).

    Returns:
        "Sinhala", "Tamil" or "English".
    """
    # (label, first code point, last code point); order matches the order in
    # which each character is tested.
    script_ranges = (
        ("Sinhala", '\u0D80', '\u0DFF'),
        ("Tamil", '\u0B80', '\u0BFF'),
    )
    for symbol in text:
        for label, low, high in script_ranges:
            if low <= symbol <= high:
                return label
    return "English"

def predict_category(text):
    """Predict the category of ``text`` and report the model's confidence.

    The text is vectorized with the module-level ``clf_vectorizer`` and
    classified with the module-level ``clf_model``.

    Args:
        text: The user question.

    Returns:
        A ``(category, confidence)`` tuple. ``confidence`` is the probability
        the model assigns to the predicted category when the model exposes
        ``predict_proba``; otherwise it is 1.0 (no probability available).
    """
    vec = clf_vectorizer.transform([text])
    pred = clf_model.predict(vec)[0]

    # Fall back to 1.0 only when no probability estimate is available, so the
    # caller's confidence threshold is meaningful for probabilistic models.
    conf = 1.0
    if hasattr(clf_model, "predict_proba"):
        proba = clf_model.predict_proba(vec)[0]
        classes = list(clf_model.classes_)
        # Look up the probability of the label returned by predict() so the
        # reported label and its confidence always refer to the same class.
        if pred in classes:
            conf = float(proba[classes.index(pred)])
    return pred, conf


def retrieve_answer(q, cat, lang):
    """Look up the stored answer that best matches a user question.

    Only rows of the module-level ``df`` whose ``category`` equals ``cat`` and
    whose ``language`` equals ``lang`` are considered.

    Args:
        q: The user question.
        cat: Category label to filter on.
        lang: Language label to filter on.

    Returns:
        The ``answer`` value of the best matching row, or ``None`` when no row
        passes the filters or when no candidate question shares any TF-IDF
        term with ``q``.
    """
    sub = df[(df["category"] == cat) & (df["language"] == lang)]
    if sub.empty:
        return None

    questions = sub["question"].astype(str)
    query = str(q).strip()

    # Exact match first (case-insensitive, ignoring surrounding whitespace).
    exact = sub[questions.str.strip().str.lower() == query.lower()]
    if not exact.empty:
        return exact.iloc[0]["answer"]

    # Otherwise rank the candidates by TF-IDF similarity to the query.
    vect = TfidfVectorizer()
    X = vect.fit_transform(questions)
    qv = vect.transform([query])

    scores = (X @ qv.T).toarray().ravel()
    best_idx = scores.argmax()
    # argmax returns 0 when every score is zero; without this guard the first
    # row would be served even though nothing matched.
    if scores[best_idx] <= 0:
        return None
    return sub.iloc[best_idx]["answer"]



In [None]:
#Chat function
def chat_fn(message, history):
    """Answer one chat message for the Gradio chat interface.

    Detects the language of ``message``, predicts its category, and looks up
    the best stored answer for that category and language.

    Args:
        message: The text the user submitted.
        history: Previous conversation turns (not used).

    Returns:
        The reply text: the answer followed by the category and confidence,
        or an apology/prompt when the input is blank, the prediction
        confidence is below 0.5, or no answer is found. Any exception is
        reported as an "Internal Error" message instead of being raised.
    """
    try:
        # Blank input would otherwise be classified and answered arbitrarily.
        if not isinstance(message, str) or not message.strip():
            return "Please type a question in English, Sinhala or Tamil."

        lang = detect_language(message)
        cat, conf = predict_category(message)

        # Check confidence before retrieval: an unreliable category makes the
        # lookup pointless.
        if conf < 0.5:
            return "Sorry, I’m not confident. Please rephrase your question."

        ans = retrieve_answer(message, cat, lang)
        if ans is None:
            return f"Sorry, I couldn't find an answer for {lang} in category {cat}."

        return f"{ans}\n\n(Category: {cat}, Confidence: {conf:.2f})"

    except Exception as e:
        # Best-effort: keep the chat UI responsive and show what went wrong.
        return f"Internal Error: {type(e).__name__}: {e}"


In [None]:
# Single-line input box with its own submit button.
question_box = gr.Textbox(
    lines=1,
    placeholder="Ask in English / Sinhala / Tamil",
    submit_btn=True,
)

# Chat UI that routes every submitted message through chat_fn.
demo = gr.ChatInterface(
    fn=chat_fn,
    type="messages",
    title="AI-Powered Multilingual Tourism Chatbot (TF-IDF)",
    textbox=question_box,
)

# share=True asks Gradio for a public share URL in addition to serving the app.
demo.launch(share=True)




Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3b4bf9a5b0bd074e30.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


