In [6]:
import pandas as pd

# Load your dataset
df = pd.read_csv("Rentwee_Chatbot_Faqs_Dataset.csv")

# Keep only necessary columns
df = df[['Category', 'Question', 'Answer']]

# Drop duplicates and rows with missing data
df.dropna(subset=["Question", "Answer"], inplace=True)
df.drop_duplicates(subset=["Question"], inplace=True)

# Optional: clean formatting
df['Question'] = df['Question'].str.strip().str.lower()
df




Unnamed: 0,Category,Question,Answer
0,General Information,what is rentwee?,"An app for finding and renting rooms, apartmen..."
1,General Information,can you tell me about what is rentwee?,"An app for finding and renting rooms, apartmen..."
3,General Information,how can i what is rentwee?,"An app for finding and renting rooms, apartmen..."
4,General Information,what about what is rentwee?,"An app for finding and renting rooms, apartmen..."
11,General Information,what is the main purpose of this application?,To connect property owners with tenants and si...
...,...,...,...
649,Account Management,how do i change my location in the app?,You can change your location in your profile s...
650,Account Management,where can i find how to change my location in ...,You can change your location in your profile s...
651,Account Management,how can i how to change my location in the app?,You can change your location in your profile s...
652,Account Management,what about how to change my location in the app?,You can change your location in your profile s...


In [2]:
from sentence_transformers import SentenceTransformer

# Load a pre-trained model
model = SentenceTransformer('all-MiniLM-L6-v2')  # Small and fast, good for retrieval

# Convert questions to embeddings
question_embeddings = model.encode(df['Question'].tolist(), show_progress_bar=True)


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Batches: 100%|██████████| 11/11 [00:02<00:00,  4.03it/s]


In [3]:
import faiss
import numpy as np

# Convert embeddings to float32 (required by FAISS)
question_embeddings = np.array(question_embeddings).astype("float32")

# Create a FAISS index
index = faiss.IndexFlatL2(question_embeddings.shape[1])  # L2 = Euclidean distance
index.add(question_embeddings)


In [4]:
def search_question(user_query, top_k=1):
    user_query_clean = user_query.strip().lower()
    query_embedding = model.encode([user_query_clean]).astype("float32")

    distances, indices = index.search(query_embedding, top_k)
    
    results = []
    for i in range(top_k):
        idx = indices[0][i]
        results.append({
            "matched_question": df.iloc[idx]['Question'],
            "answer": df.iloc[idx]['Answer'],
            "category": df.iloc[idx]['Category'],
            "score": float(distances[0][i])
        })
    return results


In [7]:
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("Chatbot: Goodbye!")
        break

    results = search_question(user_input, top_k=1)
    best = results[0]
    
    print(f"\nChatbot ({best['category']}): {best['answer']}\n")



Chatbot (Account Management): You can change your location in your profile settings.


Chatbot (Searching & Filtering): To find specific listings by keywords like location or property type.

Chatbot: Goodbye!


In [None]:
import pandas as pd
from flask import Flask, jsonify, request
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os

app = Flask(__name__)

# --- Chatbot Logic from your Notebook ---
# This part loads the dataset, model, and creates the FAISS index
# It's crucial that Rentwee_Chatbot_Faqs_Dataset.csv is in the same directory
# or you provide the full path to it.

df = None
model = None
index = None
question_embeddings = None

def load_chatbot_resources():
    """Loads the dataset, sentence transformer model, and FAISS index."""
    global df, model, index, question_embeddings

    # Check if resources are already loaded
    if df is not None and model is not None and index is not None:
        print("Chatbot resources already loaded.")
        return

    print("Loading chatbot resources...")
    try:
        # Load your dataset
        # Ensure the CSV file is accessible from where this script is run
        csv_file_path = 'Rentwee_Chatbot_Faqs_Dataset.csv'
        if not os.path.exists(csv_file_path):
            print(f"Error: CSV file not found at {csv_file_path}")
            # Attempt to fetch if it's an uploaded file in a specific environment
            # This part is conceptual for environments where files might be fetched
            # In a standard local setup, ensure the file is present.
            try:
                # This is a placeholder for how you might fetch content in a specific environment
                # For local development, ensure the CSV is in the same directory as app.py
                # from content_fetcher import fetch
                # fetched_content = fetch(query="Rentwee_Chatbot_Faqs_Dataset.csv", source_references=[{"id": "uploaded:Rentwee_Chatbot_Faqs_Dataset.csv", "type": "file"}])
                # with open(csv_file_path, 'w', encoding='utf-8') as f:
                #     f.write(fetched_content)
                pass
            except Exception as e:
                print(f"Could not automatically fetch CSV: {e}")
                raise FileNotFoundError(f"CSV file '{csv_file_path}' not found. Please ensure it's in the same directory as app.py.")


        df = pd.read_csv(csv_file_path)

        # Keep only necessary columns
        df = df[['Category', 'Question', 'Answer']]

        # Drop duplicates and rows with missing data
        df.dropna(subset=["Question", "Answer"], inplace=True)
        df.drop_duplicates(subset=["Question"], inplace=True)

        # Optional: clean formatting
        df['Question'] = df['Question'].str.strip().str.lower()

        # Load a pre-trained model
        model = SentenceTransformer('all-MiniLM-L6-v2')

        # Convert questions to embeddings
        question_embeddings = model.encode(df['Question'].tolist(), show_progress_bar=False) # No progress bar in API

        # Convert embeddings to float32 (required by FAISS)
        question_embeddings = np.array(question_embeddings).astype("float32")

        # Create a FAISS index
        index = faiss.IndexFlatL2(question_embeddings.shape[1])  # L2 = Euclidean distance
        index.add(question_embeddings)
        print("Chatbot resources loaded successfully!")

    except Exception as e:
        print(f"Failed to load chatbot resources: {e}")
        # Exit or handle error appropriately if resources can't be loaded
        exit(1) # Exit if essential resources cannot be loaded

# Call this function once when the Flask app starts
with app.app_context():
    load_chatbot_resources()

def search_question(user_query, top_k=1):
    """
    Searches the FAISS index for the most relevant question and returns its answer.
    """
    if model is None or index is None or df is None:
        raise RuntimeError("Chatbot resources not loaded. Cannot perform search.")

    user_query_clean = user_query.strip().lower()
    query_embedding = model.encode([user_query_clean]).astype("float32")

    distances, indices = index.search(query_embedding, top_k)
    
    results = []
    for i in range(top_k):
        idx = indices[0][i]
        results.append({
            "matched_question": df.iloc[idx]['Question'],
            "answer": df.iloc[idx]['Answer'],
            "category": df.iloc[idx]['Category'],
            "score": float(distances[0][i])
        })
    return results

# --- Flask API Endpoints ---

@app.route('/')
def home():
    """Simple home endpoint to confirm the API is running."""
    return "Rentwee Chatbot API is running!"

@app.route('/ask', methods=['POST'])
def ask_chatbot():
    """
    API endpoint to receive user queries and return chatbot responses.
    Expects a JSON payload like: {"query": "What is Rentwee?"}
    Returns a JSON payload like: {"answer": "...", "category": "..."}
    """
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    user_input = request.json.get('query')
    if not user_input:
        return jsonify({"error": "No 'query' field found in JSON payload"}), 400

    try:
        results = search_question(user_input, top_k=1)
        if not results:
            return jsonify({"answer": "I'm sorry, I couldn't find an answer to that question.", "category": "No Match"}), 200
        
        best_match = results[0]
        return jsonify({
            "answer": best_match['answer'],
            "category": best_match['category']
        }), 200
    except RuntimeError as e:
        return jsonify({"error": str(e)}), 500
    except Exception as e:
        return jsonify({"error": f"An unexpected error occurred: {e}"}), 500

if __name__ == '__main__':
    # Run the Flask app on all available network interfaces (0.0.0.0)
    # and port 5000. debug=True allows for automatic reloading on code changes,
    # but for production, set debug=False.
    app.run(debug=True, host='0.0.0.0', port=5000)


  from .autonotebook import tqdm as notebook_tqdm


Loading chatbot resources...
Chatbot resources loaded successfully!
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.100.36:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
