In [10]:
import json
from rapidfuzz import process


class LocationValidator:
    def __init__(self, json_path):
        # Load the JSON file
        with open(json_path, "r") as file:
            self.locations = json.load(file)

    def _preprocess(self, text):
        """Normalize and preprocess text for matching."""
        return text.lower().replace("province", "").strip()

    def _find_best_match(self, input_value, options, score_cutoff=75):
        """Find the best match using two variants of the database."""
        if not input_value or not options:
            return None

        # First pass: Raw matching
        raw_match = process.extractOne(input_value, options, score_cutoff=score_cutoff)
        raw_match_name = raw_match[0] if raw_match else None
        raw_score = raw_match[1] if raw_match else 0

        # Second pass: Normalized matching
        input_value_normalized = self._preprocess(input_value)
        normalized_options = [self._preprocess(option) for option in options]
        normalized_match = process.extractOne(
            input_value_normalized, normalized_options, score_cutoff=score_cutoff
        )
        normalized_match_name = (
            options[normalized_options.index(normalized_match[0])]
            if normalized_match
            else None
        )
        normalized_score = normalized_match[1] if normalized_match else 0

        # Return the best match based on score
        if raw_score >= normalized_score:
            return raw_match_name
        return normalized_match_name

    def validate_location(self, province, district, municipality):
        # Extract province list
        province_list = self.locations.get("provinceList", [])
        province_names = [p["name"] for p in province_list]

        # Find the best match for province
        matched_province = self._find_best_match(province, province_names)
        if not matched_province:
            return {"error": f"'{province}' is not a valid province."}

        # Get the selected province's districts
        selected_province = next(p for p in province_list if p["name"] == matched_province)
        district_list = selected_province.get("districtList", [])
        district_names = [d["name"] for d in district_list]

        # Find the best match for district
        matched_district = self._find_best_match(district, district_names)
        if not matched_district:
            return {"error": f"'{district}' is not a valid district in {selected_province['name']}."}

        # Get the selected district's municipalities
        selected_district = next(d for d in district_list if d["name"] == matched_district)
        municipality_list = selected_district.get("municipalityList", [])
        municipality_names = [m["name"] for m in municipality_list]

        # Find the best match for municipality
        matched_municipality = self._find_best_match(municipality, municipality_names)
        if not matched_municipality:
            return {"error": f"'{municipality}' is not a valid municipality in {matched_district}."}

        # Return the matches
        return {
            "province": selected_province["name"],
            "district": selected_district["name"],
            "municipality": matched_municipality,
        }


# Test the function in a notebook
json_path = "/home/ubuntu/nepal_chatbot/resources/nepal_location.json"

# Initialize the validator
validator = LocationValidator(json_path)

# Mock inputs for testing
province_input = "bagmathi"
district_input = "NUWAKOTh"
municipality_input = "Ducheshwar"

# Validate and display results
result = validator.validate_location(province_input, district_input, municipality_input)
print(result)


{'error': "'' is not a valid district in BAGMATI PROVINCE."}


In [19]:
import json
from rapidfuzz import process


class LocationValidator:
    def __init__(self, json_path):
        # Load the JSON file and normalize province and district names to lowercase
        with open(json_path, "r") as file:
            self.locations = self._normalize_locations(json.load(file))

    def _normalize_locations(self, locations):
        """Normalize all names in the locations data to lowercase."""
        for province in locations.get("provinceList", []):
            province["name"] = province["name"].lower()
            for district in province.get("districtList", []):
                district["name"] = district["name"].lower()
                for municipality in district.get("municipalityList", []):
                    municipality["name"] = municipality["name"].lower()
        return locations

    def _preprocess(self, text):
        """Normalize user input to lowercase."""
        return text.lower().replace("province", "").strip() if text else None

    def _find_best_match(self, input_value, options, score_cutoff=75):
        """Find the best match using fuzzy matching."""
        if not input_value or not options:
            return None

        # Preprocess the input value
        input_value = self._preprocess(input_value)

        # Perform fuzzy matching
        match = process.extractOne(input_value, options, score_cutoff=score_cutoff)
        return match[0] if match else None

    def validate_location(self, province, district, municipality, qr_province=None, qr_district=None):
        """Validate location data with optional QR-provided defaults."""
        # Extract province list
        province_list = self.locations.get("provinceList", [])
        province_names = [p["name"] for p in province_list]

        # Validate QR-provided province or find best match
        qr_province = self._find_best_match(qr_province, province_names) if qr_province else None
        matched_province = qr_province or self._find_best_match(province, province_names)
        if not matched_province:
            return {"error": f"'{province}' is not a valid province."}

        # Get the selected province's districts
        selected_province = next(
            (p for p in province_list if p["name"] == matched_province), None
        )
        if not selected_province:
            return {"error": f"'{matched_province}' is not a valid province in the database."}

        district_list = selected_province.get("districtList", [])
        district_names = [d["name"] for d in district_list]

        # Validate QR-provided district or find best match
        qr_district = self._find_best_match(qr_district, district_names) if qr_district else None
        matched_district = qr_district or self._find_best_match(district, district_names)
        if not matched_district:
            return {"error": f"'{district}' is not a valid district in {selected_province['name']}."}

        # Get the selected district's municipalities
        selected_district = next(
            (d for d in district_list if d["name"] == matched_district), None
        )
        if not selected_district:
            return {"error": f"'{matched_district}' is not a valid district in {matched_province}."}

        municipality_list = selected_district.get("municipalityList", [])
        municipality_names = [m["name"] for m in municipality_list]

        # Validate municipality
        matched_municipality = self._find_best_match(municipality, municipality_names)
        if not matched_municipality:
            return {"error": f"'{municipality}' is not a valid municipality in {matched_district}."}

        # Return the matches
        return {
            "province": matched_province,
            "district": matched_district,
            "municipality": matched_municipality,
        }


# Test the function in a notebook
json_path = "/home/ubuntu/nepal_chatbot/resources/nepal_location.json"

# Initialize the validator
validator = LocationValidator(json_path)

# Mock inputs for testing
province_input = ""  # User didn't input province (rely on QR)
district_input = ""  # User didn't input district (rely on QR)
municipality_input = "Ducheshwar"

# Pre-filled values from QR code
qr_province = "bagmathi"  # Simulated typo in QR-encoded province
qr_district = "NUWAKOTh"  # Simulated typo in QR-encoded district

# Validate and display results
result = validator.validate_location(province_input, district_input, municipality_input, qr_province, qr_district)
print(result)


{'province': 'bagmati province', 'district': 'nuwakot', 'municipality': 'dupcheshwar rural municipality'}


In [None]:
# Mock inputs for testing
province_input = "bagmathi"
district_input = "NUWAKOTh"
municipality_input = "Ducheshwar"

In [3]:
import socketio
import nest_asyncio
import asyncio

# Apply nest_asyncio to allow nested event loops in Jupyter
nest_asyncio.apply()

# Create a Socket.IO client
sio = socketio.AsyncClient()

# Define the connect event handler
@sio.event
async def connect():
    print("Connected to Rasa server")

# Define the handler for bot responses
@sio.on("bot_uttered")
async def on_message(data):
    print("Bot response:", data)

# Define the disconnect event handler (optional)
@sio.event
async def disconnect():
    print("Disconnected from Rasa server")

async def main():
    # Connect to the Rasa WebSocket endpoint
    await sio.connect("http://localhost:5005/socket.io/?EIO=4&transport=websocket")
    
    # Send a user message
    await sio.emit("user_uttered", {"message": "hello", "metadata": {}})
    
    # Wait to keep the connection alive
    await sio.wait()

# Run the main function using the existing event loop
asyncio.get_event_loop().run_until_complete(main())


ConnectionError: Unexpected status code 400 in server response