Call the Overpass API to fetch zone-labelled locations in Bangkok and build a KNN model from them

In [None]:
import requests
import time
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import StringType
import pandas as pd
import pickle

# Zone keywords: maps each zone label to the keywords that identify it.
# extract_tagged_zone_points compares every keyword against OSM tag values
# (substring match) and tag keys (exact match), walking the zones in the order
# written here and stopping at the first hit. When a keyword is listed under
# two zones (e.g. "temple"), the zone that appears first wins.
ZONES = {
    "โซนโรงเรียน/การศึกษา": ["school", "university", "college", "kindergarten", "education", "academy", "educational_institution"],
    "โซนตลาด/พาณิชย์": ["marketplace", "market", "bazaar", "wet_market", "fresh_market", "trading_area"],
    "โซนท่องเที่ยว/ศิลปวัฒนธรรม": ["museum", "monument", "tourism", "attraction", "temple", "viewpoint", "historic", "cultural", "artwork"],
    "โซนที่พักอาศัย": ["residential", "apartment", "house", "condominium", "flat", "residence", "housing"],
    "โซนสำนักงาน/หน่วยงานรัฐ": ["office", "public", "government", "townhall", "civic", "embassy", "courthouse"],
    "โซนอุตสาหกรรม/โรงงาน": ["industrial", "factory", "manufacture", "plant", "warehouse"],
    "โซนก่อสร้าง": ["construction", "building_site", "development"],
    "โซนถนน/คมนาคม": ["highway", "road", "street", "transport", "bus_station", "railway", "junction", "subway_entrance", "parking"],
    "โซนแหล่งน้ำ/คลอง": ["canal", "waterway", "river", "stream", "reservoir", "pond", "lake"],
    "โซนห้าง/คอมมูนิตี้มอลล์": ["mall", "shopping_centre", "supermarket", "store", "retail", "department_store", "shopping", "commercial"],
    "โซนศาสนา/สงบ": ["place_of_worship", "temple", "church", "mosque", "shrine", "spiritual", "religious"],
    "โซนสาธารณสุข": ["hospital", "clinic", "healthcare", "pharmacy", "medical", "emergency"],
    "โซนพื้นที่สีเขียว/สวนสาธารณะ": ["park", "garden", "greenfield", "forest", "recreation_ground", "nature_reserve"],
    "โซนชุมชนแออัด/ชุมชนดั้งเดิม": ["slum", "village", "community", "settlement", "camp", "squatter"],
    "โซนสถานบันเทิง/ร้านอาหาร": ["bar", "pub", "restaurant", "entertainment", "nightclub", "karaoke", "cafe", "food_court"]
}

# Bangkok bounding box (approximate), ordered (south, west, north, east) so it
# can be unpacked straight into query_overpass_bulk(south, west, north, east).
BANGKOK_BBOX = (13.0, 100.3, 14.2, 100.98)  # (S, W, N, E)

def query_overpass_bulk(south, west, north, east, timeout=90):
    """Fetch every OSM node, way and relation inside a bounding box.

    Args:
        south: Southern edge of the bounding box (latitude, degrees).
        west: Western edge of the bounding box (longitude, degrees).
        north: Northern edge of the bounding box (latitude, degrees).
        east: Eastern edge of the bounding box (longitude, degrees).
        timeout: Server-side Overpass query timeout in seconds. The HTTP
            request itself is given a slightly larger client-side timeout so
            the server gets the chance to answer (or report its own timeout)
            before the client gives up.

    Returns:
        The decoded JSON document returned by the Overpass API.

    Raises:
        requests.HTTPError: If the server answers with a non-success status.
        requests.Timeout: If the server does not respond within the
            client-side timeout.
    """
    overpass_url = "https://overpass-api.de/api/interpreter"
    # "out center tags" asks Overpass to attach a centre point to ways and
    # relations, which have no lat/lon of their own.
    query = f"""
    [out:json][timeout:{timeout}];
    (
      node({south},{west},{north},{east});
      way({south},{west},{north},{east});
      relation({south},{west},{north},{east});
    );
    out center tags;
    """
    # Short pause before each call to stay polite towards the public server.
    time.sleep(1)
    # Send the query as the documented "data" form field and always set a
    # client-side timeout: without one, requests can block forever on a
    # stalled connection.
    response = requests.post(
        overpass_url,
        data={"data": query},
        timeout=timeout + 30,
    )
    response.raise_for_status()
    return response.json()

def extract_tagged_zone_points(data, zones=None):
    """Label Overpass elements with the first zone whose keywords they match.

    An element matches a zone when any of the zone's keywords is a substring
    of one of the element's tag values (case-insensitive), or when one of the
    element's tag keys (lower-cased) is exactly one of the keywords. Zones are
    tried in mapping order and only the first matching zone is recorded.

    Args:
        data: Decoded Overpass JSON; elements are read from ``data["elements"]``.
            Coordinates come from the element's own ``lat``/``lon`` or, when
            absent, from its ``center`` entry.
        zones: Mapping of zone label -> list of keywords. Defaults to the
            module-level ``ZONES``.

    Returns:
        A list of ``(lat, lon, zone)`` tuples, one per matched element.
        Elements without coordinates or without any matching zone are skipped.
    """
    zones = ZONES if zones is None else zones
    points = []
    for el in data.get("elements", []):
        # Compare against None explicitly: a coordinate of 0 / 0.0 is valid
        # and must not be mistaken for "missing".
        lat = el.get("lat")
        if lat is None:
            lat = el.get("center", {}).get("lat")
        lon = el.get("lon")
        if lon is None:
            lon = el.get("center", {}).get("lon")

        if lat is None or lon is None:
            continue

        tags = el.get("tags", {})
        if not tags:
            # Nothing to match against, so no zone can apply.
            continue

        # Normalise once per element instead of once per (tag, keyword) pair.
        tag_keys = [key.lower() for key in tags]
        tag_values = [str(value).lower() for value in tags.values()]

        for zone, keywords in zones.items():
            key_hit = any(key in keywords for key in tag_keys)
            if key_hit or any(
                kw in value for value in tag_values for kw in keywords
            ):
                points.append((lat, lon, zone))
                break
    return points

def build_knn_zone_classifier(zone_points, n_neighbors=1):
    """Fit a nearest-neighbour classifier that maps (lat, lon) to a zone label.

    Args:
        zone_points: Iterable of ``(lat, lon, zone)`` tuples used as training
            data.
        n_neighbors: Number of neighbours the classifier consults. Defaults
            to 1, i.e. a coordinate gets the zone of its single closest
            training point.

    Returns:
        The fitted ``KNeighborsClassifier``.

    Raises:
        ValueError: If ``zone_points`` is empty, since there is nothing to fit.
    """
    # Materialise first: the data is traversed twice (coordinates, then
    # labels), which would silently drop the labels of a one-shot iterator.
    zone_points = list(zone_points)
    if not zone_points:
        raise ValueError(
            "zone_points is empty: no labelled points to fit the KNN classifier on"
        )

    coords = np.array([[lat, lon] for lat, lon, _ in zone_points])
    labels = [zone for _, _, zone in zone_points]
    # NOTE(review): the classifier is built with its default distance metric,
    # applied directly to raw degree values — confirm that is accurate enough
    # for the area being covered.
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(coords, labels)
    return knn

# Step 1: Fetch all OSM elements (nodes, ways, relations) inside the Bangkok
# bounding box. BANGKOK_BBOX is unpacked as (south, west, north, east).
osm_data = query_overpass_bulk(*BANGKOK_BBOX)

# Step 2: Keep only the elements whose tags match a ZONES keyword, as
# (lat, lon, zone) tuples
zone_points = extract_tagged_zone_points(osm_data)

# Step 3: Build KNN model
knn = build_knn_zone_classifier(zone_points)

# Step 4: Save the KNN model to knn.pkl in the working directory, then save it
# on Google Drive (the upload itself is not performed by this cell).
with open("knn.pkl", "wb") as f:
    pickle.dump(knn, f)

In [None]:
# Number of zone-labelled points extracted from the OSM data
print(len(zone_points))

680743


In [None]:
# Bare expression: the notebook displays the classifier built above
knn

In [None]:
# Try to use the KNN model on one sample coordinate

latitude = 13.740068235694176
longitude = 100.53403201037855

# Single coordinate as a 2D array of shape (1, 2), in the same (lat, lon)
# column order the model was trained with
coord = np.array([[latitude, longitude]])

# Predict zone; predict returns one label per input row, so take the first
predicted_zone = knn.predict(coord)[0]

print(f"The predicted zone is: {predicted_zone}")

The predicted zone is: โซนโรงเรียน/การศึกษา
