# SciPy Spatial Data
This notebook covers distance metrics, KD-trees, geometric structures, and Voronoi partitioning with practical examples.

In [None]:
import numpy as np
from scipy.spatial import distance, cKDTree, ConvexHull, Delaunay, Voronoi

# Notebook-wide array display: four decimal places, and fixed-point rather
# than scientific notation for small values.
np.set_printoptions(suppress=True, precision=4)

## 1. Distance Metrics
Distance metrics are foundational for similarity search, segmentation, and anomaly scoring.

In [None]:
rng = np.random.default_rng(52)

# Example 1: assign each synthetic customer (one row of three features) to the
# closest of three reference segment profiles.
customers = rng.normal(loc=[50, 55, 60], scale=[8, 10, 7], size=(100, 3))
segments = np.array([[45, 50, 58], [60, 65, 62], [52, 40, 48]], dtype=float)
# d[i, j] is the Euclidean distance from customer i to segment j.
d = distance.cdist(customers, segments, metric='euclidean')
assigned = np.argmin(d, axis=1)

print("Example 1: Customer segment assignment")
# minlength keeps a zero entry for any segment that attracted no customers.
print("Counts per segment:", np.bincount(assigned, minlength=segments.shape[0]).tolist())

# Example 2: score every embedding by its distance from the sample centroid
# and report the five most distant points.
embeddings = rng.normal(size=(120, 16))
center = embeddings.mean(axis=0)
outlier_score = distance.cdist(embeddings, center[None, :], metric='euclidean').ravel()
# argsort is ascending, so take the last five and reverse them: the most
# extreme point is listed first, matching the "Top outlier" label.
outliers = np.argsort(outlier_score)[-5:][::-1]

print()
print("Example 2: Embedding outlier detection")
print("Top outlier indices:", outliers.tolist())

## 2. KD-Tree Nearest Neighbor Search
KD-trees accelerate nearest-neighbor and radius queries for location-based systems.

In [None]:
rng = np.random.default_rng(25)

# Example 1: match every store to its closest warehouse.
warehouses = rng.uniform([0, 0], [100, 100], size=(12, 2))
stores = rng.uniform([0, 0], [100, 100], size=(40, 2))
tree = cKDTree(warehouses)
# With k=1 the query returns one distance and one warehouse index per store.
store_dist, store_idx = tree.query(stores, k=1)

print("Example 1: Store to nearest warehouse")
print(f"Average nearest distance: {store_dist.mean():.2f}")
print(f"Max nearest distance: {store_dist.max():.2f}")

# Example 2: list the warehouses within a fixed radius of one location.
query_point = np.array([50, 50])
# A single-point ball query returns its indices unsorted by default, so sort
# them to keep the printed list stable and easy to read.
nearby = sorted(tree.query_ball_point(query_point, r=25))

print()
print("Example 2: Radius query around city center")
print(f"Warehouses within radius 25: {len(nearby)}")
print("Indices:", nearby)

## 3. Convex Hull and Delaunay Triangulation
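A convex hull is the smallest convex boundary enclosing a point set, and a Delaunay triangulation meshes the interior of that boundary into triangles. These structures are useful for boundary analysis, meshing, and spatial validity checks such as point-in-domain tests.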

In [None]:
rng = np.random.default_rng(40)

# Example 1: outline the region covered by a set of service locations.
service_points = rng.uniform(low=[10, 15], high=[90, 80], size=(60, 2))
hull = ConvexHull(service_points)

print("Example 1: Delivery coverage boundary")
print(f"Hull vertices count: {len(hull.vertices)}")
# For 2-D input SciPy reports the enclosed area as `volume` and the boundary
# length as `area`.
print(f"Coverage area: {hull.volume:.2f}")
print(f"Boundary perimeter: {hull.area:.2f}")

# Example 2: triangulate scattered terrain samples, then test which query
# points fall inside the triangulated domain.
terrain_points = rng.uniform(low=[0, 0], high=[50, 50], size=(45, 2))
tri = Delaunay(terrain_points)
query = np.array([
    [10, 10],
    [30, 12],
    [40, 40],
    [55, 12],
])
# find_simplex yields -1 for points outside every triangle.
simplex_ids = tri.find_simplex(query)
inside = np.logical_not(simplex_ids < 0)

print()
print("Example 2: Triangulation inclusion test")
print("Inside domain flags:", inside.tolist())

## 4. Voronoi Partitioning
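Voronoi partitions represent nearest-service regions in logistics and emergency planning: each cell contains every location that is closer to its site than to any other site, so assigning a point to its Voronoi cell is the same as finding its nearest site.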

In [None]:
rng = np.random.default_rng(70)

# Example 1: partition the plane by emergency station and count the bounded
# cells.
stations = rng.uniform(low=[0, 0], high=[100, 100], size=(10, 2))
vor = Voronoi(stations)
# Keep only non-empty regions with no -1 vertex index; -1 marks a vertex
# outside the diagram, i.e. an unbounded cell.
finite_regions = [region for region in vor.regions if region and -1 not in region]

print("Example 1: Emergency station partitioning")
print(f"Finite Voronoi regions: {len(finite_regions)}")

# Example 2: dispatch each incident to its closest station with a
# nearest-neighbour lookup (one neighbour per incident is the default).
incidents = rng.uniform(low=[0, 0], high=[100, 100], size=(25, 2))
station_tree = cKDTree(stations)
incident_dist, incident_assign = station_tree.query(incidents)

print()
print("Example 2: Incident dispatch assignment")
print("Assigned station indices:", incident_assign.tolist())
print(f"Average dispatch distance: {incident_dist.mean():.2f}")