Intercept Coincident points #548

Merged on Aug 25, 2023 · 37 commits
Changes from 27 commits
Commits
a5c63c1
[WIP] start intercepting coincidence, probably should use a decorator…
ljwolf Aug 10, 2023
b22f2e5
finalize decorator implementation for _triangulation
ljwolf Aug 11, 2023
8aac540
finish jitter implementation
ljwolf Aug 11, 2023
d0b7c0d
propagate coincident to build_triangulation
ljwolf Aug 11, 2023
ca5ec9a
add reasons to assert clauses
ljwolf Aug 11, 2023
36a18e3
voronoi should still admit kernels
ljwolf Aug 11, 2023
2745af4
move to None for defaults, not boxcar/inf
ljwolf Aug 11, 2023
e1b243a
add distance_band
martinfleis Aug 7, 2023
cda1ef9
channel distance band through kernel
martinfleis Aug 7, 2023
ab207dd
fix isolates in distance band
martinfleis Aug 7, 2023
d4f6c73
lag, parquet IO, __getitem__, block placeholder
martinfleis Aug 10, 2023
3e20546
docstrings, optional pyarrow
martinfleis Aug 10, 2023
6ac2b7c
fix test
martinfleis Aug 10, 2023
bed12a8
block contiguity
martinfleis Aug 10, 2023
7671b77
add tests
martinfleis Aug 10, 2023
f6ecc00
to-dos
martinfleis Aug 10, 2023
6f283f8
Apply suggestions from code review
martinfleis Aug 10, 2023
c25db44
test block_contiguity
martinfleis Aug 10, 2023
3d27594
test parquet without meta
martinfleis Aug 10, 2023
ea180bd
test lag
martinfleis Aug 10, 2023
017ef80
haversine todo note
martinfleis Aug 11, 2023
d30f63b
comment on diag
martinfleis Aug 11, 2023
1754691
[WIP] work through integrations of kernel and triangulation in prep t…
ljwolf Aug 11, 2023
c318989
move back to upstream kernel
ljwolf Aug 11, 2023
fff6587
migrate checks for arrays, complete test battery
ljwolf Aug 11, 2023
a8be157
Merge remote-tracking branch 'upstream/geographs' into geographs
ljwolf Aug 11, 2023
c5cbe32
fix docstrings
ljwolf Aug 11, 2023
36d88b0
Update libpysal/graph/_triangulation.py
ljwolf Aug 11, 2023
2f18e35
document coincident option for graph
sjsrey Aug 25, 2023
d946fec
add coincidence checks to knn
ljwolf Aug 25, 2023
de99dd5
Merge pull request #2 from sjsrey/ljwgeographs
ljwolf Aug 25, 2023
6bdd4bc
Update libpysal/graph/_utils.py
ljwolf Aug 25, 2023
008b0e8
Update libpysal/graph/_utils.py
ljwolf Aug 25, 2023
ae59ad7
Update libpysal/graph/_utils.py
ljwolf Aug 25, 2023
cbf309b
finalize coincident checks in knn
ljwolf Aug 25, 2023
82f3e18
add the reorder table function to base, prepare for sorting inputs to…
ljwolf Aug 25, 2023
c4383e1
handle ids is none
ljwolf Aug 25, 2023
18 changes: 13 additions & 5 deletions libpysal/graph/_kernel.py
@@ -49,7 +49,6 @@ def _identity(distances, bandwidth):
"boxcar": _boxcar,
"discrete": _boxcar,
"identity": _identity,
None: _identity,
}


@@ -61,6 +60,7 @@ def _kernel(
k=None,
ids=None,
p=2,
taper=False,
):
"""
Compute a kernel function over a distance matrix.
@@ -127,15 +127,20 @@
D = sparse.csc_array(coordinates)
if bandwidth is None:
bandwidth = numpy.percentile(D.data, 25)
elif bandwidth == "opt":
bandwidth = _optimize_bandwidth(D, kernel)
elif bandwidth == "auto":
if (kernel == 'identity') or (kernel is None):
bandwidth = numpy.nan # ignored by identity
else:
bandwidth = _optimize_bandwidth(D, kernel)
if callable(kernel):
smooth = kernel(D.data, bandwidth)
else:
smooth = _kernel_functions[kernel](D.data, bandwidth)

sp = sparse.csc_array((smooth, D.indices, D.indptr), dtype=smooth.dtype)
sp.eliminate_zeros()

if taper:
sp.eliminate_zeros()

return sp, ids
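
The new `taper` flag makes dropping zero-weight edges opt-in: `eliminate_zeros()` removes explicitly stored zeros from the sparse result, which previously happened unconditionally. A small illustration of the difference, using plain scipy (independent of libpysal, values illustrative):

import numpy
from scipy import sparse

weights = numpy.array([0.0, 0.5, 1.0])
rows = numpy.array([0, 1, 2])
cols = numpy.array([1, 2, 0])
sp = sparse.csc_array((weights, (rows, cols)), shape=(3, 3))

print(sp.nnz)          # 3: the zero-weight edge is still stored
sp.eliminate_zeros()   # this is what taper=True triggers
print(sp.nnz)          # 2: the zero-weight edge is dropped from the graph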

@@ -228,7 +233,10 @@ def _optimize_bandwidth(D, kernel):
uniform distribution of kernel values, which is a good proxy for a
"moderate" level of smoothing.
"""
kernel_function = _kernel_functions[kernel]
kernel_function = _kernel_functions.get(kernel, kernel)
assert callable(kernel_function), (
f"kernel {kernel} was not in supported kernel types {_kernel_functions.keys()} or callable"
)

def _loss(bandwidth, D=D, kernel_function=kernel_function):
Ku = kernel_function(D.data, bandwidth)
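
The `_optimize_bandwidth` hunk above replaces a hard dictionary lookup with `.get(kernel, kernel)`, so a user-supplied callable falls through the lookup and is used directly. A minimal sketch of that dispatch pattern follows; the kernel bodies here are simplified assumptions for illustration, not libpysal's exact implementations:

import numpy

def _boxcar(distances, bandwidth):
    # weight 1 inside the bandwidth, 0 outside (simplified assumption)
    return (distances < bandwidth).astype(int)

def _identity(distances, bandwidth):
    # pass distances through untouched; bandwidth is ignored
    return distances

_kernel_functions = {
    "boxcar": _boxcar,
    "discrete": _boxcar,
    "identity": _identity,
}

def resolve_kernel(kernel):
    # string names resolve through the table; anything else falls back
    # to the key itself, so callables pass straight through
    kernel_function = _kernel_functions.get(kernel, kernel)
    assert callable(kernel_function), (
        f"kernel {kernel} was not in supported kernel types "
        f"{_kernel_functions.keys()} or callable"
    )
    return kernel_function

print(resolve_kernel("boxcar")(numpy.array([0.5, 2.0]), 1.0))  # [1 0]
print(resolve_kernel(lambda d, bw: numpy.exp(-d / bw))(numpy.array([1.0]), 2.0))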
3 changes: 3 additions & 0 deletions libpysal/graph/_set_ops.py
@@ -39,6 +39,9 @@ def __iand__(self, other):
def __ior__(self, other):
raise TypeError("weights are immutable")

def __len__(self):
return self.n_edges


# TODO: performance test the pandas implementation
def intersects(left, right):
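
With `__len__` defined, the builtin `len()` now reports a graph's edge count. A hypothetical sketch of the behavior (the host class and its `n_edges` value are stand-ins, not libpysal's actual Graph):

class EdgeCountSketch:
    # stand-in for the set-ops mixin; on the real class, n_edges is
    # the number of rows in the graph's adjacency table
    def __init__(self, n_edges):
        self.n_edges = n_edges

    def __len__(self):
        return self.n_edges

print(len(EdgeCountSketch(n_edges=42)))  # 42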
167 changes: 97 additions & 70 deletions libpysal/graph/_triangulation.py
@@ -2,13 +2,19 @@

import numpy
import pandas
from scipy import spatial
from scipy import spatial, sparse

from ._contiguity import _vertex_set_intersection
from ._kernel import _kernel_functions
from ._utils import _validate_geometry_input

from ._kernel import _kernel, _optimize_bandwidth, _kernel_functions
from ._utils import (
_validate_geometry_input,
_build_coincidence_lookup,
_induce_cliques,
_jitter_geoms,
_vec_euclidean_distances
)
from libpysal.cg import voronoi_frames
from functools import wraps

try:
from numba import njit # noqa E401
@@ -23,8 +29,75 @@
Serge Rey (sjsrey@gmail.com)
"""

# This is in the module, rather than in `utils`, to ensure that it
# can access `_VALID_GEOMETRY_TYPES` without defining a nested decorator.
def _validate_coincident(triangulator):
"""This is a decorator that validates input for coincident points"""
@wraps(triangulator)
def tri_with_validation(coordinates, ids=None, coincident='raise', kernel=None, bandwidth=None, **kwargs):
coordinates, ids, geoms = _validate_geometry_input(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)
n_coincident, coincident_lut = _build_coincidence_lookup(geoms)
if n_coincident > 0:
if coincident == "raise":
raise ValueError(
f"There are {len(coincident_lut)} unique locations in the dataset, "
f"but {len(geoms)} observations. This means there are multiple points "
"in the same location, which is undefined for this graph type. "
"To address this issue, consider setting `coincident='clique'` "
"or consult the documentation about coincident points."
)
elif coincident == "jitter":
coordinates, geoms = _jitter_geoms(coordinates, geoms)
elif coincident == "clique":
input_coordinates, input_ids, input_geoms = coordinates, ids, geoms
Review comment (Member):

  • Are input_coordinates, input_ids, and input_geoms actually used anywhere?
  • Should _induce_cliques() be used within this chunk or only down in L92?

Reply (Member, Author):

No, input_coordinates, input_ids, and input_geoms aren't currently used again, but we probably will need at least input_ids later when we implement re-ordering the output. Not done yet.

coordinates, ids, geoms = _validate_geometry_input(
coincident_lut.geometry, ids=coincident_lut.index, valid_geometry_types=_VALID_GEOMETRY_TYPES
)
else:
raise ValueError(
f"Recieved option `coincident='{coincident}', but only options 'raise','clique','jitter' are suppported."
)
heads_ix, tails_ix = triangulator(coordinates, **kwargs)

heads, tails = ids[heads_ix], ids[tails_ix]

if kernel is None:
weights = numpy.ones(heads_ix.shape, dtype=numpy.int8)
else:
distances = _vec_euclidean_distances(coordinates[heads_ix], coordinates[tails_ix]).squeeze()
sparse_D = sparse.csc_array((distances, (heads_ix, tails_ix)))
if bandwidth == "auto":
bandwidth = _optimize_bandwidth(sparse_D, kernel)
_k = _kernel(
sparse_D,
metric='precomputed',
kernel=kernel,
bandwidth=bandwidth,
taper=False
)[0]
weights = _k.data
adjtable = pandas.DataFrame.from_dict(
dict(
focal = heads, neighbor = tails, weight = weights
)
)

if (n_coincident > 0) & (coincident == "clique"):
# note that the kernel is only used to compute a fill value for the clique.
# using boxcar with an infinite bandwidth also gives us the correct
# fill value for the voronoi weight: 1.
fill_value = _kernel_functions[kernel](numpy.array([0]), bandwidth).item()
adjtable = _induce_cliques(adjtable, coincident_lut, fill_value=fill_value)
# from here, how to ensure ordering?
return adjtable.focal.values, adjtable.neighbor.values, adjtable.weight.values
return tri_with_validation
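
`_induce_cliques` is imported from `_utils` and not shown in this diff. A rough sketch of what clique induction might look like, inferred only from the call site `_induce_cliques(adjtable, coincident_lut, fill_value=...)` (the column name and lookup structure here are assumptions, not libpysal's actual code):

import pandas

def induce_cliques_sketch(adjtable, coincident_lut, fill_value=1):
    # Assumption: coincident_lut maps each unique location to the list
    # of observation ids that share it, in a column named "members".
    clique_edges = []
    for members in coincident_lut["members"]:
        # every ordered pair of co-located observations becomes an
        # edge carrying the fill value as its weight
        for i in members:
            for j in members:
                if i != j:
                    clique_edges.append((i, j, fill_value))
    cliques = pandas.DataFrame(
        clique_edges, columns=["focal", "neighbor", "weight"]
    )
    return pandas.concat([adjtable, cliques], ignore_index=True)

The real helper presumably also re-expands edges incident to each representative location out to all of its co-located members; this sketch shows only the within-clique fill.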


def _delaunay(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):

@_validate_coincident
def _delaunay(coordinates):
"""
Constructor of the Delaunay graph of a set of input points.
Relies on scipy.spatial.Delaunay and numba to quickly construct
@@ -79,31 +152,14 @@ def _delaunay(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
" to accelerate the computation of graphs. Without numba,"
" these computations may become unduly slow on large data."
)
coordinates, ids, _ = _validate_geometry_input(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)

edges, _ = _voronoi_edges(coordinates)
heads_ix, tails_ix = edges.T

ids = numpy.asarray(ids)
head, tail = ids[edges[:, 0]], ids[edges[:, 1]]

if bandwidth is numpy.inf and kernel == "boxcar":
weights = numpy.ones(head.shape, dtype=numpy.int8)
else:
distances = ((coordinates[edges[:, 0]] - coordinates[edges[:, 1]]) ** 2).sum(
axis=1
).squeeze() ** 0.5
weights = _kernel_functions[kernel](distances, bandwidth)

# TODO: check for coincident points which result in
# dropped points from the triangulation and the
# misalignment of the weights and the attribute array

return head, tail, weights

return heads_ix, tails_ix

def _gabriel(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
@_validate_coincident
def _gabriel(coordinates):
"""
Constructs the Gabriel graph of a set of points. This graph is a subset of
the Delaunay triangulation where only "short" links are retained. This
@@ -148,35 +204,19 @@ def _gabriel(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
" to accelerate the computation of graphs. Without numba,"
" these computations may become unduly slow on large data."
)
coordinates, ids, _ = _validate_geometry_input(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)

edges, dt = _voronoi_edges(coordinates)
droplist = _filter_gabriel(
edges,
dt.points,
)
output = numpy.row_stack(list(set(map(tuple, edges)).difference(set(droplist))))
ids = numpy.asarray(ids)
head, tail = ids[output[:, 0]], ids[output[:, 1]]

if bandwidth is numpy.inf and kernel == "boxcar":
weights = numpy.ones(head.shape, dtype=numpy.int8)
else:
distances = ((coordinates[output[:, 0]] - coordinates[output[:, 1]]) ** 2).sum(
axis=1
).squeeze() ** 0.5
weights = _kernel_functions[kernel](distances, bandwidth)

# TODO: check for coincident points which result in
# dropped points from the triangulation and the
# misalignment of the weights and the attribute array
heads_ix, tails_ix = numpy.row_stack(list(set(map(tuple, edges)).difference(set(droplist)))).T

return head, tail, weights

return heads_ix, tails_ix

def _relative_neighborhood(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
@_validate_coincident
def _relative_neighborhood(coordinates):
"""
Constructs the Relative Neighborhood graph from a set of points.
This graph is a subset of the Delaunay triangulation, where only
@@ -219,26 +259,18 @@ def _relative_neighborhood(coordinates, ids=None, bandwidth=numpy.inf, kernel="b
" to accelerate the computation of graphs. Without numba,"
" these computations may become unduly slow on large data."
)
coordinates, ids, _ = _validate_geometry_input(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)


edges, dt = _voronoi_edges(coordinates)
output, _ = _filter_relativehood(edges, dt.points, return_dkmax=False)

head_ix, tail_ix, distance = zip(*output)

head, tail = ids[numpy.asarray(head_ix)], ids[numpy.asarray(tail_ix)]
heads_ix, tails_ix, distance = zip(*output)
heads_ix, tails_ix = numpy.asarray(heads_ix), numpy.asarray(tails_ix)

if bandwidth is numpy.inf and kernel == "boxcar":
weights = numpy.ones(head.shape, dtype=numpy.int8)
else:
weights = _kernel_functions[kernel](numpy.array(distance), bandwidth)
return heads_ix, tails_ix

return head, tail, weights


def _voronoi(coordinates, ids=None, clip="extent", rook=True):
@_validate_coincident
def _voronoi(coordinates, clip="extent", rook=True):
"""
Compute contiguity weights according to a clipped
Voronoi diagram.
@@ -290,13 +322,10 @@ def _voronoi(coordinates, ids=None, clip="extent", rook=True):
delaunay triangulations in many applied contexts and
generally will remove "long" links in the delaunay graph.
"""
coordinates, ids, _ = _validate_geometry_input(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)

cells, _ = voronoi_frames(coordinates, clip=clip)
return _vertex_set_intersection(cells, rook=rook, ids=ids)
heads_ix, tails_ix, weights = _vertex_set_intersection(cells, rook=rook)

return heads_ix, tails_ix
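
Taken together, each constructor now computes only integer edge indices, while `_validate_coincident` handles input validation, the coincidence policy, and kernel weighting. A hedged usage sketch of the decorated constructors (internal API from this PR, argument values illustrative):

import numpy
from libpysal.graph._triangulation import _delaunay

points = numpy.random.random((100, 2))

# default policy: raise if any two points share a location
heads, tails, weights = _delaunay(points)

# collapse coincident points to unique locations, triangulate, then
# re-expand the result so co-located observations form a clique
heads, tails, weights = _delaunay(points, coincident="clique")

# distance-decayed weights with an optimized bandwidth, instead of 1s
heads, tails, weights = _delaunay(points, kernel="boxcar", bandwidth="auto")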

#### utilities

@@ -342,7 +371,6 @@ def _filter_gabriel(edges, coordinates):
in order to construct the Gabriel graph.
"""
edge_pointer = 0
edges.max()
n_edges = len(edges)
to_drop = []
while edge_pointer < n_edges:
@@ -382,8 +410,7 @@ def _filter_relativehood(edges, coordinates, return_dkmax=False):
3. for each edge of the delaunay (i,j), prune
if any dkmax is smaller than d(i,j)
"""
n = edges.max()
len(edges)
n_edges = len(edges)
out = []
r = []
for edge in edges:
@@ -393,7 +420,7 @@
dkmax = 0
dij = ((pi - pj) ** 2).sum() ** 0.5
prune = False
for k in range(n):
for k in range(n_edges):
pk = coordinates[k]
dik = ((pi - pk) ** 2).sum() ** 0.5
djk = ((pj - pk) ** 2).sum() ** 0.5
@@ -420,4 +447,4 @@ def _voronoi_edges(coordinates):
.drop_duplicates()
.values
)
return edges, dt
return edges, dt
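
For reference, a compact sketch of the relative-neighborhood pruning rule that `_filter_relativehood` implements, written directly from the docstring above (a naive O(|E|·n) version, not the PR's numba-accelerated code):

import numpy

def rng_filter_sketch(edges, coordinates):
    # keep a delaunay edge (i, j) unless some third point k is closer
    # to both i and j than i and j are to each other
    kept = []
    for i, j in edges:
        pi, pj = coordinates[i], coordinates[j]
        dij = numpy.linalg.norm(pi - pj)
        prune = False
        for k in range(len(coordinates)):
            if k == i or k == j:
                continue
            dkmax = max(
                numpy.linalg.norm(pi - coordinates[k]),
                numpy.linalg.norm(pj - coordinates[k]),
            )
            if dkmax < dij:  # k dominates the edge, so prune (i, j)
                prune = True
                break
        if not prune:
            kept.append((i, j))
    return kept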