diff --git a/libpysal/graph/_kernel.py b/libpysal/graph/_kernel.py index 57ae58b12..aa6dce0b5 100644 --- a/libpysal/graph/_kernel.py +++ b/libpysal/graph/_kernel.py @@ -3,11 +3,11 @@ from scipy import optimize, sparse, spatial, stats from ._utils import ( - _build_coincidence_lookup, + CoplanarError, + _build_coplanarity_lookup, _induce_cliques, _jitter_geoms, _reorder_adjtable_by_ids, - _resolve_islands, _sparse_to_arrays, _validate_geometry_input, ) @@ -78,7 +78,8 @@ def _kernel( ids=None, p=2, taper=True, - coincident="raise", + coplanar="raise", + resolve_isolates=True, ): """ Compute a kernel function over a distance matrix. @@ -124,6 +125,8 @@ def _kernel( parameter for minkowski metric, ignored if metric != "minkowski". taper : bool (default: True) remove links with a weight equal to zero + resolve_isolates : bool + Try to resolve isolates. Can be disabled if we are dealing with cliques later. """ if metric != "precomputed": coordinates, ids, _ = _validate_geometry_input( @@ -152,7 +155,7 @@ def _kernel( if k is not None: if metric != "precomputed": - d = _knn(coordinates, k=k, metric=metric, p=p, coincident=coincident) + d = _knn(coordinates, k=k, metric=metric, p=p, coplanar=coplanar) else: d = coordinates * (coordinates.argsort(axis=1, kind="stable") < (k + 1)) else: @@ -201,20 +204,21 @@ def _kernel( if taper: d.eliminate_zeros() - heads, tails, weights = _sparse_to_arrays(d, ids=ids) + return _sparse_to_arrays(d, ids=ids, resolve_isolates=resolve_isolates) - return _resolve_islands(heads, tails, ids, weights) - -def _knn(coordinates, metric="euclidean", k=1, p=2, coincident="raise"): +def _knn(coordinates, metric="euclidean", k=1, p=2, coplanar="raise"): """internal function called only within _kernel, never directly to build KNN""" coordinates, ids, geoms = _validate_geometry_input( coordinates, ids=None, valid_geometry_types=_VALID_GEOMETRY_TYPES ) - n_coincident = geoms.geometry.duplicated().sum() + if coplanar == "jitter": + coordinates, geoms = _jitter_geoms(coordinates, geoms=geoms) + + n_coplanar = geoms.geometry.duplicated().sum() n_samples, _ = coordinates.shape - if n_coincident == 0: + if n_coplanar == 0: if metric == "haversine": # sklearn haversine works with (lat,lng) in radians... coordinates = numpy.fliplr(numpy.deg2rad(coordinates)) @@ -233,38 +237,47 @@ def _knn(coordinates, metric="euclidean", k=1, p=2, coincident="raise"): ) return d else: - coincident_lut = _build_coincidence_lookup(geoms) - max_at_one_site = coincident_lut.input_index.str.len().max() - if coincident == "raise": - raise ValueError( - f"There are {len(coincident_lut)} unique locations in the dataset, " - f"but {len(coordinates)} observations. At least one of these sites " - f"has {max_at_one_site} points, more than the {k} nearest neighbors " - f"requested. This means there are more than {k} points in the same " - "location, which makes this graph type undefined. To address " - "this issue, consider setting `coincident='clique'` or consult the " - "documentation about coincident points." + coplanar_lookup, nearest = _build_coplanarity_lookup(geoms) + _, counts = numpy.unique(nearest, return_counts=True) + max_at_one_site = counts.max() + if coplanar == "raise": + raise CoplanarError( + f"There are {len(coordinates) - len(coplanar_lookup)} unique locations " + f"in the dataset, but {len(coordinates)} observations. At least one of " + f"these sites has {max_at_one_site} points, more than the {k} nearest " + f"neighbors requested. This means there are more than {k} points in " + "the same location, which makes this graph type undefined. To address " + "this issue, consider setting `coplanar='clique'` or consult the " + "documentation about coplanar points." ) - if coincident == "jitter": - # force re-jittering over and over again until the coincidence is broken + if coplanar == "jitter": + # force re-jittering over and over again until the coplanarity is broken return _knn( _jitter_geoms(coordinates, geoms)[-1], metric=metric, k=k, p=p, - coincident="jitter", + coplanar="jitter", ) - if coincident == "clique": + if coplanar == "clique": heads, tails, weights = _sparse_to_arrays( _knn( - coincident_lut.geometry, metric=metric, k=k, p=p, coincident="raise" + numpy.delete(coordinates, coplanar_lookup, 0), + metric=metric, + k=k, + p=p, + coplanar="raise", ) ) adjtable = pandas.DataFrame.from_dict( {"focal": heads, "neighbor": tails, "weight": weights} ) - adjtable = _induce_cliques(adjtable, coincident_lut, fill_value=-1) + adjtable = _induce_cliques( + adjtable, coplanar_lookup, nearest, fill_value=-1 + ) + adjtable["focal"] = ids[adjtable.focal] + adjtable["neighbor"] = ids[adjtable.neighbor] adjtable = _reorder_adjtable_by_ids(adjtable, ids) sparse_out = sparse.csr_array( ( @@ -276,7 +289,7 @@ def _knn(coordinates, metric="euclidean", k=1, p=2, coincident="raise"): sparse_out.data[sparse_out.data < 0] = 0 return sparse_out raise ValueError( - f"'{coincident}' is not a valid option. Use one of " + f"'{coplanar}' is not a valid option. Use one of " "['raise', 'jitter', 'clique']." ) diff --git a/libpysal/graph/_triangulation.py b/libpysal/graph/_triangulation.py index 4dfb42b6a..8ce8539a9 100644 --- a/libpysal/graph/_triangulation.py +++ b/libpysal/graph/_triangulation.py @@ -10,7 +10,7 @@ from ._contiguity import _vertex_set_intersection from ._kernel import _kernel, _kernel_functions, _optimize_bandwidth from ._utils import ( - _build_coincidence_lookup, + CoplanarError, _induce_cliques, _jitter_geoms, _reorder_adjtable_by_ids, @@ -39,59 +39,36 @@ # This is in the module, rather than in `utils`, to ensure that it # can access `_VALID_GEOMETRY_TYPES` without defining a nested decorator. -def _validate_coincident(triangulator): - """This is a decorator that validates input for coincident points""" +def _validate_coplanar(triangulator): + """This is a decorator that validates input for coplanar points""" @wraps(triangulator) def tri_with_validation( coordinates, ids=None, - coincident="raise", + coplanar="raise", kernel=None, bandwidth=None, seed=None, **kwargs, ): + if coplanar not in ["raise", "jitter", "clique"]: + raise ValueError( + f"Recieved option coplanar='{coplanar}', but only options " + "'raise','clique','jitter' are suppported." + ) + # validate geometry input - coordinates, ids, geoms = _validate_geometry_input( + coordinates, ids, _ = _validate_geometry_input( coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES ) - - # check for coincident points - n_coincident = geoms.geometry.duplicated().sum() - - # resolve coincident points prior triangulation - if n_coincident > 0: - coincident_lut = _build_coincidence_lookup(geoms) - if coincident == "raise": - raise ValueError( - f"There are {len(coincident_lut)} unique locations in " - f"the dataset, but {len(geoms)} observations. This means there " - "are multiple points in the same location, which is undefined " - "for this graph type. To address this issue, consider setting " - "`coincident='clique'` or consult the documentation about " - "coincident points." - ) - elif coincident == "jitter": - coordinates, geoms = _jitter_geoms(coordinates, geoms, seed=seed) - elif coincident == "clique": - input_coordinates, input_ids, input_geoms = coordinates, ids, geoms - coordinates, ids, geoms = _validate_geometry_input( - coincident_lut.geometry, - ids=coincident_lut.index, - valid_geometry_types=_VALID_GEOMETRY_TYPES, - ) - else: - raise ValueError( - f"Recieved option coincident='{coincident}', but only options " - "'raise','clique','jitter' are suppported." - ) + if coplanar == "jitter": + coordinates = _jitter_geoms(coordinates, seed=seed) # generate triangulation (triangulator is the wrapped function) - heads_ix, tails_ix = triangulator(coordinates, **kwargs) - - # map ids - heads, tails = ids[heads_ix], ids[tails_ix] + heads_ix, tails_ix, coplanar_loopkup = triangulator( + coordinates, coplanar, **kwargs + ) # process weights if kernel is None: @@ -103,20 +80,21 @@ def tri_with_validation( sparse_d = sparse.csc_array((distances, (heads_ix, tails_ix))) if bandwidth == "auto": bandwidth = _optimize_bandwidth(sparse_d, kernel) - _, _, weights = _kernel( + heads_ix, tails_ix, weights = _kernel( sparse_d, metric="precomputed", kernel=kernel, bandwidth=bandwidth, taper=False, + resolve_isolates=False, # no isolates in triangulation ) # create adjacency adjtable = pandas.DataFrame.from_dict( - {"focal": heads, "neighbor": tails, "weight": weights} + {"focal": heads_ix, "neighbor": tails_ix, "weight": weights} ) # reinsert points resolved via clique - if (n_coincident > 0) & (coincident == "clique"): + if (coplanar_loopkup.shape[0] > 0) & (coplanar == "clique"): # Note that the kernel is only used to compute a fill value for the clique. # In the case of the voronoi weights. Using boxcar with an infinite # bandwidth also gives us the correct fill value for the voronoi weight: 1. @@ -126,11 +104,15 @@ def tri_with_validation( fill_value = _kernel_functions[kernel]( numpy.array([0]), bandwidth ).item() - adjtable = _induce_cliques(adjtable, coincident_lut, fill_value=fill_value) - coordinates, ids, geoms = input_coordinates, input_ids, input_geoms - heads, tails, weights = adjtable.values.T + coplanar, _, nearest = coplanar_loopkup.T + adjtable = _induce_cliques(adjtable, coplanar, nearest, fill_value) + adjtable["focal"] = ids[adjtable.focal] + adjtable["neighbor"] = ids[adjtable.neighbor] adjtable = _reorder_adjtable_by_ids(adjtable, ids) + else: + adjtable["focal"] = ids[adjtable.focal] + adjtable["neighbor"] = ids[adjtable.neighbor] # return data for Graph.from_arrays return adjtable.focal.values, adjtable.neighbor.values, adjtable.weight.values @@ -138,8 +120,8 @@ def tri_with_validation( return tri_with_validation -@_validate_coincident -def _delaunay(coordinates): +@_validate_coplanar +def _delaunay(coordinates, coplanar): """ Constructor of the Delaunay graph of a set of input points. Relies on scipy.spatial.Delaunay and numba to quickly construct @@ -167,21 +149,21 @@ def _delaunay(coordinates): kernel : string or callable kernel function to use in order to weight the output graph. See the kernel() function for more details. - coincident : string (default: "raise") - How to deal with coincident points. Coincident points make all triangulations + coplanar : string (default: "raise") + How to deal with coplanar points. Coplanar points make all triangulations ill-posed, and thus they need to be addressed in order to create a valid graph. This parameter must be one of the following: - * "raise": raise an error if coincident points are present. This is default. + * "raise": raise an error if coplanar points are present. This is default. * "jitter": jitter the input points by a small value. This makes the resulting depend on the seed provided to the triangulation function. - * "clique": expand coincident points into a graph clique. This creates a + * "clique": expand coplanar points into a graph clique. This creates a "unique points" triangulation using all of the unique locations in the data. Then, co-located samples are connected within a site. Finally, co-located samples are connected to other sites in the "unique points" triangulation. seed : int (default: None) An integer value used to ensure that the pseudorandom number generator provides the same value over replications. By default, no seed is used, so results - will be random every time. This is only used if coincident='jitter'. + will be random every time. This is only used if coplanar='jitter'. Notes ----- @@ -208,15 +190,14 @@ def _delaunay(coordinates): " these computations may become unduly slow on large data.", stacklevel=3, ) - - edges, _ = _voronoi_edges(coordinates) + edges, _, coplanar = _voronoi_edges(coordinates, coplanar) heads_ix, tails_ix = edges.T - return heads_ix, tails_ix + return heads_ix, tails_ix, coplanar -@_validate_coincident -def _gabriel(coordinates): +@_validate_coplanar +def _gabriel(coordinates, coplanar): """ Constructs the Gabriel graph of a set of points. This graph is a subset of the Delaunay triangulation where only "short" links are retained. This @@ -252,21 +233,21 @@ def _gabriel(coordinates): kernel : string or callable kernel function to use in order to weight the output graph. See the kernel() function for more details. - coincident : string (default: "raise") - How to deal with coincident points. Coincident points make all triangulations + coplanar : string (default: "raise") + How to deal with coplanar points. Coplanar points make all triangulations ill-posed, and thus they need to be addressed in order to create a valid graph. This parameter must be one of the following: - * "raise": raise an error if coincident points are present. This is default. + * "raise": raise an error if coplanar points are present. This is default. * "jitter": jitter the input points by a small value. This makes the resulting depend on the seed provided to the triangulation function. - * "clique": expand coincident points into a graph clique. This creates a + * "clique": expand coplanar points into a graph clique. This creates a "unique points" triangulation using all of the unique locations in the data. Then, co-located samples are connected within a site. Finally, co-located samples are connected to other sites in the "unique points" triangulation. seed : int (default: None) An integer value used to ensure that the pseudorandom number generator provides the same value over replications. By default, no seed is used, so results - will be random every time. This is only used if coincident='jitter'. + will be random every time. This is only used if coplanar='jitter'. """ if not HAS_NUMBA: warnings.warn( @@ -276,23 +257,22 @@ def _gabriel(coordinates): stacklevel=3, ) - edges, dt = _voronoi_edges(coordinates) + edges, points, coplanar = _voronoi_edges(coordinates, coplanar) droplist = _filter_gabriel( edges, - dt.points, + points, ) - heads_ix, tails_ix = numpy.row_stack( - list(set(map(tuple, edges)).difference(set(droplist))) - ).T + edges = numpy.row_stack(list(set(map(tuple, edges)).difference(set(droplist)))) + heads_ix, tails_ix = edges.T order = numpy.lexsort((tails_ix, heads_ix)) sorted_heads_ix = heads_ix[order] sorted_tails_ix = tails_ix[order] - return sorted_heads_ix, sorted_tails_ix + return sorted_heads_ix, sorted_tails_ix, coplanar -@_validate_coincident -def _relative_neighborhood(coordinates): +@_validate_coplanar +def _relative_neighborhood(coordinates, coplanar): """ Constructs the Relative Neighborhood graph from a set of points. This graph is a subset of the Delaunay triangulation, where only @@ -326,21 +306,21 @@ def _relative_neighborhood(coordinates): kernel : string or callable kernel function to use in order to weight the output graph. See the kernel() function for more details. - coincident : string (default: "raise") - How to deal with coincident points. Coincident points make all triangulations + coplanar : string (default: "raise") + How to deal with coplanar points. Coplanar points make all triangulations ill-posed, and thus they need to be addressed in order to create a valid graph. This parameter must be one of the following: - * "raise": raise an error if coincident points are present. This is default. + * "raise": raise an error if coplanar points are present. This is default. * "jitter": jitter the input points by a small value. This makes the resulting depend on the seed provided to the triangulation function. - * "clique": expand coincident points into a graph clique. This creates a + * "clique": expand coplanar points into a graph clique. This creates a "unique points" triangulation using all of the unique locations in the data. Then, co-located samples are connected within a site. Finally, co-located samples are connected to other sites in the "unique points" triangulation. seed : int (default: None) An integer value used to ensure that the pseudorandom number generator provides the same value over replications. By default, no seed is used, so results - will be random every time. This is only used if coincident='jitter'. + will be random every time. This is only used if coplanar='jitter'. """ if not HAS_NUMBA: warnings.warn( @@ -350,17 +330,17 @@ def _relative_neighborhood(coordinates): stacklevel=3, ) - edges, dt = _voronoi_edges(coordinates) - output, _ = _filter_relativehood(edges, dt.points, return_dkmax=False) + edges, points, coplanar = _voronoi_edges(coordinates, coplanar) + output, _ = _filter_relativehood(edges, points, return_dkmax=False) heads_ix, tails_ix, distance = zip(*output, strict=True) heads_ix, tails_ix = numpy.asarray(heads_ix), numpy.asarray(tails_ix) - return heads_ix, tails_ix + return heads_ix, tails_ix, coplanar -@_validate_coincident -def _voronoi(coordinates, clip="bounding_box", rook=True): +@_validate_coplanar +def _voronoi(coordinates, coplanar, clip="bounding_box", rook=True): """ Compute contiguity weights according to a clipped Voronoi diagram. @@ -400,21 +380,21 @@ def _voronoi(coordinates, clip="bounding_box", rook=True): Contiguity method. If True, two geometries are considered neighbours if they share at least one edge. If False, two geometries are considered neighbours if they share at least one vertex. By default True. - coincident : string (default: "raise") - How to deal with coincident points. Coincident points make all triangulations + coplanar : string (default: "raise") + How to deal with coplanar points. Coplanar points make all triangulations ill-posed, and thus they need to be addressed in order to create a valid graph. This parameter must be one of the following: - * "raise": raise an error if coincident points are present. This is default. + * "raise": raise an error if coplanar points are present. This is default. * "jitter": jitter the input points by a small value. This makes the resulting depend on the seed provided to the triangulation function. - * "clique": expand coincident points into a graph clique. This creates a + * "clique": expand coplanar points into a graph clique. This creates a "unique points" triangulation using all of the unique locations in the data. Then, co-located samples are connected within a site. Finally, co-located samples are connected to other sites in the "unique points" triangulation. seed : int (default: None) An integer value used to ensure that the pseudorandom number generator provides the same value over replications. By default, no seed is used, so results - will be random every time. This is only used if coincident='jitter'. + will be random every time. This is only used if coplanar='jitter'. Notes ----- @@ -428,10 +408,21 @@ def _voronoi(coordinates, clip="bounding_box", rook=True): delaunay triangulations in many applied contexts and generally will remove "long" links in the delaunay graph. """ + if coplanar == "raise": + unique = numpy.unique(coordinates, axis=0) + if unique.shape != coordinates.shape: + raise CoplanarError( + f"There are {len(unique)} unique locations in " + f"the dataset, but {len(coordinates)} observations. This means there " + "are multiple points in the same location, which is undefined " + "for this graph type. To address this issue, consider setting " + "`coplanar='clique'` or consult the documentation about " + "coplanar points." + ) cells = voronoi_frames(coordinates, clip=clip, return_input=False, as_gdf=False) heads_ix, tails_ix, weights = _vertex_set_intersection(cells, rook=rook) - return heads_ix, tails_ix + return heads_ix, tails_ix, numpy.array([]) #### utilities @@ -530,6 +521,8 @@ def _filter_relativehood(edges, coordinates, return_dkmax=False): if (i == k) or (j == k): continue pk = coordinates[k] + if (pi == pk).all() or (pj == pk).all(): # coplanar + continue dik = ((pi - pk) ** 2).sum() ** 0.5 djk = ((pj - pk) ** 2).sum() ** 0.5 dkmax = numpy.array([dik, djk]).max() @@ -543,8 +536,19 @@ def _filter_relativehood(edges, coordinates, return_dkmax=False): return out, r -def _voronoi_edges(coordinates): +def _voronoi_edges(coordinates, coplanar): dt = spatial.Delaunay(coordinates) + + if dt.coplanar.shape[0] > 0 and coplanar == "raise": + raise CoplanarError( + f"There are {len(coordinates) - len(dt.coplanar)} unique locations in " + f"the dataset, but {len(coordinates)} observations. This means there " + "are multiple points in the same location, which is undefined " + "for this graph type. To address this issue, consider setting " + "`coplanar='clique'` or consult the documentation about " + "coplanar points." + ) + edges = _edges_from_simplices(dt.simplices) edges = ( pandas.DataFrame(numpy.asarray(list(edges))) @@ -552,4 +556,4 @@ def _voronoi_edges(coordinates): .drop_duplicates() .values ) - return edges, dt + return edges, dt.points, dt.coplanar diff --git a/libpysal/graph/_utils.py b/libpysal/graph/_utils.py index 32187329f..fad7ec6c7 100644 --- a/libpysal/graph/_utils.py +++ b/libpysal/graph/_utils.py @@ -1,5 +1,4 @@ import warnings -from itertools import permutations import geopandas import numpy as np @@ -11,8 +10,18 @@ PANDAS_GE_21 = Version(pd.__version__) >= Version("2.1.0") -def _sparse_to_arrays(sparray, ids=None): - """Convert sparse array to arrays of adjacency""" +class CoplanarError(ValueError): + """Custom ValueError raised when coplanar points are detected.""" + + pass + + +def _sparse_to_arrays(sparray, ids=None, resolve_isolates=True): + """Convert sparse array to arrays of adjacency + + When we know we are dealing with cliques, we don't want to resolve + isolates here but will do that later once cliques are induced. + """ sparray = sparray.tocoo(copy=False) if ids is not None: ids = np.asarray(ids) @@ -32,10 +41,14 @@ def _sparse_to_arrays(sparray, ids=None): tail = sparray.col[sorter] data = sparray.data[sorter] ids = np.arange(sparray.shape[0], dtype=int) - return _resolve_islands(head, tail, ids, data) + if resolve_isolates: + return _resolve_islands(head, tail, ids, data) -def _jitter_geoms(coordinates, geoms, seed=None): + return head, tail, data + + +def _jitter_geoms(coordinates, geoms=None, seed=None): """ Jitter geometries based on the smallest required movements to induce uniqueness. For each point, this samples a radius and angle uniformly @@ -61,11 +74,16 @@ def _jitter_geoms(coordinates, geoms, seed=None): dy = r + np.cos(theta) # then adding the displacements coordinates = coordinates + np.column_stack((dx, dy)) - geoms = geopandas.GeoSeries(geopandas.points_from_xy(*coordinates.T, crs=geoms.crs)) - return coordinates, geoms + if geoms is not None: + geoms = geopandas.GeoSeries( + geopandas.points_from_xy(*coordinates.T, crs=geoms.crs) + ) + return coordinates, geoms + + return coordinates -def _induce_cliques(adjtable, clique_to_members, fill_value=1): +def _induce_cliques(adjtable, coplanar, nearest, fill_value=1): """ induce cliques into the input graph. This connects everything within a clique together, as well as connecting all things outside of the clique @@ -73,34 +91,25 @@ def _induce_cliques(adjtable, clique_to_members, fill_value=1): This does not guarantee/understand ordering of the *output* adjacency table. """ - adj_across_clique = ( - adjtable.merge( - clique_to_members["input_index"], left_on="focal", right_index=True - ) - .explode("input_index") - .rename(columns={"input_index": "subclique_focal"}) - .merge(clique_to_members["input_index"], left_on="neighbor", right_index=True) - .explode("input_index") - .rename(columns={"input_index": "subclique_neighbor"}) - .reset_index() - .drop(["focal", "neighbor", "index"], axis=1) - .rename(columns={"subclique_focal": "focal", "subclique_neighbor": "neighbor"}) - )[["focal", "neighbor", "weight"]] - is_multimember_clique = clique_to_members["input_index"].str.len() > 1 - adj_within_clique = ( - clique_to_members[is_multimember_clique]["input_index"] - .apply(lambda x: list(permutations(x, 2))) - .explode() - .apply(pd.Series) - .rename(columns={0: "focal", 1: "neighbor"}) - .assign(weight=fill_value) + coplanar_addition = [] + for c, n in zip(coplanar, nearest, strict=True): + neighbors = adjtable.neighbor[adjtable.focal == n] + for n_ in neighbors: + fill = adjtable.weight[ + (adjtable.focal == n) & (adjtable.neighbor == n_) + ].item() + coplanar_addition.append([c, n_, fill]) + coplanar_addition.append([n_, c, fill]) + coplanar_addition.append([c, n, fill_value]) + coplanar_addition.append([n, c, fill_value]) + adjtable_filled = pd.concat( + [ + adjtable, + pd.DataFrame(coplanar_addition, columns=["focal", "neighbor", "weight"]), + ], + ignore_index=True, ) - - new_adj = pd.concat( - (adj_across_clique, adj_within_clique), ignore_index=True, axis=0 - ).reset_index(drop=True) - - return new_adj + return adjtable_filled def _neighbor_dict_to_edges(neighbors, weights=None): @@ -136,37 +145,23 @@ def _neighbor_dict_to_edges(neighbors, weights=None): return heads, tails, data_array -def _build_coincidence_lookup(geoms): +def _build_coplanarity_lookup(geoms): """ - Identify coincident points and create a look-up table for the coincident geometries. + Identify coplanar points and create a look-up table for the coplanar geometries. """ - valid_coincident_geom_types = set(("Point",)) # noqa: C405 - if not set(geoms.geom_type) <= valid_coincident_geom_types: - raise ValueError( - "coindicence checks are only well-defined for " - f"geom_types: {valid_coincident_geom_types}" - ) - if GPD_013: - lut = ( - geoms.to_frame("geometry") - .reset_index() - .groupby("geometry")["index"] - .agg(list) - .reset_index() - ) - else: - lut = ( - geoms.to_wkb() - .to_frame("geometry") - .reset_index() - .groupby("geometry")["index"] - .agg(list) - .reset_index() - ) - lut["geometry"] = geopandas.GeoSeries.from_wkb(lut["geometry"]) - - lut = geopandas.GeoDataFrame(lut) - return lut.rename(columns={"index": "input_index"}) + geoms = geoms.reset_index(drop=True) + coplanar = [] + nearest = [] + r = geoms.groupby(geoms).groups if GPD_013 else geoms.groupby(geoms.to_wkb()).groups + for g in r.values(): + if len(g) == 2: + coplanar.append(g[0]) + nearest.append(g[1]) + elif len(g) > 2: + for n in g[1:]: + coplanar.append(n) + nearest.append(g[0]) + return np.asarray(coplanar), np.asarray(nearest) def _validate_geometry_input(geoms, ids=None, valid_geometry_types=None): diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index b3755f645..8a5b9a8ae 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -860,7 +860,7 @@ def build_kernel( bandwidth=None, metric="euclidean", p=2, - coincident="raise", + coplanar="raise", ): """Generate Graph from geometry data based on a kernel function @@ -901,11 +901,11 @@ def build_kernel( only euclidean, minkowski, and manhattan/cityblock distances are admitted. p : int (default: 2) parameter for minkowski metric, ignored if metric != "minkowski". - coincident: str, optional (default "raise") - Method for handling coincident points when ``k`` is not None. Options are - ``'raise'`` (raising an exception when coincident points are present), - ``'jitter'`` (randomly displace coincident points to produce uniqueness), & - ``'clique'`` (induce fully-connected sub cliques for coincident points). + coplanar: str, optional (default "raise") + Method for handling coplanar points when ``k`` is not None. Options are + ``'raise'`` (raising an exception when coplanar points are present), + ``'jitter'`` (randomly displace coplanar points to produce uniqueness), & + ``'clique'`` (induce fully-connected sub cliques for coplanar points). Returns ------- @@ -922,13 +922,13 @@ def build_kernel( k=k, p=p, ids=ids, - coincident=coincident, + coplanar=coplanar, ) return cls.from_arrays(head, tail, weight) @classmethod - def build_knn(cls, data, k, metric="euclidean", p=2, coincident="raise"): + def build_knn(cls, data, k, metric="euclidean", p=2, coplanar="raise"): """Generate Graph from geometry data based on k-nearest neighbors search Parameters @@ -948,11 +948,11 @@ def build_knn(cls, data, k, metric="euclidean", p=2, coincident="raise"): only euclidean, minkowski, and manhattan/cityblock distances are admitted. p : int (default: 2) parameter for minkowski metric, ignored if metric != "minkowski". - coincident: str, optional (default "raise") - Method for handling coincident points. Options include - ``'raise'`` (raising an exception when coincident points are present), - ``'jitter'`` (randomly displace coincident points to produce uniqueness), & - ``'clique'`` (induce fully-connected sub cliques for coincident points). + coplanar: str, optional (default "raise") + Method for handling coplanar points. Options include + ``'raise'`` (raising an exception when coplanar points are present), + ``'jitter'`` (randomly displace coplanar points to produce uniqueness), & + ``'clique'`` (induce fully-connected sub cliques for coplanar points). Returns @@ -1017,7 +1017,7 @@ def build_knn(cls, data, k, metric="euclidean", p=2, coincident="raise"): k=k, p=p, ids=ids, - coincident=coincident, + coplanar=coplanar, ) return cls.from_arrays(head, tail, weight) @@ -1103,7 +1103,7 @@ def build_triangulation( kernel="boxcar", clip="bounding_box", rook=True, - coincident="raise", + coplanar="raise", ): """Generate Graph from geometry based on triangulation @@ -1151,11 +1151,11 @@ def build_triangulation( If True, two geometries are considered neighbours if they share at least one edge. If False, two geometries are considered neighbours if they share at least one vertex. By default True - coincident: str, optional (default "raise") - Method for handling coincident points. Options include - ``'raise'`` (raising an exception when coincident points are present), - ``'jitter'`` (randomly displace coincident points to produce uniqueness), & - ``'clique'`` (induce fully-connected sub cliques for coincident points). + coplanar: str, optional (default "raise") + Method for handling coplanar points. Options include + ``'raise'`` (raising an exception when coplanar points are present), + ``'jitter'`` (randomly displace coplanar points to produce uniqueness), & + ``'clique'`` (induce fully-connected sub cliques for coplanar points). Returns ------- @@ -1204,19 +1204,19 @@ def build_triangulation( if method == "delaunay": head, tail, weights = _delaunay( - data, ids=ids, bandwidth=bandwidth, kernel=kernel, coincident=coincident + data, ids=ids, bandwidth=bandwidth, kernel=kernel, coplanar=coplanar ) elif method == "gabriel": head, tail, weights = _gabriel( - data, ids=ids, bandwidth=bandwidth, kernel=kernel, coincident=coincident + data, ids=ids, bandwidth=bandwidth, kernel=kernel, coplanar=coplanar ) elif method == "relative_neighborhood": head, tail, weights = _relative_neighborhood( - data, ids=ids, bandwidth=bandwidth, kernel=kernel, coincident=coincident + data, ids=ids, bandwidth=bandwidth, kernel=kernel, coplanar=coplanar ) elif method == "voronoi": head, tail, weights = _voronoi( - data, ids=ids, clip=clip, rook=rook, coincident=coincident + data, ids=ids, clip=clip, rook=rook, coplanar=coplanar ) else: raise ValueError( diff --git a/libpysal/graph/tests/test_builders.py b/libpysal/graph/tests/test_builders.py index a071fa672..b668d81d5 100644 --- a/libpysal/graph/tests/test_builders.py +++ b/libpysal/graph/tests/test_builders.py @@ -265,14 +265,14 @@ def test_kernel_strids(self): assert pd.api.types.is_numeric_dtype(g._adjacency.dtype) def test_knn_intids(self): - g = graph.Graph.build_knn(self.gdf, k=3, coincident="jitter") + g = graph.Graph.build_knn(self.gdf, k=3, coplanar="jitter") assert pd.api.types.is_numeric_dtype(g._adjacency.index.dtypes["focal"]) assert pd.api.types.is_numeric_dtype(g._adjacency.index.dtypes["neighbor"]) assert pd.api.types.is_numeric_dtype(g._adjacency.dtype) def test_knn_strids(self): - g = graph.Graph.build_kernel(self.gdf_str, k=3, coincident="jitter") + g = graph.Graph.build_kernel(self.gdf_str, k=3, coplanar="jitter") assert pd.api.types.is_string_dtype(g._adjacency.index.dtypes["focal"]) assert pd.api.types.is_string_dtype(g._adjacency.index.dtypes["neighbor"]) diff --git a/libpysal/graph/tests/test_kernel.py b/libpysal/graph/tests/test_kernel.py index 1a7760588..f79000b2c 100644 --- a/libpysal/graph/tests/test_kernel.py +++ b/libpysal/graph/tests/test_kernel.py @@ -22,6 +22,7 @@ _kernel, _kernel_functions, ) +from libpysal.graph._utils import CoplanarError grocs = geopandas.read_file(geodatasets.get_path("geoda groceries"))[ ["OBJECTID", "geometry"] @@ -299,7 +300,7 @@ def test_metric_k(metric): # raise NotImplementedError() -def test_coincident(): +def test_coplanar(): grocs_duplicated = pd.concat( [grocs, grocs.iloc[:10], grocs.iloc[:3]], ignore_index=True ) @@ -311,20 +312,18 @@ def test_coincident(): np.testing.assert_array_equal(pd.unique(head), grocs_duplicated.index) # k, raise - with pytest.raises(ValueError, match="There are"): + with pytest.raises(CoplanarError, match="There are"): _kernel(grocs_duplicated, k=2) # k, jitter - head, tail, weight = _kernel( - grocs_duplicated, taper=False, k=2, coincident="jitter" - ) + head, tail, weight = _kernel(grocs_duplicated, taper=False, k=2, coplanar="jitter") assert head.shape[0] == len(grocs_duplicated) * 2 assert tail.shape == head.shape assert weight.shape == head.shape np.testing.assert_array_equal(pd.unique(head), grocs_duplicated.index) # k, clique - head, tail, weight = _kernel(grocs_duplicated, k=2, coincident="clique") + head, tail, weight = _kernel(grocs_duplicated, k=2, coplanar="clique") assert head.shape[0] >= len(grocs_duplicated) * 2 assert tail.shape == head.shape assert weight.shape == head.shape diff --git a/libpysal/graph/tests/test_triangulation.py b/libpysal/graph/tests/test_triangulation.py index 227b195e4..2864c5c18 100644 --- a/libpysal/graph/tests/test_triangulation.py +++ b/libpysal/graph/tests/test_triangulation.py @@ -23,6 +23,7 @@ _relative_neighborhood, _voronoi, ) +from libpysal.graph._utils import CoplanarError from libpysal.graph.base import Graph stores = geopandas.read_file(geodatasets.get_path("geoda liquor_stores")).explode( @@ -98,9 +99,9 @@ def my_kernel(distances, bandwidth): def test_correctness_voronoi_clipping(): - noclip = _voronoi(lap_coords, clip=None, rook=True) - extent = _voronoi(lap_coords, clip="bounding_box", rook=True) - alpha = _voronoi(lap_coords, clip="alpha_shape", rook=True) + noclip = _voronoi(lap_coords, coplanar="raise", clip=None, rook=True) + extent = _voronoi(lap_coords, coplanar="raise", clip="bounding_box", rook=True) + alpha = _voronoi(lap_coords, coplanar="raise", clip="alpha_shape", rook=True) g_noclip = Graph.from_arrays(*noclip) g_extent = Graph.from_arrays(*extent) @@ -269,13 +270,13 @@ def test_kernel(): np.testing.assert_array_almost_equal(expected, weight) -def test_coincident_raise_voronoi(): +def test_coplanar_raise_voronoi(): with pytest.raises(ValueError, match="There are"): _voronoi(stores, clip=False) -def test_coincident_jitter_voronoi(): - cp_heads, cp_tails, cp_w = _voronoi(stores, clip=False, coincident="jitter") +def test_coplanar_jitter_voronoi(): + cp_heads, cp_tails, cp_w = _voronoi(stores, clip=False, coplanar="jitter") unique_heads, unique_tails, unique_w = _voronoi(stores_unique, clip=False) assert not np.array_equal(cp_heads, unique_heads) assert not np.array_equal(cp_tails, unique_tails) @@ -285,14 +286,14 @@ def test_coincident_jitter_voronoi(): assert unique_heads.shape[0] == 3360 -class TestCoincident: +class TestCoplanar: def setup_method(self): self.geom = [ shapely.Point(0, 0), shapely.Point(1, 1), shapely.Point(2, 0), shapely.Point(3, 1), - shapely.Point(0, 0), # coincident point + shapely.Point(0, 0), # coplanar point shapely.Point(0, 5), ] self.df_int = geopandas.GeoDataFrame( @@ -305,13 +306,13 @@ def setup_method(self): def test_delaunay_error(self): with pytest.raises( - ValueError, + CoplanarError, match="There are 5 unique locations in the dataset, but 6 observations", ): _delaunay(self.df_int) def test_delaunay_jitter(self): - heads, tails, weights = _delaunay(self.df_int, coincident="jitter", seed=0) + heads, tails, weights = _delaunay(self.df_int, coplanar="jitter", seed=0) exp_heads = np.array( [0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5] @@ -324,7 +325,7 @@ def test_delaunay_jitter(self): np.testing.assert_array_equal(heads, exp_heads) np.testing.assert_array_equal(tails, exp_tails) - heads, tails, weights = _delaunay(self.df_string, coincident="jitter", seed=0) + heads, tails, weights = _delaunay(self.df_string, coplanar="jitter", seed=0) np.testing.assert_array_equal(heads, np.vectorize(self.mapping.get)(exp_heads)) np.testing.assert_array_equal(tails, np.vectorize(self.mapping.get)(exp_tails)) @@ -332,7 +333,7 @@ def test_delaunay_jitter(self): def test_delaunay_clique(self): # TODO: fix the implemntation to make this pass - heads, tails, weights = _delaunay(self.df_int, coincident="clique", seed=0) + heads, tails, weights = _delaunay(self.df_int, coplanar="clique") exp_heads = np.array( [0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5] @@ -345,7 +346,7 @@ def test_delaunay_clique(self): np.testing.assert_array_equal(heads, exp_heads) np.testing.assert_array_equal(tails, exp_tails) - heads, tails, weights = _delaunay(self.df_string, coincident="clique", seed=0) + heads, tails, weights = _delaunay(self.df_string, coplanar="clique") np.testing.assert_array_equal(heads, np.vectorize(self.mapping.get)(exp_heads)) np.testing.assert_array_equal(tails, np.vectorize(self.mapping.get)(exp_tails)) @@ -353,13 +354,13 @@ def test_delaunay_clique(self): def test_gabriel_error(self): with pytest.raises( - ValueError, + CoplanarError, match="There are 5 unique locations in the dataset, but 6 observations", ): _gabriel(self.df_int) def test_gabriel_jitter(self): - heads, tails, weights = _gabriel(self.df_int, coincident="jitter", seed=0) + heads, tails, weights = _gabriel(self.df_int, coplanar="jitter", seed=0) exp_heads = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5]) exp_tails = np.array([2, 4, 2, 3, 4, 5, 0, 1, 3, 1, 2, 5, 0, 1, 1, 3]) @@ -368,7 +369,7 @@ def test_gabriel_jitter(self): np.testing.assert_array_equal(heads, exp_heads) np.testing.assert_array_equal(tails, exp_tails) - heads, tails, weights = _gabriel(self.df_string, coincident="jitter", seed=0) + heads, tails, weights = _gabriel(self.df_string, coplanar="jitter", seed=0) np.testing.assert_array_equal(heads, np.vectorize(self.mapping.get)(exp_heads)) np.testing.assert_array_equal(tails, np.vectorize(self.mapping.get)(exp_tails)) @@ -376,7 +377,7 @@ def test_gabriel_jitter(self): def test_gabriel_clique(self): # TODO: fix the implemntation to make this pass - heads, tails, weights = _gabriel(self.df_int, coincident="clique", seed=0) + heads, tails, weights = _gabriel(self.df_int, coplanar="clique") exp_heads = np.array([0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 5]) exp_tails = np.array([1, 2, 4, 0, 2, 3, 4, 5, 0, 1, 3, 4, 1, 2, 0, 1, 2, 1]) @@ -385,7 +386,7 @@ def test_gabriel_clique(self): np.testing.assert_array_equal(heads, exp_heads) np.testing.assert_array_equal(tails, exp_tails) - heads, tails, weights = _gabriel(self.df_string, coincident="clique", seed=0) + heads, tails, weights = _gabriel(self.df_string, coplanar="clique") np.testing.assert_array_equal(heads, np.vectorize(self.mapping.get)(exp_heads)) np.testing.assert_array_equal(tails, np.vectorize(self.mapping.get)(exp_tails)) @@ -393,14 +394,14 @@ def test_gabriel_clique(self): def test_relative_neighborhood_error(self): with pytest.raises( - ValueError, + CoplanarError, match="There are 5 unique locations in the dataset, but 6 observations", ): _relative_neighborhood(self.df_int) def test_relative_neighborhood_jitter(self): heads, tails, weights = _relative_neighborhood( - self.df_int, coincident="jitter", seed=0 + self.df_int, coplanar="jitter", seed=0 ) exp_heads = np.array([0, 0, 1, 1, 2, 3, 3, 4, 4, 5]) @@ -411,7 +412,7 @@ def test_relative_neighborhood_jitter(self): np.testing.assert_array_equal(tails, exp_tails) heads, tails, weights = _relative_neighborhood( - self.df_string, coincident="jitter", seed=0 + self.df_string, coplanar="jitter", seed=0 ) np.testing.assert_array_equal(heads, np.vectorize(self.mapping.get)(exp_heads)) @@ -420,9 +421,7 @@ def test_relative_neighborhood_jitter(self): def test_relative_neighborhood_clique(self): # TODO: fix the implemntation to make this pass - heads, tails, weights = _relative_neighborhood( - self.df_int, coincident="clique", seed=0 - ) + heads, tails, weights = _relative_neighborhood(self.df_int, coplanar="clique") exp_heads = np.array([0, 0, 1, 1, 1, 1, 2, 2, 3, 4, 4, 5]) exp_tails = np.array([1, 4, 0, 2, 4, 5, 1, 3, 2, 0, 1, 1]) @@ -432,7 +431,7 @@ def test_relative_neighborhood_clique(self): np.testing.assert_array_equal(tails, exp_tails) heads, tails, weights = _relative_neighborhood( - self.df_string, coincident="clique", seed=0 + self.df_string, coplanar="clique" ) np.testing.assert_array_equal(heads, np.vectorize(self.mapping.get)(exp_heads)) @@ -441,13 +440,13 @@ def test_relative_neighborhood_clique(self): def test_voronoi_error(self): with pytest.raises( - ValueError, + CoplanarError, match="There are 5 unique locations in the dataset, but 6 observations", ): _voronoi(self.df_int) def test_voronoi_jitter(self): - heads, tails, weights = _voronoi(self.df_int, coincident="jitter", seed=0) + heads, tails, weights = _voronoi(self.df_int, coplanar="jitter", seed=0) exp_heads = np.array([0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5]) exp_tails = np.array([1, 2, 4, 0, 2, 3, 4, 5, 0, 1, 3, 1, 2, 5, 0, 1, 1, 3]) @@ -456,7 +455,7 @@ def test_voronoi_jitter(self): np.testing.assert_array_equal(heads, exp_heads) np.testing.assert_array_equal(tails, exp_tails) - heads, tails, weights = _voronoi(self.df_string, coincident="jitter", seed=0) + heads, tails, weights = _voronoi(self.df_string, coplanar="jitter", seed=0) np.testing.assert_array_equal(heads, np.vectorize(self.mapping.get)(exp_heads)) np.testing.assert_array_equal(tails, np.vectorize(self.mapping.get)(exp_tails)) @@ -464,7 +463,7 @@ def test_voronoi_jitter(self): def test_voronoi_clique(self): # TODO: fix the implemntation to make this pass - heads, tails, weights = _voronoi(self.df_int, coincident="clique", seed=0) + heads, tails, weights = _voronoi(self.df_int, coplanar="clique", seed=0) exp_heads = np.array([0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5]) exp_tails = np.array([1, 4, 0, 2, 3, 4, 5, 1, 3, 1, 2, 5, 0, 1, 1, 3]) @@ -473,7 +472,7 @@ def test_voronoi_clique(self): np.testing.assert_array_equal(heads, exp_heads) np.testing.assert_array_equal(tails, exp_tails) - heads, tails, weights = _voronoi(self.df_string, coincident="clique", seed=0) + heads, tails, weights = _voronoi(self.df_string, coplanar="clique", seed=0) np.testing.assert_array_equal(heads, np.vectorize(self.mapping.get)(exp_heads)) np.testing.assert_array_equal(tails, np.vectorize(self.mapping.get)(exp_tails)) @@ -482,6 +481,6 @@ def test_voronoi_clique(self): def test_wrong_resolver(self): with pytest.raises( ValueError, - match="Recieved option coincident='nonsense'", + match="Recieved option coplanar='nonsense'", ): - _delaunay(self.df_int, coincident="nonsense") + _delaunay(self.df_int, coplanar="nonsense")