Skip to content

Commit

Permalink
Merge pull request #112 from jGaboardi/cleanup_nearest_neighbor
Browse files Browse the repository at this point in the history
[WIP] cleanup_nearest_neighbor
  • Loading branch information
jGaboardi committed Sep 14, 2018
2 parents 3a9a7e1 + 41386fa commit 739a37c
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 92 deletions.
149 changes: 73 additions & 76 deletions spaghetti/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,11 +855,13 @@ def allneighbordistances(self, sourcepattern, destpattern=None,
Parameters
----------
sourcepattern : str
The key of a point pattern snapped to the network.
sourcepattern : str or spaghetti.network.PointPattern
The key of a point pattern snapped to the network OR
the full spaghetti.network.PointPattern object.
destpattern : str or spaghetti.network.PointPattern
(Optional) The key of a point pattern snapped to the network.
(Optional) The key of a point pattern snapped to the network OR
the full spaghetti.network.PointPattern object.
fill_diagonal : float, int
(Optional) Fill the diagonal of the cost matrix. Default is None.
Expand Down Expand Up @@ -888,29 +890,32 @@ def allneighbordistances(self, sourcepattern, destpattern=None,
>>> ntw.snapobservations(examples.get_path('crimes.shp'),
... 'crimes',
... attribute=True)
>>> crimes_pp = ntw.pointpatterns['crimes']
>>> s2s_dist = ntw.allneighbordistances(crimes_pp)
>>> s2s_dist = ntw.allneighbordistances('crimes')
>>> s2s_dist[0,0], s2s_dist[1,0]
(nan, 3105.189475447081)
>>> ntw.snapobservations(examples.get_path('schools.shp'),
... 'schools',
... attribute=False)
>>> schools_pp = ntw.pointpatterns['schools']
>>> s2d_dist = ntw.allneighbordistances(crimes_pp,
... destpattern=schools_pp)
>>> s2d_dist = ntw.allneighbordistances('crimes',
... destpattern='schools')
>>> s2d_dist[0,0], s2d_dist[1,0]
(4520.72353741989, 6340.422971967316)
"""

if not hasattr(self, 'alldistances'):
self.node_distance_matrix(n_processes, gen_tree=gen_tree)


if type(sourcepattern) is str:
sourcepattern = self.pointpatterns[sourcepattern]
if destpattern:
destpattern = self.pointpatterns[destpattern]

# Source setup
src_indices = list(sourcepattern.points.keys())
nsource_pts = len(src_indices)
Expand Down Expand Up @@ -995,6 +1000,7 @@ def allneighbordistances(self, sourcepattern, destpattern=None,
# Mirror the upper and lower triangle
# when symmetric.
nearest[p2, p1] = nearest[p1, p2]

# Populate the main diagonal when symmetric.
if symmetric:
if fill_diagonal is None:
Expand All @@ -1006,7 +1012,8 @@ def allneighbordistances(self, sourcepattern, destpattern=None,


def nearestneighbordistances(self, sourcepattern, destpattern=None,
n_processes=None, gen_tree=False):
n_processes=None, gen_tree=False,
all_dists=None, keep_zero_dist=True):
"""Compute the interpattern nearest neighbor distances or the
intrapattern nearest neighbor distances between a source
pattern and a destination pattern.
Expand All @@ -1028,86 +1035,76 @@ def nearestneighbordistances(self, sourcepattern, destpattern=None,
gen_tree : bool
rebuild shortest path {True}, or skip {False}
all_dists : numpy.ndarray
An array of shape (n,n) storing distances between all points.
keep_zero_dist : bool
Include zero values in minimum distance (True) or exclude (False).
Default is True. If the source pattern is the same as the
destination pattern, the diagonal is filled with NaNs.
Returns
-------
nearest : dict
Keys are source point ids; each value is a tuple of
(list of nearest destination point ids, distance).
nearest : numpy.ndarray
An (n,2) shaped array with column[:,0] containing the id of the
nearest neighbor and column [:,1] containing the distance.
Examples
--------
"""
>>> import spaghetti as spgh
>>> ntw = spgh.Network(examples.get_path('streets.shp'))
>>> ntw.snapobservations(examples.get_path('crimes.shp'), 'crimes')
>>> nn = ntw.nearestneighbordistances('crimes', keep_zero_dist=True)
>>> nn[11], nn[18]
(([18, 19], 165.33982412719126), ([19], 0.0))
###########################################
#
# MERGE WITH `allneighbordistances`
#
###########################################
>>> nn = ntw.nearestneighbordistances('crimes', keep_zero_dist=False)
>>> nn[11], nn[18]
(([18, 19], 165.33982412719126), ([11], 165.33982412719126))
"""
if sourcepattern not in self.pointpatterns.keys():
err_msg = "Available point patterns are {}"
raise KeyError(err_msg.format(self.pointpatterns.keys()))

if not hasattr(self, 'alldistances'):
self.node_distance_matrix(n_processes, gen_tree=gen_tree)

pt_indices = list(self.pointpatterns[sourcepattern].points.keys())
dist_to_node = self.pointpatterns[sourcepattern].dist_to_node
nearest = np.zeros((len(pt_indices), 2), dtype=np.float32)
nearest[:, 1] = np.inf

#if destpattern is None:
# destpattern = sourcepattern

searchpts = copy.deepcopy(pt_indices)

searchnodes = {}
for s in searchpts:
e1, e2 = dist_to_node[s].keys()
searchnodes[s] = (e1, e2)

for p1 in pt_indices:
# Get the source nodes and distance to source nodes.
# source1 and source2 nodes
s1, s2 = searchnodes[p1]
sdist1, sdist2 = dist_to_node[p1].values()
symmetric = sourcepattern != destpattern

# (for source-to-source patterns) if zero-distance neighbors are
# desired, keep the diagonal as NaN and take the minimum distance
# neighbor(s), which may include zero-distance neighbors.
fill_diagonal = None
if not keep_zero_dist and symmetric:
# (for source-to-source patterns) if zero-distance neighbors should
# be ignored, convert the diagonal to 0.0 and take the minimum
# distance neighbor(s) that is/are not 0.0 distance.
fill_diagonal = 0.

sourcepattern = self.pointpatterns[sourcepattern]
if destpattern:
destpattern = self.pointpatterns[destpattern]

if all_dists is None:
all_dists = self.allneighbordistances(sourcepattern,
destpattern=destpattern,
fill_diagonal=fill_diagonal,
n_processes=n_processes,
gen_tree=gen_tree)
nearest = {}

for source_index in sourcepattern.points.keys():
if keep_zero_dist and symmetric:
val = np.nanmin(all_dists[source_index,:])
else:
val = np.min(all_dists[source_index,:]\
[np.nonzero(all_dists[source_index,:])])
# nearest destination (may be more than one if equal distance)
dest_idxs = np.where(all_dists[source_index,:] == val)[0].tolist()
nearest[source_index] = (dest_idxs, val)

searchpts.remove(p1)
for p2 in searchpts:
d1, d2 = searchnodes[p2]
ddist1, ddist2 = dist_to_node[p2].values()
s1_to_d1 = sdist1 + self.alldistances[s1][0][d1] + ddist1
s1_to_d2 = sdist1 + self.alldistances[s1][0][d2] + ddist2
s2_to_d1 = sdist2 + self.alldistances[s2][0][d1] + ddist1
s2_to_d2 = sdist2 + self.alldistances[s2][0][d2] + ddist2
# source1 to dest1
if s1_to_d1 < nearest[p1, 1]:
nearest[p1, 0] = p2
nearest[p1, 1] = s1_to_d1
if s1_to_d1 < nearest[p2, 1]:
nearest[p2, 0] = p1
nearest[p2, 1] = s1_to_d1
# source1 to dest2
if s1_to_d2 < nearest[p1, 1]:
nearest[p1, 0] = p2
nearest[p1, 1] = s1_to_d2
if s1_to_d2 < nearest[p2, 1]:
nearest[p2, 0] = p1
nearest[p2, 1] = s1_to_d2
# source2 to dest1
if s2_to_d1 < nearest[p1, 1]:
nearest[p1, 0] = p2
nearest[p1, 1] = s2_to_d1
if s2_to_d1 < nearest[p2, 1]:
nearest[p2, 0] = p1
nearest[p2, 1] = s2_to_d1
# source2 to dest2
if s2_to_d2 < nearest[p1, 1]:
nearest[p1, 0] = p2
nearest[p1, 1] = s2_to_d2
if s2_to_d2 < nearest[p2, 1]:
nearest[p2, 0] = p1
nearest[p2, 1] = s2_to_d2

return nearest


Expand Down
22 changes: 18 additions & 4 deletions spaghetti/tests/test_network_api_from_gdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,27 @@ def test_all_neighbor_distances(self):
self.assertEqual(observed.all(), known.all())

def test_nearest_neighbor_distances(self):
# general test
with self.assertRaises(KeyError):
self.ntw.nearestneighbordistances('i_should_not_exist')
nnd = self.ntw.nearestneighbordistances('schools')
nnd1 = self.ntw.nearestneighbordistances('schools')
nnd2 = self.ntw.nearestneighbordistances('schools',
'schools')
np.testing.assert_array_equal(nnd, nnd2)

destpattern='schools')
nndv1 = np.array(list(nnd1.values()))[:,1].astype(float)
nndv2 = np.array(list(nnd2.values()))[:,1].astype(float)
np.testing.assert_array_almost_equal_nulp(nndv1, nndv2)

# nearest neighbor keeping zero test
known_zero = ([19], 0.0)
nn_c = self.ntw.nearestneighbordistances('crimes',
keep_zero_dist=True)
self.assertEqual(nn_c[18], known_zero)

# nearest neighbor omitting zero test
known_nonzero = ([11], 165.33982412719126)
nn_c = self.ntw.nearestneighbordistances('crimes',
keep_zero_dist=False)
self.assertEqual(nn_c[18], known_nonzero)

@unittest.skipIf(GEOPANDAS_EXTINCT, 'Missing Geopandas')
class TestNetworkAnalysis(unittest.TestCase):
Expand Down
22 changes: 18 additions & 4 deletions spaghetti/tests/test_network_api_from_shp.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,27 @@ def test_all_neighbor_distances(self):
self.assertEqual(observed.all(), known.all())

def test_nearest_neighbor_distances(self):
# general test
with self.assertRaises(KeyError):
self.ntw.nearestneighbordistances('i_should_not_exist')
nnd = self.ntw.nearestneighbordistances('schools')
nnd1 = self.ntw.nearestneighbordistances('schools')
nnd2 = self.ntw.nearestneighbordistances('schools',
'schools')
np.testing.assert_array_equal(nnd, nnd2)

destpattern='schools')
nndv1 = np.array(list(nnd1.values()))[:,1].astype(float)
nndv2 = np.array(list(nnd2.values()))[:,1].astype(float)
np.testing.assert_array_almost_equal_nulp(nndv1, nndv2)

# nearest neighbor keeping zero test
known_zero = ([19], 0.0)
nn_c = self.ntw.nearestneighbordistances('crimes',
keep_zero_dist=True)
self.assertEqual(nn_c[18], known_zero)

# nearest neighbor omitting zero test
known_nonzero = ([11], 165.33982412719126)
nn_c = self.ntw.nearestneighbordistances('crimes',
keep_zero_dist=False)
self.assertEqual(nn_c[18], known_nonzero)

class TestNetworkAnalysis(unittest.TestCase):

Expand Down
22 changes: 18 additions & 4 deletions spaghetti/tests/test_network_from_gdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,27 @@ def test_all_neighbor_distances(self):
self.assertEqual(observed.all(), known.all())

def test_nearest_neighbor_distances(self):
# general test
with self.assertRaises(KeyError):
self.ntw.nearestneighbordistances('i_should_not_exist')
nnd = self.ntw.nearestneighbordistances('schools')
nnd1 = self.ntw.nearestneighbordistances('schools')
nnd2 = self.ntw.nearestneighbordistances('schools',
'schools')
np.testing.assert_array_equal(nnd, nnd2)

destpattern='schools')
nndv1 = np.array(list(nnd1.values()))[:,1].astype(float)
nndv2 = np.array(list(nnd2.values()))[:,1].astype(float)
np.testing.assert_array_almost_equal_nulp(nndv1, nndv2)

# nearest neighbor keeping zero test
known_zero = ([19], 0.0)
nn_c = self.ntw.nearestneighbordistances('crimes',
keep_zero_dist=True)
self.assertEqual(nn_c[18], known_zero)

# nearest neighbor omitting zero test
known_nonzero = ([11], 165.33982412719126)
nn_c = self.ntw.nearestneighbordistances('crimes',
keep_zero_dist=False)
self.assertEqual(nn_c[18], known_nonzero)

@unittest.skipIf(GEOPANDAS_EXTINCT, 'Missing Geopandas')
class TestNetworkAnalysis(unittest.TestCase):
Expand Down
25 changes: 21 additions & 4 deletions spaghetti/tests/test_network_from_shp.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def setUp(self):
for obs in ['schools', 'crimes']:
in_data = examples.get_path('{}.shp'.format(obs))
self.ntw.snapobservations(in_data, obs, attribute=True)
#setattr(self, obs, obs)
setattr(self, obs, self.ntw.pointpatterns[obs])

def tearDown(self):
Expand Down Expand Up @@ -93,7 +94,7 @@ def test_simulate_poisson_observations(self):
self.assertEqual(npoints, sim.npoints)

def test_all_neighbor_distances(self):
distancematrix_1 = self.ntw.allneighbordistances(self.schools,
distancematrix_1 = self.ntw.allneighbordistances('schools',
gen_tree=True)
self.assertAlmostEqual(np.nansum(distancematrix_1[0]),
17682.436988, places=4)
Expand All @@ -102,19 +103,35 @@ def test_all_neighbor_distances(self):
for p, plists in predlist.items():
self.assertEqual(plists[-1], k)
self.assertEqual(self.ntw.node_list, list(predlist.keys()))
distancematrix_2 = self.ntw.allneighbordistances(self.schools,

distancematrix_2 = self.ntw.allneighbordistances('schools',
fill_diagonal=0.)
observed = distancematrix_2.diagonal()
known = np.zeros(distancematrix_2.shape[0])
self.assertEqual(observed.all(), known.all())

def test_nearest_neighbor_distances(self):
# general test
with self.assertRaises(KeyError):
self.ntw.nearestneighbordistances('i_should_not_exist')
nnd1 = self.ntw.nearestneighbordistances('schools')
nnd2 = self.ntw.nearestneighbordistances('schools',
'schools')
np.testing.assert_array_equal(nnd1, nnd2)
destpattern='schools')
nndv1 = np.array(list(nnd1.values()))[:,1].astype(float)
nndv2 = np.array(list(nnd2.values()))[:,1].astype(float)
np.testing.assert_array_almost_equal_nulp(nndv1, nndv2)

# nearest neighbor keeping zero test
known_zero = ([19], 0.0)
nn_c = self.ntw.nearestneighbordistances('crimes',
keep_zero_dist=True)
self.assertEqual(nn_c[18], known_zero)

# nearest neighbor omitting zero test
known_nonzero = ([11], 165.33982412719126)
nn_c = self.ntw.nearestneighbordistances('crimes',
keep_zero_dist=False)
self.assertEqual(nn_c[18], known_nonzero)


class TestNetworkAnalysis(unittest.TestCase):
Expand Down

0 comments on commit 739a37c

Please sign in to comment.