diff --git a/pysal/weights/Distance.py b/pysal/weights/Distance.py index 12f650f6e..129636598 100644 --- a/pysal/weights/Distance.py +++ b/pysal/weights/Distance.py @@ -15,7 +15,7 @@ __all__ = ["knnW", "Kernel", "DistanceBand"] -def knnW(data, k=2, p=2, ids=None, pct_unique=0.25): +def knnW(kdtree, k=2, p=2, ids=None): """ Creates nearest neighbor weights matrix based on k nearest neighbors. @@ -23,8 +23,8 @@ def knnW(data, k=2, p=2, ids=None, pct_unique=0.25): Parameters ---------- - data : array - (n,k) or KDTree where KDtree.data is array (n,k) + kdtree : object + PySAL KDTree or ArcKDTree where KDtree.data is array (n,k) n observations on k characteristics used to measure distances between the n objects k : int @@ -34,11 +34,9 @@ def knnW(data, k=2, p=2, ids=None, pct_unique=0.25): 1<=p<=infinity 2: Euclidean distance 1: Manhattan distance + Ignored if the KDTree is an ArcKDTree ids : list identifiers to attach to each observation - pct_unique : float - threshold percentage of unique points in data. Below this - threshold tree is built on unique values only Returns ------- @@ -50,50 +48,27 @@ def knnW(data, k=2, p=2, ids=None, pct_unique=0.25): Examples -------- - >>> x,y=np.indices((5,5)) - >>> x.shape=(25,1) - >>> y.shape=(25,1) - >>> data=np.hstack([x,y]) - >>> wnn2=knnW(data,k=2) - >>> wnn4=knnW(data,k=4) - >>> set([1,5,6,2]) == set(wnn4.neighbors[0]) - True - >>> set([0,6,10,1]) == set(wnn4.neighbors[5]) - True - >>> set([1,5]) == set(wnn2.neighbors[0]) - True - >>> set([0,6]) == set(wnn2.neighbors[5]) - True - >>> "%.2f"%wnn2.pct_nonzero - '8.00' - >>> wnn4.pct_nonzero - 16.0 - >>> wnn3e=knnW(data,p=2,k=3) - >>> set([1,5,6]) == set(wnn3e.neighbors[0]) - True - >>> wnn3m=knnW(data,p=1,k=3) - >>> a = set([1,5,2]) - >>> b = set([1,5,6]) - >>> c = set([1,5,10]) - >>> w0n = set(wnn3m.neighbors[0]) - >>> a==w0n or b==w0n or c==w0n + >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] + >>> kd = pysal.cg.kdtree.KDTree(np.array(points)) + >>> wnn2 = pysal.knnW(kd, 2) + >>> [1,3] == wnn2.neighbors[0] True ids - >>> wnn2 = knnW(data,2) + >>> wnn2 = knnW(kd,2) >>> wnn2[0] - {1: 1.0, 5: 1.0} + {1: 1.0, 3: 1.0} >>> wnn2[1] - {0: 1.0, 2: 1.0} + {0: 1.0, 3: 1.0} now with 1 rather than 0 offset - >>> wnn2 = knnW(data,2, ids = range(1,26)) + >>> wnn2 = knnW(kd, 2, ids=range(1,7)) >>> wnn2[1] - {2: 1.0, 6: 1.0} + {2: 1.0, 4: 1.0} >>> wnn2[2] - {1: 1.0, 3: 1.0} + {1: 1.0, 4: 1.0} >>> 0 in wnn2.neighbors False @@ -107,34 +82,9 @@ def knnW(data, k=2, p=2, ids=None, pct_unique=0.25): pysal.weights.W """ - - if issubclass(type(data), scipy.spatial.KDTree): - kd = data - data = kd.data - nnq = kd.query(data, k=k+1, p=p) - info = nnq[1] - elif type(data).__name__ == 'ndarray': - # check if unique points are a small fraction of all points - ind = np.lexsort(data.T) - u = data[np.concatenate(([True],np.any(data[ind[1:]]!=data[ind[:-1]],axis=1)))] - pct_u = len(u)*1. / len(data) - if pct_u < pct_unique: - tree = KDTree(u) - nnq = tree.query(data, k=k+1, p=p) - info = nnq[1] - uid = [np.where((data == ui).all(axis=1))[0][0] for ui in u] - new_info = np.zeros((len(data), k + 1), 'int') - for i, row in enumerate(info): - new_info[i] = [uid[j] for j in row] - info = new_info - else: - kd = KDTree(data) - # calculate - nnq = kd.query(data, k=k + 1, p=p) - info = nnq[1] - else: - print 'Unsupported type' - return None + data = kdtree.data + nnq = kdtree.query(data, k=k+1, p=p) + info = nnq[1] neighbors = {} for i, row in enumerate(info): @@ -446,7 +396,7 @@ class DistanceBand(W): neighbors : dict of neighbors keyed by observation id - + Examples -------- diff --git a/pysal/weights/tests/test_Distance.py b/pysal/weights/tests/test_Distance.py index 0528d13b2..52940691e 100644 --- a/pysal/weights/tests/test_Distance.py +++ b/pysal/weights/tests/test_Distance.py @@ -13,25 +13,15 @@ def setUp(self): 10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] def test_knnW(self): - x = np.indices((5, 5)) - x, y = np.indices((5, 5)) - x.shape = (25, 1) - y.shape = (25, 1) - data = np.hstack([x, y]) - wnn2 = pysal.knnW(data, k=2) - wnn4 = pysal.knnW(data, k=4) - wnn4.neighbors[0] - self.assertEqual(set(wnn4.neighbors[0]), set([1, 5, 6, 2])) - self.assertEqual(set(wnn2.neighbors[5]), set([0, 6])) - self.assertEqual(wnn2.pct_nonzero, 8.0) - wnn3e = pysal.knnW(data, p=2, k=3) - self.assertEqual(set(wnn3e.neighbors[0]), set([1, 5, 6])) - wc = pysal.knnW_from_shapefile(self.polyShp) - self.assertEqual(wc.pct_nonzero, 4.081632653061225) - self.assertEqual(set(wc.neighbors[0]), set([2, 1])) - wc3 = pysal.knnW_from_shapefile(self.polyShp, k=3) - self.assertEqual(wc3.weights[1], [1, 1, 1]) - self.assertEqual(set(wc3.neighbors[1]), set([0,3,7])) + kd = pysal.cg.kdtree.KDTree(np.array(self.points), distance_metric='euclidean') + wnn2 = pysal.knnW(kd, 2) + self.assertEqual(wnn2.neighbors[0], [1,3]) + + pts = [i.centroid for i in pysal.open(self.polyShp)] + kd = pysal.cg.kdtree.KDTree(pts) + wnn4 = pysal.knnW(kd, 4) + self.assertEqual(wnn4.neighbors[0], [2,1,3,7]) + self.assertEqual(wnn4.neighbors[7], [3,6,12,11]) def test_knnW_arc(self): pts = [x.centroid for x in pysal.open(self.arcShp)] diff --git a/pysal/weights/tests/test_user.py b/pysal/weights/tests/test_user.py index 054f7dfc1..7011dad7c 100644 --- a/pysal/weights/tests/test_user.py +++ b/pysal/weights/tests/test_user.py @@ -33,10 +33,12 @@ def test_knnW_from_array(self): self.assertAlmostEquals(wnn4.pct_nonzero, 16.0) wnn4 = pysal.knnW_from_array(data, k=4) self.assertEquals(set(wnn4.neighbors[0]), set([1, 5, 6, 2])) + ''' wnn3e = pysal.knnW(data, p=2, k=3) self.assertEquals(set(wnn3e.neighbors[0]),set([1, 5, 6])) wnn3m = pysal.knnW(data, p=1, k=3) self.assertEquals(set(wnn3m.neighbors[0]), set([1, 5, 2])) + ''' def test_knnW_from_shapefile(self): wc = pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp")) diff --git a/pysal/weights/user.py b/pysal/weights/user.py index 7d5baf54f..ed212355d 100644 --- a/pysal/weights/user.py +++ b/pysal/weights/user.py @@ -217,13 +217,6 @@ def knnW_from_array(array, k=2, p=2, ids=None, radius=None): >>> wnn4=knnW_from_array(data,k=4) >>> set([ 1,5,6,2]) == set(wnn4.neighbors[0]) True - >>> wnn4=knnW_from_array(data,k=4) - >>> wnn3e=knnW(data,p=2,k=3) - >>> set([1,5,6]) == set(wnn3e.neighbors[0]) - True - >>> wnn3m=knnW(data,p=1,k=3) - >>> set([1,5,2]) == set(wnn3m.neighbors[0]) - True Notes ----- @@ -236,8 +229,10 @@ def knnW_from_array(array, k=2, p=2, ids=None, radius=None): """ if radius is not None: - array = pysal.cg.KDTree(array, distance_metric='Arc', radius=radius) - return knnW(array, k=k, p=p, ids=ids) + kdtree = pysal.cg.KDTree(array, distance_metric='Arc', radius=radius) + else: + kdtree = pysal.cg.KDTree(array) + return knnW(kdtree, k=k, p=p, ids=ids) def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None): @@ -322,12 +317,15 @@ def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None): """ data = get_points_array_from_shapefile(shapefile) + if radius is not None: - data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius) + kdtree = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius) + else: + kdtree = pysal.cg.KDTree(data) if idVariable: ids = get_ids(shapefile, idVariable) - return knnW(data, k=k, p=p, ids=ids) - return knnW(data, k=k, p=p) + return knnW(kdtree, k=k, p=p, ids=ids) + return knnW(kdtree, k=k, p=p) def threshold_binaryW_from_array(array, threshold, p=2, radius=None):