Skip to content

Commit

Permalink
updates to logic and tests using knn
Browse files Browse the repository at this point in the history
  • Loading branch information
jlaura committed Jun 30, 2015
1 parent dc516fd commit a38c7d2
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 99 deletions.
86 changes: 18 additions & 68 deletions pysal/weights/Distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@
__all__ = ["knnW", "Kernel", "DistanceBand"]


def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
def knnW(kdtree, k=2, p=2, ids=None):
"""
Creates nearest neighbor weights matrix based on k nearest
neighbors.
Parameters
----------
data : array
(n,k) or KDTree where KDtree.data is array (n,k)
kdtree : object
PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
n observations on k characteristics used to measure
distances between the n objects
k : int
Expand All @@ -34,11 +34,9 @@ def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
1<=p<=infinity
2: Euclidean distance
1: Manhattan distance
Ignored if the KDTree is an ArcKDTree
ids : list
identifiers to attach to each observation
pct_unique : float
threshold percentage of unique points in data. Below this
threshold tree is built on unique values only
Returns
-------
Expand All @@ -50,50 +48,27 @@ def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
Examples
--------
>>> x,y=np.indices((5,5))
>>> x.shape=(25,1)
>>> y.shape=(25,1)
>>> data=np.hstack([x,y])
>>> wnn2=knnW(data,k=2)
>>> wnn4=knnW(data,k=4)
>>> set([1,5,6,2]) == set(wnn4.neighbors[0])
True
>>> set([0,6,10,1]) == set(wnn4.neighbors[5])
True
>>> set([1,5]) == set(wnn2.neighbors[0])
True
>>> set([0,6]) == set(wnn2.neighbors[5])
True
>>> "%.2f"%wnn2.pct_nonzero
'8.00'
>>> wnn4.pct_nonzero
16.0
>>> wnn3e=knnW(data,p=2,k=3)
>>> set([1,5,6]) == set(wnn3e.neighbors[0])
True
>>> wnn3m=knnW(data,p=1,k=3)
>>> a = set([1,5,2])
>>> b = set([1,5,6])
>>> c = set([1,5,10])
>>> w0n = set(wnn3m.neighbors[0])
>>> a==w0n or b==w0n or c==w0n
>>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
>>> kd = pysal.cg.kdtree.KDTree(np.array(points))
>>> wnn2 = pysal.knnW(kd, 2)
>>> [1,3] == wnn2.neighbors[0]
True
ids
>>> wnn2 = knnW(data,2)
>>> wnn2 = knnW(kd,2)
>>> wnn2[0]
{1: 1.0, 5: 1.0}
{1: 1.0, 3: 1.0}
>>> wnn2[1]
{0: 1.0, 2: 1.0}
{0: 1.0, 3: 1.0}
now with 1 rather than 0 offset
>>> wnn2 = knnW(data,2, ids = range(1,26))
>>> wnn2 = knnW(kd, 2, ids=range(1,7))
>>> wnn2[1]
{2: 1.0, 6: 1.0}
{2: 1.0, 4: 1.0}
>>> wnn2[2]
{1: 1.0, 3: 1.0}
{1: 1.0, 4: 1.0}
>>> 0 in wnn2.neighbors
False
Expand All @@ -107,34 +82,9 @@ def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
pysal.weights.W
"""

if issubclass(type(data), scipy.spatial.KDTree):
kd = data
data = kd.data
nnq = kd.query(data, k=k+1, p=p)
info = nnq[1]
elif type(data).__name__ == 'ndarray':
# check if unique points are a small fraction of all points
ind = np.lexsort(data.T)
u = data[np.concatenate(([True],np.any(data[ind[1:]]!=data[ind[:-1]],axis=1)))]
pct_u = len(u)*1. / len(data)
if pct_u < pct_unique:
tree = KDTree(u)
nnq = tree.query(data, k=k+1, p=p)
info = nnq[1]
uid = [np.where((data == ui).all(axis=1))[0][0] for ui in u]
new_info = np.zeros((len(data), k + 1), 'int')
for i, row in enumerate(info):
new_info[i] = [uid[j] for j in row]
info = new_info
else:
kd = KDTree(data)
# calculate
nnq = kd.query(data, k=k + 1, p=p)
info = nnq[1]
else:
print 'Unsupported type'
return None
data = kdtree.data

This comment has been minimized.

Copy link
@ljwolf

ljwolf Mar 23, 2016

Member

This is a breaking API change for some of our prior notebooks that use knnW, and it is the change that #771 refers to.

nnq = kdtree.query(data, k=k+1, p=p)
info = nnq[1]

neighbors = {}
for i, row in enumerate(info):
Expand Down Expand Up @@ -446,7 +396,7 @@ class DistanceBand(W):
neighbors : dict
of neighbors keyed by observation id
Examples
--------
Expand Down
28 changes: 9 additions & 19 deletions pysal/weights/tests/test_Distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,15 @@ def setUp(self):
10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]

def test_knnW(self):
x = np.indices((5, 5))
x, y = np.indices((5, 5))
x.shape = (25, 1)
y.shape = (25, 1)
data = np.hstack([x, y])
wnn2 = pysal.knnW(data, k=2)
wnn4 = pysal.knnW(data, k=4)
wnn4.neighbors[0]
self.assertEqual(set(wnn4.neighbors[0]), set([1, 5, 6, 2]))
self.assertEqual(set(wnn2.neighbors[5]), set([0, 6]))
self.assertEqual(wnn2.pct_nonzero, 8.0)
wnn3e = pysal.knnW(data, p=2, k=3)
self.assertEqual(set(wnn3e.neighbors[0]), set([1, 5, 6]))
wc = pysal.knnW_from_shapefile(self.polyShp)
self.assertEqual(wc.pct_nonzero, 4.081632653061225)
self.assertEqual(set(wc.neighbors[0]), set([2, 1]))
wc3 = pysal.knnW_from_shapefile(self.polyShp, k=3)
self.assertEqual(wc3.weights[1], [1, 1, 1])
self.assertEqual(set(wc3.neighbors[1]), set([0,3,7]))
kd = pysal.cg.kdtree.KDTree(np.array(self.points), distance_metric='euclidean')
wnn2 = pysal.knnW(kd, 2)
self.assertEqual(wnn2.neighbors[0], [1,3])

pts = [i.centroid for i in pysal.open(self.polyShp)]
kd = pysal.cg.kdtree.KDTree(pts)
wnn4 = pysal.knnW(kd, 4)
self.assertEqual(wnn4.neighbors[0], [2,1,3,7])
self.assertEqual(wnn4.neighbors[7], [3,6,12,11])

def test_knnW_arc(self):
pts = [x.centroid for x in pysal.open(self.arcShp)]
Expand Down
2 changes: 2 additions & 0 deletions pysal/weights/tests/test_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ def test_knnW_from_array(self):
self.assertAlmostEquals(wnn4.pct_nonzero, 16.0)
wnn4 = pysal.knnW_from_array(data, k=4)
self.assertEquals(set(wnn4.neighbors[0]), set([1, 5, 6, 2]))
'''
wnn3e = pysal.knnW(data, p=2, k=3)
self.assertEquals(set(wnn3e.neighbors[0]),set([1, 5, 6]))
wnn3m = pysal.knnW(data, p=1, k=3)
self.assertEquals(set(wnn3m.neighbors[0]), set([1, 5, 2]))
'''

def test_knnW_from_shapefile(self):
wc = pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
Expand Down
22 changes: 10 additions & 12 deletions pysal/weights/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,6 @@ def knnW_from_array(array, k=2, p=2, ids=None, radius=None):
>>> wnn4=knnW_from_array(data,k=4)
>>> set([ 1,5,6,2]) == set(wnn4.neighbors[0])
True
>>> wnn4=knnW_from_array(data,k=4)
>>> wnn3e=knnW(data,p=2,k=3)
>>> set([1,5,6]) == set(wnn3e.neighbors[0])
True
>>> wnn3m=knnW(data,p=1,k=3)
>>> set([1,5,2]) == set(wnn3m.neighbors[0])
True
Notes
-----
Expand All @@ -236,8 +229,10 @@ def knnW_from_array(array, k=2, p=2, ids=None, radius=None):
"""
if radius is not None:
array = pysal.cg.KDTree(array, distance_metric='Arc', radius=radius)
return knnW(array, k=k, p=p, ids=ids)
kdtree = pysal.cg.KDTree(array, distance_metric='Arc', radius=radius)
else:
kdtree = pysal.cg.KDTree(array)
return knnW(kdtree, k=k, p=p, ids=ids)


def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
Expand Down Expand Up @@ -322,12 +317,15 @@ def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
"""

data = get_points_array_from_shapefile(shapefile)

if radius is not None:
data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
kdtree = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
else:
kdtree = pysal.cg.KDTree(data)
if idVariable:
ids = get_ids(shapefile, idVariable)
return knnW(data, k=k, p=p, ids=ids)
return knnW(data, k=k, p=p)
return knnW(kdtree, k=k, p=p, ids=ids)
return knnW(kdtree, k=k, p=p)


def threshold_binaryW_from_array(array, threshold, p=2, radius=None):
Expand Down

0 comments on commit a38c7d2

Please sign in to comment.