Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented delete vector function based on the ID #57

Merged
merged 4 commits into from
Sep 23, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions nearpy/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ def store_vector(self, v, data=None):
self.storage.store_vector(lshash.hash_name, bucket_key,
nv, data)

def delete_vector(self, data):
    """
    Asks the storage to delete the entries identified by data, once for
    every LSH hash configured on this engine.
    The data argument must be JSON-serializable.
    """
    purge = self.storage.delete_vector

    # Each hash is addressed separately through its hash name
    for lsh in self.lshashes:
        purge(lsh.hash_name, data)


def candidate_count(self, v):
"""
Expand Down
9 changes: 6 additions & 3 deletions nearpy/storage/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ def store_vector(self, hash_name, bucket_key, v, data):
"""
raise NotImplementedError

def delete_vector(self, hash_name, data):
    """
    Interface method for deleting the entries stored for the hash
    hash_name that carry the given JSON-serializable data.
    This base version is not implemented and always raises
    NotImplementedError.
    """
    raise NotImplementedError

def get_bucket(self, hash_name, bucket_key):
"""
Returns bucket content as list of tuples (vector, data).
Expand Down Expand Up @@ -59,6 +65,3 @@ def load_hash_configuration(self, hash_name):
Loads and returns hash configuration
"""
raise NotImplementedError



9 changes: 8 additions & 1 deletion nearpy/storage/storage_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ def store_vector(self, hash_name, bucket_key, v, data):
self.buckets[hash_name][bucket_key] = []
self.buckets[hash_name][bucket_key].append((v, data))

def delete_vector(self, hash_name, data):
    """
    Deletes every (vector, data) entry whose data equals the given
    JSON-serializable data from all buckets of the specified hash.
    Does nothing if no bucket exists for hash_name.
    """
    # .get() instead of indexing: an unknown hash name means there is
    # nothing to delete, which must not raise a KeyError.
    for bucket in self.buckets.get(hash_name, {}).values():
        # Slice assignment filters the existing list object in place
        # instead of rebinding the entry in the bucket dict.
        bucket[:] = [(v, id_data) for v, id_data in bucket if id_data != data]


def get_bucket(self, hash_name, bucket_key):
"""
Returns bucket content as list of tuples (vector, data).
Expand Down Expand Up @@ -81,4 +89,3 @@ def load_hash_configuration(self, hash_name):
Loads and returns hash configuration
"""
return self.hash_configs.get(hash_name)

51 changes: 50 additions & 1 deletion nearpy/tests/engine_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,18 @@
import numpy
import scipy
import unittest
import itertools


from nearpy import Engine
from nearpy.utils.utils import unitvec

from nearpy.hashes import UniBucket

class TestEngine(unittest.TestCase):

def setUp(self):
    """Creates the engine used by the tests and seeds numpy's RNG."""
    self.engine = Engine(1000)
    # Fixed seed so the random vectors drawn in the tests are reproducible
    numpy.random.seed(4)

def test_storage_issue(self):
engine1 = Engine(100)
Expand Down Expand Up @@ -73,5 +76,51 @@ def test_retrieval_sparse(self):
self.assertEqual(y_data, x_data)
self.assertAlmostEqual(y_distance, 0.0, delta=delta)

def get_keys(self, engine):
    """
    Returns the set of data ids found in the engine's storage.

    For each hash of the engine only the bucket whose key equals the hash
    name is read (presumably the single bucket a UniBucket hash writes
    to - verify against UniBucket).
    """
    collected = set()
    for lsh in engine.lshashes:
        name = lsh.hash_name
        # Bucket entries are (vector, data) tuples; only the data is kept
        for _vector, key in engine.storage.buckets[name][name]:
            collected.add(key)
    return collected

def test_delete_vector_single_hash(self):
    """Deleting by data id removes exactly that entry (one hash)."""
    dim = 5
    engine = Engine(dim, lshashes=[UniBucket('testHash')])
    # Index 20 random vectors (their data is the unique loop index)
    for index in range(20):
        v = numpy.random.randn(dim)
        engine.store_vector(v, index)

    keys = self.get_keys(engine)

    engine.delete_vector(15)  # delete the vector with key = 15

    new_keys = self.get_keys(engine)

    self.assertIn(15, keys)
    self.assertNotIn(15, new_keys)  # the key 15 is the one missing
    # Exact comparison: a plain size check would also pass if unrelated
    # vectors had been deleted together with key 15.
    self.assertEqual(new_keys, keys - set([15]))

def test_delete_vector_multiple_hash(self):
    """Deleting by data id removes exactly that entry (ten hashes)."""
    dim = 5
    hashes = [UniBucket('name_hash_%d' % k) for k in range(10)]
    engine = Engine(dim, lshashes=hashes)
    # Index 20 random vectors (their data is the unique loop index)
    for index in range(20):
        v = numpy.random.randn(dim)
        engine.store_vector(v, index)

    keys = self.get_keys(engine)

    engine.delete_vector(15)  # delete the vector with key = 15

    new_keys = self.get_keys(engine)

    self.assertIn(15, keys)
    # get_keys merges the keys of all hashes, so this fails if key 15
    # survives under any single hash.
    self.assertNotIn(15, new_keys)
    # Exact comparison: a plain size check would also pass if unrelated
    # vectors had been deleted together with key 15.
    self.assertEqual(new_keys, keys - set([15]))


# Run the test suite when this module is executed as a script
if __name__ == '__main__':
    unittest.main()
1 change: 1 addition & 0 deletions nearpy/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,4 @@ def want_string(arg, encoding='utf-8'):
else:
rv = arg
return rv