Skip to content

Commit

Permalink
Use bulk interface for document merge
Browse files Browse the repository at this point in the history
  • Loading branch information
gergely-ujvari committed Feb 4, 2015
1 parent 8048e66 commit ec4d908
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 6 deletions.
38 changes: 33 additions & 5 deletions annotator/document.py
Expand Up @@ -114,6 +114,38 @@ def _remove_deficient_links(self):
filtered_list = [l for l in links if 'type' in l and 'href' in l]
self['link'] = filtered_list

@classmethod
def _bulk_delete_and_update(cls, to_delete, to_update):
    """Delete and update documents with a single Elasticsearch bulk call.

    :param to_delete: iterable of documents to remove. Each one is looked
        up by the value stored under its ``'id'`` key.
    :param to_update: iterable of documents to update. Each one is
        addressed by its ``'id'`` key and is sent in full as the ``doc``
        payload of its update action.

    All delete actions are queued before any update action. The request is
    issued against ``cls.es.index`` / ``cls.__type__`` with
    ``refresh=True``. When both iterables are empty no request is sent.
    """
    # Loop-invariant addressing info, looked up once.
    index = cls.es.index
    doc_type = cls.__type__

    def action_line(operation, doc):
        # Metadata line of a bulk action: which operation, on which doc.
        return {
            operation: {
                '_index': index,
                '_type': doc_type,
                '_id': doc['id'],
            }
        }

    bulk_list = [action_line('delete', doc) for doc in to_delete]

    for doc in to_update:
        # An update action is two entries: the metadata line followed by
        # the partial document to merge into the stored one.
        bulk_list.append(action_line('update', doc))
        bulk_list.append({'doc': doc})

    if not bulk_list:
        # A bulk request without any action is rejected by Elasticsearch,
        # so there is nothing useful to send.
        return

    # NOTE(review): the bulk response is not inspected here; per-item
    # failures presumably come back in the response body rather than as an
    # exception -- confirm against the client version in use.
    cls.es.conn.bulk(body=bulk_list, refresh=True)

def save(self):
"""Saves document metadata, looks for existing documents and
merges them to maintain equivalence classes"""
Expand Down Expand Up @@ -141,8 +173,4 @@ def save(self):
links = d.get('link', [])
super_doc.merge_links(links)

super(Document, super_doc).save()

# Remove assimilated docs
for d in existing_docs:
d.delete()
self._bulk_delete_and_update(existing_docs, [super_doc])
2 changes: 1 addition & 1 deletion tests/test_document.py
Expand Up @@ -272,7 +272,6 @@ def test_save_merge_documents():
d3 = Document.fetch(3)
assert d3 is None


d4 = Document({
"id": "4",
"title": "document4",
Expand All @@ -296,6 +295,7 @@ def test_save_merge_documents():
})

d5.save()

# The documents have been merged
d1 = Document.fetch(1)
d2 = Document.fetch(2)
Expand Down

0 comments on commit ec4d908

Please sign in to comment.