Skip to content

Commit

Permalink
Fix issue with empty string embeddings indexing, closes #496
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmezzetti committed Jul 7, 2023
1 parent 3b554d6 commit 56f0bce
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 6 deletions.
9 changes: 3 additions & 6 deletions src/python/txtai/database/filedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,15 +377,12 @@ def insertdocument(self, uid, document, tags, entry):
if document:
self.cursor.execute(FileDB.INSERT_DOCUMENT, [uid, json.dumps(document, allow_nan=False), tags, entry])

# Get value of text field
text = document.get("text")

# If both text and object are set, insert object as it won't otherwise be used
if text and obj:
# If text and object are both available, insert object as it won't otherwise be used
if "text" in document and obj:
self.insertobject(uid, obj, tags, entry)

# Return value to use for section - use text if available otherwise use object
return text if text else obj
return document["text"] if "text" in document else obj

def insertobject(self, uid, obj, tags, entry):
"""
Expand Down
13 changes: 13 additions & 0 deletions test/python/testdatabase/testsqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,19 @@ def testEmpty(self):
embeddings.upsert([])
self.assertIsNotNone(embeddings.ann)

def testEmptyString(self):
    """
    Test indexing of empty text, passed as a plain string and as a dict "text" field
    """

    # Same check for both input forms: index a single empty document, then
    # require that a search still returns a truthy result
    for data in ("", {"text": ""}):
        self.embeddings.index([(0, data, None)])
        self.assertTrue(self.embeddings.search("test"))

def testExplain(self):
"""
Test query explain
Expand Down
13 changes: 13 additions & 0 deletions test/python/testembeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,19 @@ def testEmpty(self):
embeddings.upsert([])
self.assertIsNotNone(embeddings.ann)

def testEmptyString(self):
    """
    Test indexing of empty text, passed as a plain string and as a dict "text" field
    """

    # Empty document in each supported shape: raw string first, then dict
    payloads = ["", {"text": ""}]

    for payload in payloads:
        # Rebuild the index with only the empty document, then verify search returns a truthy result
        self.embeddings.index([(0, payload, None)])
        self.assertTrue(self.embeddings.search("test"))

def testExternal(self):
"""
Test embeddings backed by external vectors
Expand Down

0 comments on commit 56f0bce

Please sign in to comment.