FT.CREATE - support MAXTEXTFIELDS, TEMPORARY, NOHL, NOFREQS, SKIPINITIALSCAN (#1847)

Co-authored-by: Chayim I. Kirshen <c@kirshen.com>
redis · Jan 10, 2022 · cb19873 · cb19873
1 parent 1fbc2d1
commit cb19873
Show file tree

Hide file tree

Showing 2 changed files with 124 additions and 20 deletions.
diff --git a/redis/commands/search/commands.py b/redis/commands/search/commands.py
@@ -44,7 +44,12 @@
 
 NOOFFSETS = "NOOFFSETS"
 NOFIELDS = "NOFIELDS"
+NOHL = "NOHL"
+NOFREQS = "NOFREQS"
+MAXTEXTFIELDS = "MAXTEXTFIELDS"
+TEMPORARY = "TEMPORARY"
 STOPWORDS = "STOPWORDS"
+SKIPINITIALSCAN = "SKIPINITIALSCAN"
 WITHSCORES = "WITHSCORES"
 FUZZY = "FUZZY"
 WITHPAYLOADS = "WITHPAYLOADS"
@@ -66,27 +71,57 @@ def create_index(
         no_field_flags=False,
         stopwords=None,
         definition=None,
+        max_text_fields=False,
+        temporary=None,
+        no_highlight=False,
+        no_term_frequencies=False,
+        skip_initial_scan=False,
     ):
         """
         Create the search index. The index must not already exist.
 
         ### Parameters:
 
         - **fields**: a list of TextField or NumericField objects
-        - **no_term_offsets**: If true, we will not save term offsets in the index
-        - **no_field_flags**: If true, we will not save field flags that allow searching in specific fields
-        - **stopwords**: If not None, we create the index with this custom stopword list. The list can be empty
+        - **no_term_offsets**: If true, we will not save term offsets in
+        the index
+        - **no_field_flags**: If true, we will not save field flags that
+        allow searching in specific fields
+        - **stopwords**: If not None, we create the index with this custom
+        stopword list. The list can be empty
+        - **max_text_fields**: If true, we will encode indexes as if there
+        were more than 32 text fields which allows you to add additional
+        fields (beyond 32).
+        - **temporary**: Create a lightweight temporary index which will
+        expire after the specified period of inactivity (in seconds). The
+        internal idle timer is reset whenever the index is searched or added to.
+        - **no_highlight**: If true, highlighting support is disabled.
+        Also implied by no_term_offsets.
+        - **no_term_frequencies**: If true, we avoid saving the term frequencies
+        in the index.
+        - **skip_initial_scan**: If true, do not scan and index existing keys.
 
         For more information: https://oss.redis.com/redisearch/Commands/#ftcreate
         """  # noqa
 
         args = [CREATE_CMD, self.index_name]
         if definition is not None:
             args += definition.args
+        if max_text_fields:
+            args.append(MAXTEXTFIELDS)
+        if temporary is not None and isinstance(temporary, int):
+            args.append(TEMPORARY)
+            args.append(temporary)
         if no_term_offsets:
             args.append(NOOFFSETS)
+        if no_highlight:
+            args.append(NOHL)
         if no_field_flags:
             args.append(NOFIELDS)
+        if no_term_frequencies:
+            args.append(NOFREQS)
+        if skip_initial_scan:
+            args.append(SKIPINITIALSCAN)
         if stopwords is not None and isinstance(stopwords, (list, tuple, set)):
             args += [STOPWORDS, len(stopwords)]
             if len(stopwords) > 0:
@@ -129,7 +164,6 @@ def dropindex(self, delete_documents=False):
         ### Parameters:
 
         - **delete_documents**: If `True`, all documents will be deleted.
-
         For more information: https://oss.redis.com/redisearch/Commands/#ftdropindex
         """  # noqa
         keep_str = "" if delete_documents else "KEEPDOCS"
@@ -217,23 +251,27 @@ def add_document(
         ### Parameters
 
         - **doc_id**: the id of the saved document.
-        - **nosave**: if set to true, we just index the document, and don't \
-        save a copy of it. This means that searches will just return ids.
-        - **score**: the document ranking, between 0.0 and 1.0.
-        - **payload**: optional inner-index payload we can save for fast access in scoring functions
-        - **replace**: if True, and the document already is in the index, \
+        - **nosave**: if set to true, we just index the document, and don't
+                      save a copy of it. This means that searches will just
+                      return ids.
+        - **score**: the document ranking, between 0.0 and 1.0
+        - **payload**: optional inner-index payload we can save for fast
+                       access in scoring functions
+        - **replace**: if True, and the document already is in the index,
         we perform an update and reindex the document
-        - **partial**: if True, the fields specified will be added to the \
-        existing document. \
-        This has the added benefit that any fields specified \
-        with `no_index` will not be reindexed again. Implies `replace`
+        - **partial**: if True, the fields specified will be added to the
+                       existing document.
+                       This has the added benefit that any fields specified
+                       with `no_index`
+                       will not be reindexed again. Implies `replace`
         - **language**: Specify the language used for document tokenization.
-        - **no_create**: if True, the document is only updated and reindexed \
-        if it already exists.  If the document does not exist, an error will be \
-        returned. Implies `replace`
-        - **fields** kwargs dictionary of the document fields to be saved and/or indexed.
-
-        NOTE: Geo points shoule be encoded as strings of "lon,lat"
+        - **no_create**: if True, the document is only updated and reindexed
+                         if it already exists.
+                         If the document does not exist, an error will be
+                         returned. Implies `replace`
+        - **fields**: kwargs dictionary of the document fields to be saved
+                         and/or indexed.
+                     NOTE: Geo points should be encoded as strings of "lon,lat"
 
         For more information: https://oss.redis.com/redisearch/Commands/#ftadd
         """  # noqa
@@ -481,7 +519,7 @@ def spellcheck(self, query, distance=None, include=None, exclude=None):
 
         **query**: search query.
         **distance***: the maximal Levenshtein distance for spelling
-        suggestions (default: 1, max: 4).
+                       suggestions (default: 1, max: 4).
         **include**: specifies an inclusion custom dictionary.
         **exclude**: specifies an exclusion custom dictionary.
 

diff --git a/tests/test_search.py b/tests/test_search.py
@@ -1154,6 +1154,72 @@ def test_index_definition(client):
     createIndex(client.ft(), num_docs=500, definition=definition)
 
 
+@pytest.mark.redismod
+def testExpire(client):
+    client.ft().create_index((TextField("txt", sortable=True),), temporary=4)
+    ttl = client.execute_command("ft.debug", "TTL", "idx")
+    assert ttl > 2
+
+    while ttl > 2:
+        ttl = client.execute_command("ft.debug", "TTL", "idx")
+        time.sleep(0.01)
+
+    # add document - should reset the ttl
+    client.ft().add_document("doc", txt="foo bar", text="this is a simple test")
+    ttl = client.execute_command("ft.debug", "TTL", "idx")
+    assert ttl > 2
+    try:
+        while True:
+            ttl = client.execute_command("ft.debug", "TTL", "idx")
+            time.sleep(0.5)
+    except redis.exceptions.ResponseError:
+        assert ttl == 0
+
+
+@pytest.mark.redismod
+def testSkipInitialScan(client):
+    client.hset("doc1", "foo", "bar")
+    q = Query("@foo:bar")
+
+    client.ft().create_index((TextField("foo"),), skip_initial_scan=True)
+    assert 0 == client.ft().search(q).total
+
+
+@pytest.mark.redismod
+def testSummarizeDisabled_nooffset(client):
+    client.ft().create_index((TextField("txt"),), no_term_offsets=True)
+    client.ft().add_document("doc1", txt="foo bar")
+    with pytest.raises(Exception):
+        client.ft().search(Query("foo").summarize(fields=["txt"]))
+
+
+@pytest.mark.redismod
+def testSummarizeDisabled_nohl(client):
+    client.ft().create_index((TextField("txt"),), no_highlight=True)
+    client.ft().add_document("doc1", txt="foo bar")
+    with pytest.raises(Exception):
+        client.ft().search(Query("foo").summarize(fields=["txt"]))
+
+
+@pytest.mark.redismod
+def testMaxTextFields(client):
+    # Creating the index definition
+    client.ft().create_index((TextField("f0"),))
+    for x in range(1, 32):
+        client.ft().alter_schema_add((TextField(f"f{x}"),))
+
+    # Should be too many text fields
+    with pytest.raises(redis.ResponseError):
+        client.ft().alter_schema_add((TextField(f"f{x}"),))
+
+    client.ft().dropindex("idx")
+    # Creating the index definition
+    client.ft().create_index((TextField("f0"),), max_text_fields=True)
+    # Fill the index with fields
+    for x in range(1, 50):
+        client.ft().alter_schema_add((TextField(f"f{x}"),))
+
+
 @pytest.mark.redismod
 @skip_ifmodversion_lt("2.0.0", "search")
 def test_create_client_definition(client):