Skip to content

Commit

Permalink
Remove match from API
Browse files Browse the repository at this point in the history
  • Loading branch information
softwaredoug committed May 14, 2024
1 parent 4f6e838 commit f4d5146
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 19 deletions.
11 changes: 1 addition & 10 deletions searcharray/postings.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def memory_report(self):
"""

# ***********************************************************
-# Search functionality
+# Search API
# ***********************************************************
def termfreqs(self, token: Union[List[str], str],
slop: int = 0,
Expand Down Expand Up @@ -601,15 +601,6 @@ def docfreq(self, token: str) -> int:
def doclengths(self) -> np.ndarray:
return self.doc_lens

-    def match(self, token: Union[List[str], str], slop: int = 0) -> np.ndarray:
-        """Return a boolean numpy array indicating which elements contain the given term."""
-        token = self._check_token_arg(token)
-        if isinstance(token, list):
-            term_freq = self._phrase_freq(token, slop=slop)
-        else:
-            term_freq = self.termfreqs(token)
-        return term_freq > 0
-
def score(self, token: Union[str, List[str]],
similarity: Similarity = default_bm25,
min_posn: Optional[int] = None,
Expand Down
6 changes: 1 addition & 5 deletions test/test_minmax_posns.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from searcharray.postings import SearchArray
from test_utils import w_scenarios
-import numpy as np


scenarios = {
Expand Down Expand Up @@ -44,13 +43,10 @@


@w_scenarios(scenarios)
-def test_phrase_freq(docs, phrase, min_posn, max_posn, expected):
+def test_min_max(docs, phrase, min_posn, max_posn, expected):
docs = docs()
docs_before = docs.copy()
term_freqs = docs.termfreqs(phrase, min_posn=min_posn,
max_posn=max_posn)
-    expected_matches = np.array(expected) > 0
-    matches = docs.match(phrase)
     assert (term_freqs == expected).all()
-    assert (matches == expected_matches).all()
assert (docs == docs_before).all()
2 changes: 1 addition & 1 deletion test/test_phrase_matches.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def test_phrase_api(docs, phrase, expected):
docs_before = docs.copy()
term_freqs = docs.termfreqs(phrase)
expected_matches = np.array(expected) > 0
-    matches = docs.match(phrase)
+    matches = docs.termfreqs(phrase) > 0
assert (term_freqs == expected).all()
assert (matches == expected_matches).all()
assert (docs == docs_before).all()
Expand Down
4 changes: 2 additions & 2 deletions test/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ def data():


def test_match(data):
-    matches = data.match("foo")
+    matches = data.termfreqs("foo") > 0
assert (matches == [True, False, False, False] * 25).all()


def test_match_missing_term(data):
-    matches = data.match("not_present")
+    matches = data.termfreqs("not_present") > 0
assert (matches == [False, False, False, False] * 25).all()


Expand Down
2 changes: 1 addition & 1 deletion test/test_tmdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_tokenize_tmdb(tmdb_raw_data):


def test_slice_then_search(tmdb_data):
-    star_wars_in_title = tmdb_data['title_tokens'].array.match(["Star", "Wars"])
+    star_wars_in_title = tmdb_data['title_tokens'].array.termfreqs(["Star", "Wars"]) > 0
star_wars_in_title = tmdb_data[star_wars_in_title]
skywalkec_docfreq = star_wars_in_title['overview_tokens'].array.docfreq("Skywalker")
assert skywalkec_docfreq <= star_wars_in_title['overview_tokens'].array.corpus_size
Expand Down

0 comments on commit f4d5146

Please sign in to comment.