Skip to content

Commit

Permalink
Remove match from API
Browse files Browse the repository at this point in the history
  • Loading branch information
softwaredoug committed May 14, 2024
1 parent 4f6e838 commit f4d5146
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 19 deletions.
11 changes: 1 addition & 10 deletions searcharray/postings.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def memory_report(self):
"""

# ***********************************************************
-# Search functionality
+# Search API
# ***********************************************************
def termfreqs(self, token: Union[List[str], str],
slop: int = 0,
Expand Down Expand Up @@ -601,15 +601,6 @@ def docfreq(self, token: str) -> int:
def doclengths(self) -> np.ndarray:
return self.doc_lens

-    def match(self, token: Union[List[str], str], slop: int = 0) -> np.ndarray:
-        """Return a boolean numpy array indicating which elements contain the given term."""
-        token = self._check_token_arg(token)
-        if isinstance(token, list):
-            term_freq = self._phrase_freq(token, slop=slop)
-        else:
-            term_freq = self.termfreqs(token)
-        return term_freq > 0
-
def score(self, token: Union[str, List[str]],
similarity: Similarity = default_bm25,
min_posn: Optional[int] = None,
Expand Down
6 changes: 1 addition & 5 deletions test/test_minmax_posns.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from searcharray.postings import SearchArray
from test_utils import w_scenarios
-import numpy as np


scenarios = {
Expand Down Expand Up @@ -44,13 +43,10 @@


@w_scenarios(scenarios)
-def test_phrase_freq(docs, phrase, min_posn, max_posn, expected):
+def test_min_max(docs, phrase, min_posn, max_posn, expected):
docs = docs()
docs_before = docs.copy()
term_freqs = docs.termfreqs(phrase, min_posn=min_posn,
max_posn=max_posn)
-    expected_matches = np.array(expected) > 0
-    matches = docs.match(phrase)
     assert (term_freqs == expected).all()
-    assert (matches == expected_matches).all()
assert (docs == docs_before).all()
2 changes: 1 addition & 1 deletion test/test_phrase_matches.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def test_phrase_api(docs, phrase, expected):
docs_before = docs.copy()
term_freqs = docs.termfreqs(phrase)
expected_matches = np.array(expected) > 0
-    matches = docs.match(phrase)
+    matches = docs.termfreqs(phrase) > 0
assert (term_freqs == expected).all()
assert (matches == expected_matches).all()
assert (docs == docs_before).all()
Expand Down
4 changes: 2 additions & 2 deletions test/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ def data():


def test_match(data):
-    matches = data.match("foo")
+    matches = data.termfreqs("foo") > 0
assert (matches == [True, False, False, False] * 25).all()


def test_match_missing_term(data):
-    matches = data.match("not_present")
+    matches = data.termfreqs("not_present") > 0
assert (matches == [False, False, False, False] * 25).all()


Expand Down
2 changes: 1 addition & 1 deletion test/test_tmdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_tokenize_tmdb(tmdb_raw_data):


def test_slice_then_search(tmdb_data):
-    star_wars_in_title = tmdb_data['title_tokens'].array.match(["Star", "Wars"])
+    star_wars_in_title = tmdb_data['title_tokens'].array.termfreqs(["Star", "Wars"]) > 0
star_wars_in_title = tmdb_data[star_wars_in_title]
skywalkec_docfreq = star_wars_in_title['overview_tokens'].array.docfreq("Skywalker")
assert skywalkec_docfreq <= star_wars_in_title['overview_tokens'].array.corpus_size
Expand Down

0 comments on commit f4d5146

Please sign in to comment.