In [10]:
from collections import defaultdict

from fts import FullTextSearch, DOCUMENTS

In [11]:
# Create the search engine and index the DOCUMENTS collection imported from `fts`.
fts = FullTextSearch()
# Kept as the cell's last expression so the summary string it returns is shown as output.
fts._index_for_bm25(DOCUMENTS)

'Indexed 10 documents with 274 unique words'

In [18]:
query = "advanced systems"

# Base relevance score per document id, plus the query tokens used by the proximity pass.
doc_scores = fts._get_bm25_scores(query)
query_words = fts.tokenize(query)
# Set form: O(1) membership tests, and a repeated query term is only matched once.
query_terms = set(query_words)

# Word-proximity boost: a document whose query terms first occur close together
# has its score multiplied by a factor in (1, 2]; adjacent terms get the largest boost.
for doc_id, score in doc_scores.items():
    words = fts.tokenize(fts.documents[doc_id]['content'])
    positions = defaultdict(list)

    # Record positions of all query terms in the document
    for i, word in enumerate(words):
        if word in query_terms:
            positions[word].append(i)

    # Boost only documents that contain every query term. The explicit emptiness
    # check matters because any() over no terms is False, which would otherwise
    # fall through to max() on an empty list and raise ValueError.
    if not query_words or any(not positions[word] for word in query_words):
        continue

    # This is a simplified approach - we're just checking the first occurrence of each term
    # A more comprehensive approach would check all possible combinations of positions
    word_positions = [positions[word][0] for word in query_words]
    word_distance = max(word_positions) - min(word_positions)

    # Distance 0 gives a factor of 2; the factor decays towards 1 as the terms move apart.
    proximity_boost = 1 + (1 / (word_distance + 1))
    boosted_score = score * proximity_boost

    if boosted_score > score:
        print('Boosted score for doc_id {}: {} -> {}'.format(doc_id, score, boosted_score))

    # Overwrites the value of an existing key only, so the dict's size is unchanged
    # while it is being iterated.
    doc_scores[doc_id] = boosted_score

doc_scores

Boosted score for doc_id 7: 1.663445843474621 -> 1.7674112086917848
Boosted score for doc_id 8: 1.9866561609435296 -> 2.648874881258039


defaultdict(float,
            {'5': 1.7990188244353584,
             '7': 1.7674112086917848,
             '8': 2.648874881258039,
             '3': 0.6434069008592909,
             '4': 0.5650362023499206,
             '6': 0.44226588985465176,
             '9': 0.46095756869330806})

In [19]:
# Prepare search results
# `doc_scores` and `query` come from the previous cell; the scores are already proximity-boosted.
# NOTE(review): `_get_matched_words` presumably returns the query terms used for snippet
# highlighting — confirm against the fts module.
matched_words = fts._get_matched_words(query)
results = fts._prepare_search_results(doc_scores, matched_words)
# Last expression: display the result records (id, title, snippet, score) as the cell output.
results

[{'id': '8',
  'title': 'Intelligent Information Retrieval',
  'snippet': '**advanced** search **systems** now incorporate intelligence  ...  search can be found in these **systems**. Artificial intelligence conc',
  'score': 2.649},
 {'id': '5',
  'title': 'Advanced Computing Systems',
  'snippet': 'elligence is transforming how **systems** learn and make decisions. Art ... ting more general and capable **systems**. Artificial intelligence tech',
  'score': 1.799},
 {'id': '7',
  'title': 'Using AI for Text Search',
  'snippet': 'rtificial intelligence, helps **systems** understand search queries bet ... d by artificial intelligence. **advanced** artificial intelligence model',
  'score': 1.767},
 {'id': '3',
  'title': 'Information Retrieval Systems',
  'snippet': 'Modern information retrieval **systems** employ various algorithms for ... component of modern retrieval **systems**. Full text search enables use ... remain foundational to search **systems**.Modern information ...',
  's