In [1]:
import math
from collections import defaultdict

from fts import FullTextSearch, DOCUMENTS

In [2]:
# Create the search engine instance that every later cell reuses as `fts`.
fts = FullTextSearch()
# Index the bundled DOCUMENTS. The call's return value is the cell output:
# a one-line summary of how many documents and unique words were indexed.
# NOTE(review): `_index_for_bm25` is underscore-prefixed (private by
# convention) — confirm whether a public indexing entry point exists.
fts._index_for_bm25(DOCUMENTS)

'Indexed 10 documents with 274 unique words'

In [3]:
query = "intelligence"

# Extra multiplier added to a document's score for every query word that
# also appears in its title: one match -> x1.2, two matches -> x1.4, ...
TITLE_BOOST_PER_MATCH = 0.2

# Scores as returned by the engine for this query, keyed by doc_id
doc_scores = fts._get_bm25_scores(query)

query_words = fts.tokenize(query)

# Only existing keys are reassigned inside the loop, so the mapping never
# changes size while it is being iterated.
for doc_id, score in doc_scores.items():
    # Tokenize the title once and keep the tokens in a set so each
    # membership check below does not rescan the whole title.
    title_words = set(fts.tokenize(fts.documents[doc_id]["title"]))

    # Number of query words found in the title (a query word that is
    # repeated in the query is counted once per repetition).
    matching_words = sum(1 for qw in query_words if qw in title_words)
    title_boost = 1 + (TITLE_BOOST_PER_MATCH * matching_words)
    boosted_score = score * title_boost

    # Report only the documents whose score actually changed
    if boosted_score > score:
        print('Boosted score for doc_id {}: {} -> {}'.format(doc_id, score, boosted_score))

    doc_scores[doc_id] = boosted_score

# Display the boosted scores as the cell output
doc_scores

Boosted score for doc_id 1: 0.05287983149893803 -> 0.06345579779872564
Boosted score for doc_id 4: 0.08216438974742392 -> 0.0985972676969087
Boosted score for doc_id 6: 0.08892843563097559 -> 0.10671412275717071


defaultdict(float,
            {'1': 0.06345579779872564,
             '2': 0.04517173709878027,
             '3': 0.049121763342148746,
             '4': 0.0985972676969087,
             '5': 0.08818380268430596,
             '6': 0.10671412275717071,
             '7': 0.08153230195553844,
             '8': 0.07271279162813266,
             '9': 0.05599004464533014,
             '10': 0.06775439697242819})

In [4]:
# Prepare search results from the title-boosted scores computed above.
# `matched_words` is whatever the engine reports for this query; it is passed
# straight through to the result builder.
# NOTE(review): presumably used for the **bold** highlighting seen in the
# snippets — confirm against `_prepare_search_results`.
matched_words = fts._get_matched_words(query)
# In the displayed output each result is a dict with 'id', 'title', 'snippet'
# and a 'score' shown to three decimals; the visible entries appear in
# descending score order (ordering presumably done by the helper — verify).
results = fts._prepare_search_results(doc_scores, matched_words)
results

[{'id': '6',
  'title': 'Artificial Intelligence Applications',
  'snippet': 'Artificial **intelligence** is revolutionizing multiple i ... nstitutions employ artificial **intelligence** for fraud detection. Manufact ... ring benefits from artificial **intelligence** through predictive mai...',
  'score': 0.107},
 {'id': '4',
  'title': 'Artificial Intelligence Revolution in Search',
  'snippet': ' patterns in data. Artificial **intelligence** neural networks mimic human b ... isual information. Artificial **intelligence** enhances capabilities across  ... benefit from these artificial **intelligence** adv...',
  'score': 0.099},
 {'id': '5',
  'title': 'Advanced Computing Systems',
  'snippet': 'Artificial **intelligence** is transforming how systems l ... nd make decisions. Artificial **intelligence** applications span from recomm ... icles. The rise of artificial **intelligence** has sparked debate abo...',
  'score': 0.088},
 {'id': '7',
  'title': 'Using AI for Text Search',
  'sn