In [1]:
from datetime import datetime
from collections import defaultdict

from fts import FullTextSearch, DOCUMENTS

In [2]:
# Create the search engine and index the DOCUMENTS collection imported above.
# NOTE(review): `_index_for_bm25` is underscore-prefixed (private by convention);
# confirm whether the class exposes a public indexing method to call instead.
fts = FullTextSearch()
# Bare last expression: the call's return value is rendered as the cell output
# (a one-line summary string in the saved run).
fts._index_for_bm25(DOCUMENTS)

'Indexed 10 documents with 274 unique words'

In [None]:
query = "intelligence"

# Recency-boost tuning knob: every hour of age adds this much to the
# denominator below, so the boost factor decays from 2.0 (modified just now)
# towards 1.0 (very old).
RECENCY_DECAY_PER_HOUR = 0.0005

doc_scores = fts._get_bm25_scores(query)

# Read the clock once so every document is aged against the same instant.
# The aware twin denotes the same moment and is used for timestamps that
# carry a UTC offset (naive and aware datetimes cannot be subtracted).
now_naive = datetime.now()
now_aware = now_naive.astimezone()

# Iterate over a snapshot because the loop writes back into doc_scores.
for doc_id, score in list(doc_scores.items()):
    # recency boost
    timestamp = datetime.fromisoformat(fts.documents[doc_id]['modified'])
    now = now_naive if timestamp.tzinfo is None else now_aware

    # Clamp at zero: a future-dated document counts as brand new instead of
    # producing a boost above 2x (or a division by zero at -2000 hours).
    age_in_hours = max((now - timestamp).total_seconds() / 3600, 0.0)

    # the more recent, the higher the boost
    denominator = 1 + age_in_hours * RECENCY_DECAY_PER_HOUR
    recency_boost = 1 + (1 / denominator)
    boosted_score = score * recency_boost

    # Only report documents whose score actually went up (a zero or negative
    # base score is not increased by a multiplier >= 1).
    if boosted_score > score:
        print(f'Boosted score for doc_id {doc_id}: {score} -> {boosted_score}')

    doc_scores[doc_id] = boosted_score

# Display the boosted scores as the cell output.
doc_scores

Boosted score for doc_id 1: 0.05287983149893803 -> 0.06036527654755658
Boosted score for doc_id 2: 0.04517173709878027 -> 0.05136390391038524
Boosted score for doc_id 3: 0.049121763342148746 -> 0.055452497792849255
Boosted score for doc_id 4: 0.08216438974742392 -> 0.0920757644078484
Boosted score for doc_id 5: 0.08818380268430596 -> 0.0984626585063387
Boosted score for doc_id 6: 0.08892843563097559 -> 0.09902872522835386
Boosted score for doc_id 7: 0.08153230195553844 -> 0.09025585662376011
Boosted score for doc_id 8: 0.07271279162813266 -> 0.08028087606176137
Boosted score for doc_id 9: 0.05599004464533014 -> 0.06149924842177486
Boosted score for doc_id 10: 0.06775439697242819 -> 0.07429890342367747


defaultdict(float,
            {'1': 0.06036527654755658,
             '2': 0.05136390391038524,
             '3': 0.055452497792849255,
             '4': 0.0920757644078484,
             '5': 0.0984626585063387,
             '6': 0.09902872522835386,
             '7': 0.09025585662376011,
             '8': 0.08028087606176137,
             '9': 0.06149924842177486,
             '10': 0.07429890342367747})

In [None]:
# Prepare search results
# Uses `query` and the recency-boosted `doc_scores` defined earlier in the
# notebook, so this cell must run after the scoring cell.
# NOTE(review): both helpers are underscore-prefixed (private by convention);
# confirm whether a public search method wraps these steps.
matched_words = fts._get_matched_words(query)
results = fts._prepare_search_results(doc_scores, matched_words)
# Bare last expression so the notebook renders the results. In the saved run
# this is a list of dicts with 'id', 'title', 'snippet' and 'score' keys,
# listed highest score first.
results

[{'id': '6',
  'title': 'Artificial Intelligence Applications',
  'snippet': 'Artificial intelligence is revolutionizing multiple industries. Healthcare uses AI for diagnosis and...',
  'score': 0.099},
 {'id': '5',
  'title': 'Advanced Computing Systems',
  'snippet': 'Artificial intelligence is transforming how systems learn and make decisions. Artificial intelligenc...',
  'score': 0.098},
 {'id': '4',
  'title': 'Artificial Intelligence Revolution in Search',
  'snippet': 'Modern computing systems are becoming increasingly sophisticated. Machine learning models can recogn...',
  'score': 0.092},
 {'id': '7',
  'title': 'Using AI for Text Search',
  'snippet': 'Artificial intelligence techniques significantly improve full text search capabilities. Modern searc...',
  'score': 0.09},
 {'id': '8',
  'title': 'Intelligent Information Retrieval',
  'snippet': 'Advanced search systems now incorporate intelligence for better results. Some elements of full text ...',
  'score': 0.08},
 {'i