In [1]:
import math
from collections import Counter

def tokenize(text):
    """Simple tokenization by splitting on spaces and removing punctuation."""
    return [word.lower().strip('.,!?') for word in text.split()]

def compute_tf(document):
    """Compute term frequency (TF) for each word in the document."""
    word_count = Counter(tokenize(document))
    total_words = len(tokenize(document))
    return {word: count / total_words for word, count in word_count.items()}

def compute_idf(documents):
    """Compute inverse document frequency (IDF) for each word across documents."""
    total_docs = len(documents)
    word_in_docs = Counter()
    for doc in documents:
        unique_words = set(tokenize(doc))
        for word in unique_words:
            word_in_docs[word] += 1
    return {word: math.log(total_docs / (1 + count)) for word, count in word_in_docs.items()}

def compute_tfidf(documents):
    """Compute TF-IDF scores for all documents."""
    idf = compute_idf(documents)
    tfidf_scores = []
    for doc in documents:
        tf = compute_tf(doc)
        tfidf = {word: tf[word] * idf[word] for word in tf}
        tfidf_scores.append(tfidf)
    return tfidf_scores
documents = [
    "The player made a fantastic goal during the match.",
    "What an incredible goal that was!",
    "The match was intense, and the crowd cheered for every goal.",
    "The commentator praised the goal as one of the best this season."
]
tfidf_scores = compute_tfidf(documents)
for i, doc_tfidf in enumerate(tfidf_scores):
    print(f"Document {i+1} TF-IDF scores:")
    for word, score in doc_tfidf.items():
        print(f"  {word}: {score:.4f}")
    print()


Document 1 TF-IDF scores:
  the: 0.0000
  player: 0.0770
  made: 0.0770
  a: 0.0770
  fantastic: 0.0770
  goal: -0.0248
  during: 0.0770
  match: 0.0320

Document 2 TF-IDF scores:
  what: 0.1155
  an: 0.1155
  incredible: 0.1155
  goal: -0.0372
  that: 0.1155
  was: 0.0479

Document 3 TF-IDF scores:
  the: 0.0000
  match: 0.0262
  was: 0.0262
  intense: 0.0630
  and: 0.0630
  crowd: 0.0630
  cheered: 0.0630
  for: 0.0630
  every: 0.0630
  goal: -0.0203

Document 4 TF-IDF scores:
  the: 0.0000
  commentator: 0.0578
  praised: 0.0578
  goal: -0.0186
  as: 0.0578
  one: 0.0578
  of: 0.0578
  best: 0.0578
  this: 0.0578
  season: 0.0578

