In [3]:
# Toy corpus: three short documents, one string per document.
corpus = [
    "the sun is a star",
    "the moon is a satellite",
    "the sun and moon are celestial bodies",
]

In [5]:
import math
from collections import Counter

# Tokenise each document: lowercase, then split on whitespace.
tokenized_corpus = [text.lower().split() for text in corpus]

# Alphabetically sorted list of every distinct token seen in the corpus.
vocabulary = sorted({token for tokens in tokenized_corpus for token in tokens})

# Term frequency: occurrences of a term in a document divided by that
# document's length. Counter returns 0 for terms the document lacks, so every
# per-document dict has an entry for the whole vocabulary.
tf = []
for tokens in tokenized_corpus:
    counts = Counter(tokens)
    n_tokens = len(tokens)
    tf.append({term: counts[term] / n_tokens for term in vocabulary})

# Document frequency: number of documents that contain the term at least once.
df = {
    term: sum(1 for tokens in tokenized_corpus if term in tokens)
    for term in vocabulary
}

# Inverse document frequency with the natural log and no smoothing, so a term
# present in every document gets idf = log(1) = 0.
N = len(tokenized_corpus)
idf = {term: math.log(N / df[term]) for term in vocabulary}

# TF-IDF per document: plain product of tf and idf (no normalisation applied).
tfidf_manual = [
    {term: doc_tf[term] * idf[term] for term in vocabulary}
    for doc_tf in tf
]

tfidf_manual

[{'a': 0.08109302162163289,
  'and': 0.0,
  'are': 0.0,
  'bodies': 0.0,
  'celestial': 0.0,
  'is': 0.08109302162163289,
  'moon': 0.0,
  'satellite': 0.0,
  'star': 0.21972245773362198,
  'sun': 0.08109302162163289,
  'the': 0.0},
 {'a': 0.08109302162163289,
  'and': 0.0,
  'are': 0.0,
  'bodies': 0.0,
  'celestial': 0.0,
  'is': 0.08109302162163289,
  'moon': 0.08109302162163289,
  'satellite': 0.21972245773362198,
  'star': 0.0,
  'sun': 0.0,
  'the': 0.0},
 {'a': 0.0,
  'and': 0.15694461266687282,
  'are': 0.15694461266687282,
  'bodies': 0.15694461266687282,
  'celestial': 0.15694461266687282,
  'is': 0.0,
  'moon': 0.05792358687259491,
  'satellite': 0.0,
  'star': 0.0,
  'sun': 0.05792358687259491,
  'the': 0.0}]

In [7]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Raw term counts per document, using CountVectorizer's default settings.
count_vec = CountVectorizer()
count_matrix = count_vec.fit_transform(corpus)
count_matrix_array = count_matrix.toarray()
count_feature_names = count_vec.get_feature_names_out()

# TF-IDF weights per document, using TfidfVectorizer's default settings.
# NOTE(review): per the scikit-learn docs the defaults smooth the idf,
# L2-normalise each row, and use a token pattern that drops single-character
# tokens such as 'a' — confirm against the installed version.
tfidf_vec = TfidfVectorizer()
tfidf_matrix = tfidf_vec.fit_transform(corpus)
tfidf_matrix_array = tfidf_matrix.toarray()
tfidf_feature_names = tfidf_vec.get_feature_names_out()

# Show the dense TF-IDF matrix as a table: one row per document, one column
# per vocabulary term.
pd.DataFrame(data=tfidf_matrix_array, columns=tfidf_feature_names)

Unnamed: 0,and,are,bodies,celestial,is,moon,satellite,star,sun,the
0,0.0,0.0,0.0,0.0,0.480458,0.0,0.0,0.631745,0.480458,0.373119
1,0.0,0.0,0.0,0.0,0.480458,0.480458,0.631745,0.0,0.0,0.373119
2,0.426184,0.426184,0.426184,0.426184,0.0,0.324124,0.0,0.0,0.324124,0.251711
