In [1]:
import pandas as pd
import math
from collections import Counter

def compute_tf(document):
    """Return the relative frequency of each token in ``document``.

    Args:
        document: Collection of hashable tokens (e.g. a list of words).

    Returns:
        dict mapping every distinct token to its occurrence count divided by
        ``len(document)``. An empty document produces an empty dict.
    """
    tf = {}
    for word, occurrences in Counter(document).items():
        tf[word] = occurrences / len(document)
    return tf

def compute_idf(documents):
    """Compute the inverse document frequency of every token in a corpus.

    The score for a token is ``math.log(N / df)`` (natural logarithm), where
    ``N`` is the number of documents and ``df`` is the number of documents
    that contain the token at least once.

    Args:
        documents: Sequence of tokenised documents, each an iterable of
            hashable tokens.

    Returns:
        A plain dict mapping token -> IDF. Keys are in order of first
        appearance in the corpus, so the result (and any DataFrame built
        from it) is identical from run to run. An empty corpus yields ``{}``.
    """
    N = len(documents)

    # Document frequency in a single pass over the corpus.
    # dict.fromkeys(doc, 1) de-duplicates the tokens of one document while
    # keeping their order, so each document contributes at most 1 per token
    # and the Counter's insertion order stays deterministic (iterating a set
    # of strings would make the order depend on hash randomisation).
    doc_freq = Counter()
    for doc in documents:
        doc_freq.update(dict.fromkeys(doc, 1))

    return {word: math.log(N / count) for word, count in doc_freq.items()}

def compute_tfidf(document, idf):
    """Weight the term frequencies of ``document`` by precomputed IDF scores.

    Args:
        document: Collection of tokens; passed unchanged to ``compute_tf``.
        idf: Mapping token -> IDF score. It is indexed with every token of
            ``document``, so with a plain dict a token missing from ``idf``
            raises ``KeyError``.

    Returns:
        dict mapping each distinct token of ``document`` to ``tf * idf``.
    """
    return {
        word: tf_value * idf[word]
        for word, tf_value in compute_tf(document).items()
    }

# Toy corpus: every string is treated as one document.
data = [
    "Twinkle, twinkle little stars",
    "How I wonder what you are",
    "Up above the world so high",
    "Like a diamond in the sky",
]

# Tokenise on whitespace only. Case and punctuation are kept as-is, so
# "Twinkle," and "twinkle" end up as two separate terms.
documents = list(map(str.split, data))

# Term frequencies: one dict per document -> one DataFrame row per document.
# A term that is absent from a document has no entry in that document's dict
# and would show up as NaN, hence fillna(0).
tf_data = list(map(compute_tf, documents))
tf_df = pd.DataFrame(tf_data).fillna(0)
print("TF Scores:", tf_df, sep="\n")

# Inverse document frequencies over the whole corpus, shown as a one-row frame.
idf = compute_idf(documents)
idf_df = pd.DataFrame([idf]).fillna(0)
print("\nIDF Scores:", idf_df, sep="\n")

# TF-IDF: each document's term frequencies weighted by the corpus-level IDF.
tfidf_data = [compute_tfidf(tokens, idf) for tokens in documents]
tfidf_df = pd.DataFrame(tfidf_data).fillna(0)
print("\nTF-IDF Scores:", tfidf_df, sep="\n")

TF Scores:
   Twinkle,  twinkle  little  stars       How         I    wonder      what  \
0      0.25     0.25    0.25   0.25  0.000000  0.000000  0.000000  0.000000   
1      0.00     0.00    0.00   0.00  0.166667  0.166667  0.166667  0.166667   
2      0.00     0.00    0.00   0.00  0.000000  0.000000  0.000000  0.000000   
3      0.00     0.00    0.00   0.00  0.000000  0.000000  0.000000  0.000000   

        you       are  ...     above       the     world        so      high  \
0  0.000000  0.000000  ...  0.000000  0.000000  0.000000  0.000000  0.000000   
1  0.166667  0.166667  ...  0.000000  0.000000  0.000000  0.000000  0.000000   
2  0.000000  0.000000  ...  0.166667  0.166667  0.166667  0.166667  0.166667   
3  0.000000  0.000000  ...  0.000000  0.166667  0.000000  0.000000  0.000000   

       Like         a   diamond        in       sky  
0  0.000000  0.000000  0.000000  0.000000  0.000000  
1  0.000000  0.000000  0.000000  0.000000  0.000000  
2  0.000000  0.000000  0.00000