In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer

def calculate_tfidf(texts):
    """Compute per-document TF-IDF scores over a shared vocabulary.

    A default ``TfidfVectorizer`` is fitted on *texts*. Every document is
    then reported against the full fitted vocabulary, so a term that does
    not occur in a given document still appears in its dict (with the
    score the vectorizer assigned, i.e. zero).

    Args:
        texts: Iterable of raw text documents, one string per document.

    Returns:
        A list with one dict per document, in input order, mapping each
        vocabulary term to its TF-IDF score as a built-in ``float``.
        An empty collection of documents yields an empty list.

    Raises:
        Whatever scikit-learn raises for input it rejects is propagated
        unchanged (for example, a bare string passed instead of an
        iterable of strings, or documents that produce no vocabulary).
    """
    # A bare string is passed through untouched so the vectorizer applies
    # its own validation to it. Anything else is materialized once, so that
    # one-shot iterables can be checked for emptiness and still vectorized.
    if not isinstance(texts, str):
        texts = list(texts)
        if not texts:
            # Zero documents means zero score dicts; fitting would fail.
            return []

    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(texts)
    feature_names = tfidf_vectorizer.get_feature_names_out().tolist()

    # ``tolist()`` converts NumPy scalars to built-in floats, so the dicts
    # print as plain numbers regardless of the installed NumPy version.
    return [
        dict(zip(feature_names, row.tolist()))
        for row in tfidf_matrix.toarray()
    ]

# Example usage: score a small three-document corpus.
text_list = [
    "This is a sample text for TF-IDF calculation.",
    "TF-IDF is important in text analysis.",
    "Another text for TF-IDF demonstration.",
]

tfidf_scores_list = calculate_tfidf(text_list)

# Print a 1-based heading, the score dict, and a trailing blank line for
# each document (the empty third argument produces the blank line).
for position, doc_scores in enumerate(tfidf_scores_list, start=1):
    print(f"TF-IDF scores for text {position}:", doc_scores, "", sep="\n")


TF-IDF scores for text 1:
{'analysis': 0.0, 'another': 0.0, 'calculation': 0.4383907244416506, 'demonstration': 0.0, 'for': 0.333407445657484, 'idf': 0.2589206239570202, 'important': 0.0, 'in': 0.0, 'is': 0.333407445657484, 'sample': 0.4383907244416506, 'text': 0.2589206239570202, 'tf': 0.2589206239570202, 'this': 0.4383907244416506}

TF-IDF scores for text 2:
{'analysis': 0.46499650594866215, 'another': 0.0, 'calculation': 0.0, 'demonstration': 0.0, 'for': 0.0, 'idf': 0.2746344271115768, 'important': 0.46499650594866215, 'in': 0.46499650594866215, 'is': 0.3536418282696428, 'sample': 0.0, 'text': 0.2746344271115768, 'tf': 0.2746344271115768, 'this': 0.0}

TF-IDF scores for text 3:
{'analysis': 0.0, 'another': 0.5252343076585736, 'calculation': 0.0, 'demonstration': 0.5252343076585736, 'for': 0.39945422912667033, 'idf': 0.31021184318121237, 'important': 0.0, 'in': 0.0, 'is': 0.0, 'sample': 0.0, 'text': 0.31021184318121237, 'tf': 0.31021184318121237, 'this': 0.0}

