In [26]:
from sklearn.feature_extraction.text import TfidfVectorizer

def calculate_tfidf(texts):
    # Create a TF-IDF vectorizer
    tfidf_vectorizer = TfidfVectorizer()

    # Fit and transform the input texts
    tfidf_matrix = tfidf_vectorizer.fit_transform(texts)

    # Get the feature names (words)
    feature_names = tfidf_vectorizer.get_feature_names_out()

    # Initialize a list to store TF-IDF scores for each text
    tfidf_scores_list = []

    # Iterate through the TF-IDF matrices for each text
    for tfidf_vector in tfidf_matrix.toarray():
        # Create a dictionary of word to TF-IDF score for the current text
        tfidf_scores = {feature: score for feature, score in zip(feature_names, tfidf_vector)}
        tfidf_scores_list.append(tfidf_scores)

    return tfidf_scores_list

# Example usage:
text_list = [
    "This is a sample text for TF-IDF calculation.",
    "TF-IDF is important in text analysis.",
    "Another text for TF-IDF demonstration."
]

tfidf_scores_list = calculate_tfidf(text_list)


tfidf_scores_list

[{'analysis': 0.0,
  'another': 0.0,
  'calculation': 0.4383907244416506,
  'demonstration': 0.0,
  'for': 0.333407445657484,
  'idf': 0.2589206239570202,
  'important': 0.0,
  'in': 0.0,
  'is': 0.333407445657484,
  'sample': 0.4383907244416506,
  'text': 0.2589206239570202,
  'tf': 0.2589206239570202,
  'this': 0.4383907244416506},
 {'analysis': 0.46499650594866215,
  'another': 0.0,
  'calculation': 0.0,
  'demonstration': 0.0,
  'for': 0.0,
  'idf': 0.2746344271115768,
  'important': 0.46499650594866215,
  'in': 0.46499650594866215,
  'is': 0.3536418282696428,
  'sample': 0.0,
  'text': 0.2746344271115768,
  'tf': 0.2746344271115768,
  'this': 0.0},
 {'analysis': 0.0,
  'another': 0.5252343076585736,
  'calculation': 0.0,
  'demonstration': 0.5252343076585736,
  'for': 0.39945422912667033,
  'idf': 0.31021184318121237,
  'important': 0.0,
  'in': 0.0,
  'is': 0.0,
  'sample': 0.0,
  'text': 0.31021184318121237,
  'tf': 0.31021184318121237,
  'this': 0.0}]

In [71]:
from sklearn.feature_extraction.text import TfidfVectorizer

text_list = [
    "This is a sample text for TF-IDF calculation.",
    "TF-IDF is important in text analysis.",
    "Another text for TF-IDF demonstration."
]

# Initialize the TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit and transform the text data to calculate TF-IDF scores
tfidf_matrix = tfidf_vectorizer.fit_transform(text_list)

# Get the TF-IDF scores for each item in the text_list
tfidf_scores = tfidf_matrix.toarray()

# Iterate through the text_list and print TF-IDF scores for each item
for i, item in enumerate(text_list):
    print(f"TF-IDF scores for item {i + 1}:")
    words = tfidf_vectorizer.get_feature_names_out()
    scores = tfidf_scores[i]
    for word, score in zip(words, scores):
        if score > 0:
            print(f"{word}: {score}")


TF-IDF scores for item 1:
calculation: 0.4383907244416506
for: 0.333407445657484
idf: 0.2589206239570202
is: 0.333407445657484
sample: 0.4383907244416506
text: 0.2589206239570202
tf: 0.2589206239570202
this: 0.4383907244416506
TF-IDF scores for item 2:
analysis: 0.46499650594866215
idf: 0.2746344271115768
important: 0.46499650594866215
in: 0.46499650594866215
is: 0.3536418282696428
text: 0.2746344271115768
tf: 0.2746344271115768
TF-IDF scores for item 3:
another: 0.5252343076585736
demonstration: 0.5252343076585736
for: 0.39945422912667033
idf: 0.31021184318121237
text: 0.31021184318121237
tf: 0.31021184318121237


In [74]:
from sklearn.feature_extraction.text import TfidfVectorizer

text_list = [
    "This is a sample text for TF-IDF calculation.",
    "TF-IDF is important in text analysis.",
    "Another text for TF-IDF demonstration."
]

# Initialize the TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit and transform the text data to calculate TF-IDF scores
tfidf_matrix = tfidf_vectorizer.fit_transform(text_list)

# Get the TF-IDF scores for each item in the text_list
tfidf_scores = tfidf_matrix.toarray()

# Initialize a list to store the TF-IDF scores for each item
tfidf_scores_list = []

# Iterate through the text_list and build the list of TF-IDF scores
for i, item in enumerate(text_list):
    words = tfidf_vectorizer.get_feature_names_out()
    scores = tfidf_scores[i]
    item_scores = {word: score for word, score in zip(words, scores) if score > 0}
    tfidf_scores_list.append(item_scores)

tfidf_scores_list

[{'calculation': 0.4383907244416506,
  'for': 0.333407445657484,
  'idf': 0.2589206239570202,
  'is': 0.333407445657484,
  'sample': 0.4383907244416506,
  'text': 0.2589206239570202,
  'tf': 0.2589206239570202,
  'this': 0.4383907244416506},
 {'analysis': 0.46499650594866215,
  'idf': 0.2746344271115768,
  'important': 0.46499650594866215,
  'in': 0.46499650594866215,
  'is': 0.3536418282696428,
  'text': 0.2746344271115768,
  'tf': 0.2746344271115768},
 {'another': 0.5252343076585736,
  'demonstration': 0.5252343076585736,
  'for': 0.39945422912667033,
  'idf': 0.31021184318121237,
  'text': 0.31021184318121237,
  'tf': 0.31021184318121237}]

In [77]:
from sklearn.feature_extraction.text import TfidfVectorizer

text_list = [
    "This is a sample text for TF-IDF calculation.",
    "TF-IDF is important in text analysis.",
    "Another text for TF-IDF demonstration."
]

def tf_idf(text_list):
    # Initialize the TF-IDF vectorizer
    tfidf_vectorizer = TfidfVectorizer()

    # Fit and transform the text data to calculate TF-IDF scores
    tfidf_matrix = tfidf_vectorizer.fit_transform(text_list)

    # Get the TF-IDF scores for each item in the text_list
    tfidf_scores = tfidf_matrix.toarray()

    # Initialize a list to store the TF-IDF scores for each item
    tfidf_scores_list = []

    # Iterate through the text_list and build the list of TF-IDF scores
    for i, item in enumerate(text_list):
        words = tfidf_vectorizer.get_feature_names_out()
        scores = tfidf_scores[i]
        item_scores = {word: score for word, score in zip(words, scores) if score > 0}
        tfidf_scores_list.append(item_scores)

    return tfidf_scores_list

tf_idf_list = tf_idf(text_list)
tf_idf_list

[{'calculation': 0.4383907244416506,
  'for': 0.333407445657484,
  'idf': 0.2589206239570202,
  'is': 0.333407445657484,
  'sample': 0.4383907244416506,
  'text': 0.2589206239570202,
  'tf': 0.2589206239570202,
  'this': 0.4383907244416506},
 {'analysis': 0.46499650594866215,
  'idf': 0.2746344271115768,
  'important': 0.46499650594866215,
  'in': 0.46499650594866215,
  'is': 0.3536418282696428,
  'text': 0.2746344271115768,
  'tf': 0.2746344271115768},
 {'another': 0.5252343076585736,
  'demonstration': 0.5252343076585736,
  'for': 0.39945422912667033,
  'idf': 0.31021184318121237,
  'text': 0.31021184318121237,
  'tf': 0.31021184318121237}]

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

def calculate_tfidf(texts):
    # Create a TF-IDF vectorizer
    tfidf_vectorizer = TfidfVectorizer()

    # Fit and transform the input texts
    tfidf_matrix = tfidf_vectorizer.fit_transform(texts)

    # Get the feature names (words)
    feature_names = tfidf_vectorizer.get_feature_names_out()

    # Initialize a list to store TF-IDF scores for each text
    tfidf_scores_list = []

    # Iterate through the TF-IDF matrices for each text
    for tfidf_vector in tfidf_matrix.toarray():
        # Create a dictionary of word to TF-IDF score for the current text
        tfidf_scores = {feature: score for feature, score in zip(feature_names, tfidf_vector)}
        #print (f'tfidf_scores{tfidf_scores}')
        result = {key: value for key, value in tfidf_scores.items() if value > 0}
        tfidf_scores_list.append(result)
        
    

    return result

# Example usage:
text_list = [
    "This is a sample text for TF-IDF calculation.",
    "TF-IDF is important in text analysis.",
    "Another text for TF-IDF demonstration."
]

tfidf_scores_list = calculate_tfidf(text_list)


tfidf_scores_list

{'another': 0.5252343076585736,
 'demonstration': 0.5252343076585736,
 'for': 0.39945422912667033,
 'idf': 0.31021184318121237,
 'text': 0.31021184318121237,
 'tf': 0.31021184318121237}