In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Report the highest-scoring TF-IDF terms of every review in a CSV file.
#
# The CSV must contain a 'Review' column. Each review is treated as one
# document; the terms are whatever TfidfVectorizer's default tokenizer
# produces.

# Number of highest-scoring terms printed per review.
TOP_N = 3

# Load the CSV file into a DataFrame
file_path = 'temp.csv'  # Update this with the actual path to your file
data = pd.read_csv(file_path)

# Missing reviews become empty strings so the vectorizer only ever sees str.
data['Review'] = data['Review'].fillna('').astype(str)
reviews = data['Review'].tolist()

# Fit the vectorizer and compute the (n_reviews x n_terms) sparse matrix.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(reviews)

# Column index -> term.
feature_names = tfidf_vectorizer.get_feature_names_out()

# Dense copy of the matrix. The loop below no longer reads it; it is kept
# only so that code outside this cell that refers to this name still works.
# NOTE(review): it allocates n_reviews * n_terms floats -- drop it if nothing
# else needs it.
tfidf_matrix_dense = tfidf_matrix.todense()


def _top_terms(columns, scores, vocabulary, limit):
    """Return up to *limit* ``(term, score)`` pairs, best score first.

    Args:
        columns: 1-D integer array with the column index of each stored entry
            of one matrix row.
        scores: 1-D float array with the matching TF-IDF scores.
        vocabulary: Sequence mapping a column index to its term.
        limit: Maximum number of pairs to return.

    Returns:
        A list of ``(term, score)`` tuples sorted by descending score. Equal
        scores are ordered by ascending column index. Entries whose score is
        not positive are skipped, so a term that does not occur in the review
        is never reported.
    """
    present = scores > 0
    columns, scores = columns[present], scores[present]
    # lexsort uses the LAST key as the primary one: score descending first,
    # then column ascending to break ties deterministically.
    order = np.lexsort((columns, -scores))[:limit]
    return [
        (vocabulary[col], float(score))
        for col, score in zip(columns[order], scores[order])
    ]


# Work on the CSR arrays directly: row i owns the slice
# indptr[i]:indptr[i + 1] of `indices` (columns) and `data` (scores), so only
# the terms that actually occur in a review are ever ranked.
_csr = tfidf_matrix.tocsr()
for i in range(_csr.shape[0]):
    print(f"Review {i+1} top {TOP_N} TF-IDF scores:")

    start, stop = _csr.indptr[i], _csr.indptr[i + 1]
    best = _top_terms(
        _csr.indices[start:stop], _csr.data[start:stop], feature_names, TOP_N
    )

    # A review with fewer than TOP_N distinct terms prints fewer lines
    # (none at all for an empty review).
    for word, score in best:
        print(f"{word}: {score:.4f}")
    print("\n")

Review 1 top 3 TF-IDF scores:
super: 0.4360
this: 0.3852
bike: 0.3593


Review 2 top 3 TF-IDF scores:
as: 0.2926
favourite: 0.2926
high: 0.2926


Review 3 top 3 TF-IDF scores:
good: 0.3813
build: 0.2862
safe: 0.2862


Review 4 top 3 TF-IDF scores:
good: 0.3810
gorgeous: 0.2859
his: 0.2859


Review 5 top 3 TF-IDF scores:
used: 0.4655
smooth: 0.4151
any: 0.2328


Review 6 top 3 TF-IDF scores:
best: 0.3257
maintenance: 0.2858
sport: 0.2858


Review 7 top 3 TF-IDF scores:
the: 0.2446
aggressive: 0.2146
albeit: 0.2146


Review 8 top 3 TF-IDF scores:
enough: 0.4384
this: 0.4207
super: 0.3572


Review 9 top 3 TF-IDF scores:
is: 0.3038
abs: 0.2634
an: 0.2634


Review 10 top 3 TF-IDF scores:
the: 0.4648
it: 0.3252
go: 0.2719


Review 11 top 3 TF-IDF scores:
very: 0.4770
it: 0.3494
colours: 0.2921


Review 12 top 3 TF-IDF scores:
very: 0.4337
this: 0.3823
bike: 0.2675


Review 13 top 3 TF-IDF scores:
good: 0.4619
it: 0.3731
you: 0.3389


Review 14 top 3 TF-IDF scores:
125cc: 0.3926
in: 0.2623
af