In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Load the reviews and print, for every review, its highest-weighted TF-IDF
# terms (unigrams and bigrams).
#
# The input CSV must contain a 'Review' column holding the review text.
file_path = 'temp.csv'  # Update this with the actual path to your file
data = pd.read_csv(file_path)

# Missing reviews become empty strings so the vectorizer only ever sees str.
data['Review'] = data['Review'].fillna('').astype(str)

# Plain Python list of review texts, in DataFrame row order.
reviews = data['Review'].tolist()

# Unigrams and bigrams, with English stop words removed.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))

# One row per review, one column per term; the result is a sparse matrix.
tfidf_matrix = tfidf_vectorizer.fit_transform(reviews)

# Term for each matrix column (feature_names[j] labels column j).
feature_names = tfidf_vectorizer.get_feature_names_out()

# How many terms to report per review.
TOP_K = 3

# Walk the sparse rows directly instead of densifying the whole matrix:
# a dense reviews x vocabulary array grows very quickly once bigrams are
# included, and only the non-zero entries of a row can ever be a top term.
# NOTE: `tfidf_matrix_dense` is intentionally no longer created.
csr = tfidf_matrix.tocsr()
row_bounds = zip(csr.indptr[:-1], csr.indptr[1:])

for i, (start, end) in enumerate(row_bounds):
    print(f"Review {i+1} top {TOP_K} TF-IDF scores:")

    # Column ids and scores of the terms that actually occur in this review.
    term_ids = csr.indices[start:end]
    scores = csr.data[start:end]

    # Highest score first; equal scores fall back to ascending column index,
    # which is the order a stable descending sort over the full row produces.
    ranking = np.lexsort((term_ids, -scores))[:TOP_K]

    # A review with fewer than TOP_K terms prints only the terms it has,
    # rather than padding with unrelated zero-score vocabulary entries.
    for pos in ranking:
        print(f"{feature_names[term_ids[pos]]}: {scores[pos]:.4f}")
    print("\n")


Review 1 top 3 TF-IDF scores:
super: 0.3883
bike: 0.3200
bike middle: 0.2383


Review 2 top 3 TF-IDF scores:
125 bike: 0.2476
bike ns: 0.2476
bike superbike: 0.2476


Review 3 top 3 TF-IDF scores:
good: 0.2662
build: 0.1998
build quality: 0.1998


Review 4 top 3 TF-IDF scores:
good: 0.3026
bike fantastic: 0.2271
fantastic riding: 0.2271


Review 5 top 3 TF-IDF scores:
used: 0.3936
smooth: 0.3510
bike smooth: 0.1968


Review 6 top 3 TF-IDF scores:
best: 0.2237
best milage: 0.1963
best tyre: 0.1963


Review 7 top 3 TF-IDF scores:
aggressive: 0.1852
aggressive design: 0.1852
albeit: 0.1852


Review 8 top 3 TF-IDF scores:
bike super: 0.4598
super: 0.3746
comfortable stylish: 0.2299


Review 9 top 3 TF-IDF scores:
abs: 0.2433
appearance: 0.2433
appearance lack: 0.2433


Review 10 top 3 TF-IDF scores:
bike style: 0.2576
comfort engine: 0.2576
engine powerful: 0.2576


Review 11 top 3 TF-IDF scores:
beautiful ok: 0.2498
colours: 0.2498
colours beautiful: 0.2498


Review 12 top 3 TF-IDF scores