In [12]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Fetch the VADER lexicon only when it is not already installed locally, so
# re-running the cell does not contact the NLTK download server every time.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

# File paths
# NOTE(review): absolute, machine-specific locations; adjust when running elsewhere.
input_file = "D:\\Market analysis project\\ExcelFiles\\fact_customer_reviews.csv"
output_file = "D:\\Market analysis project\\ExcelFiles\\fact_customer_reviews_with_sentiment.csv"

# Load data from CSV.
# encoding_errors='ignore' drops bytes that cannot be decoded instead of
# raising, so malformed characters disappear from the text silently.
df = pd.read_csv(input_file, encoding_errors='ignore')

# Initialize VADER Sentiment Analyzer (used by get_sentiment_score)
sia = SentimentIntensityAnalyzer()

# Function to calculate sentiment score
def get_sentiment_score(text):
    """Return the VADER 'compound' polarity score for ``text``.

    The value is passed through ``str()`` before scoring, so non-string
    inputs are scored by their string representation.
    """
    polarity = sia.polarity_scores(str(text))
    return polarity['compound']

# Function to categorize sentiment
def categorize_sentiment(score, rating):
    """Combine a text sentiment score with a star rating into one label.

    The text counts as positive when ``score > 0.05``, negative when
    ``score < -0.05`` and neutral otherwise. The label then depends on
    the rating:

    * positive text: rating >= 4 -> 'Positive', rating == 3 ->
      'Mixed Positive', anything else -> 'Mixed Negative'
    * negative text: rating <= 2 -> 'Negative', rating == 3 ->
      'Mixed Negative', anything else -> 'Mixed Positive'
    * neutral text: rating >= 4 -> 'Positive', rating <= 2 ->
      'Negative', anything else -> 'Neutral'
    """
    # Text reads as positive.
    if score > 0.05:
        if rating >= 4:
            return 'Positive'
        if rating == 3:
            return 'Mixed Positive'
        return 'Mixed Negative'

    # Text reads as negative.
    if score < -0.05:
        if rating <= 2:
            return 'Negative'
        if rating == 3:
            return 'Mixed Negative'
        return 'Mixed Positive'

    # Text is neutral: the rating alone decides.
    if rating >= 4:
        return 'Positive'
    if rating <= 2:
        return 'Negative'
    return 'Neutral'

# Function to bucket sentiment scores
def sentiment_bucket(score):
    """Map a sentiment score to one of four labelled ranges.

    Buckets:
        score >= 0.5       -> '0.5 to 1.0'
        0 <= score < 0.5   -> '0.0 to 0.49'
        -0.5 < score < 0   -> '-0.49 to 0.0'
        anything else      -> '-1.0 to -0.5'

    A score of exactly -0.5 lands in '-1.0 to -0.5', the label that names
    that value, mirroring how exactly 0.5 lands in '0.5 to 1.0'.
    """
    if score >= 0.5:
        return '0.5 to 1.0'
    if score >= 0:
        return '0.0 to 0.49'
    # Strict comparison keeps -0.5 out of the '-0.49 to 0.0' bucket.
    if score > -0.5:
        return '-0.49 to 0.0'
    return '-1.0 to -0.5'

# Apply sentiment analysis functions
df['SentimentScore'] = df['ReviewText'].astype(str).apply(get_sentiment_score)
# Pair each score with its rating directly instead of DataFrame.apply(axis=1):
# the row-wise apply returns a DataFrame (not a Series) when there are no rows,
# which cannot be assigned to a single column, and it builds a Series per row.
df['SentimentCategory'] = [
    categorize_sentiment(score, rating)
    for score, rating in zip(df['SentimentScore'], df['Rating'])
]
df['SentimentBucket'] = df['SentimentScore'].apply(sentiment_bucket)

# Save results to the specified directory (index column is not written)
df.to_csv(output_file, index=False)

# Display first few rows
print(f"File saved at: {output_file}")
print(df.head())



File saved at: D:\Market analysis project\ExcelFiles\fact_customer_reviews_with_sentiment.csv
   ReviewID  CustomerID  ProductID  ReviewDate  Rating  \
0         1          77         18  23.12.2023       3   
1         2          80         19  25.12.2024       5   
2         3          50         13  26.01.2025       4   
3         4          78         15  21.04.2025       3   
4         5          64          2  16.07.2023       3   

                             ReviewText  SentimentScore SentimentCategory  \
0  Average experience, nothing special.         -0.3089    Mixed Negative   
1            The quality is  top-notch.          0.0000          Positive   
2    Five stars for the quick delivery.          0.0000          Positive   
3   Good quality, but could be cheaper.          0.2382    Mixed Positive   
4  Average experience, nothing special.         -0.3089    Mixed Negative   

  SentimentBucket  
0    -0.49 to 0.0  
1     0.0 to 0.49  
2     0.0 to 0.49  
3     0.0 to 0.49  
4    -0.49 to 0.0  

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Santh\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
