In [3]:
import pandas as pd
import pyodbc
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [4]:
# Fetch the VADER lexicon resource through NLTK's downloader
VADER_RESOURCE = 'vader_lexicon'
nltk.download(VADER_RESOURCE)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\hh\AppData\Roaming\nltk_data...


True

In [12]:
# Read the raw customer reviews from CSV and preview the first five rows
REVIEWS_CSV = 'Customer_review_data.csv'
df = pd.read_csv(REVIEWS_CSV)
df.head(n=5)

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText
0,1,77,18,2023-12-23,3,"Average experience, nothing special."
1,2,80,19,2024-12-25,5,The quality is top-notch.
2,3,50,13,2025-01-26,4,Five stars for the quick delivery.
3,4,78,15,2025-04-21,3,"Good quality, but could be cheaper."
4,5,64,2,2023-07-16,3,"Average experience, nothing special."


In [15]:
# Build one shared VADER analyzer up front; calculate_senti reuses it for every review
sia = SentimentIntensityAnalyzer()


# Score a single review with VADER
def calculate_senti(review):
    """Return the VADER compound sentiment score for one review.

    Parameters
    ----------
    review : str or missing value
        The review text. Missing values (None, NaN, pd.NA) are accepted and
        scored as neutral; any other non-string value is converted with str().

    Returns
    -------
    float
        The 'compound' entry of ``sia.polarity_scores(review)``, or 0.0 when
        the review text is missing.
    """
    if not isinstance(review, str):
        # A blank CSV cell is loaded by pandas as NaN (a float), which
        # polarity_scores is not written to accept; treat it as neutral.
        if pd.isna(review):
            return 0.0
        review = str(review)
    return sia.polarity_scores(review)['compound']

In [16]:
# Categorize a review using both its sentiment score and its star rating
def categorize_sentimnet(score, rating):
    """Combine a text sentiment score with a star rating into one label.

    Parameters
    ----------
    score : float
        Sentiment score of the review text. Values above 0.05 count as
        positive text, values below -0.05 as negative text, and anything in
        between as neutral text.
    rating : int or float
        Star rating given by the customer. >= 4 is treated as high, 3 as
        neutral and <= 2 as low.

    Returns
    -------
    str
        One of 'Positive', 'Mixed Postive', 'Neutral', 'Mixed Negative',
        'Negative'.
    """
    # NOTE(review): 'Mixed Postive' is misspelled, but it is kept byte-for-byte
    # because the label is written to the output data - confirm with
    # downstream consumers before renaming it.
    if score > 0.05:
        # Positive text
        if rating >= 4:
            return 'Positive'        # high rating and positive sentiment
        if rating == 3:
            return 'Mixed Postive'   # neutral rating and positive sentiment
        return 'Mixed Negative'      # low rating and positive sentiment

    if score < -0.05:
        # Negative text
        if rating <= 2:
            return 'Negative'        # low rating and negative sentiment
        if rating == 3:
            return 'Mixed Negative'  # neutral rating and negative sentiment
        return 'Mixed Postive'       # high rating and negative sentiment

    # Neutral text: the rating alone decides the label
    if rating >= 4:
        return 'Positive'            # high rating with neutral sentiment
    if rating <= 2:
        return 'Negative'            # low rating with neutral sentiment
    # A middle rating with neutral text carries no negative signal, so it must
    # not be folded into 'Negative'.
    return 'Neutral'

In [17]:
# Bucket a sentiment score into a text range label
def sentiment_bucket(score):
    """Map a sentiment score to the label of the range it falls in.

    Parameters
    ----------
    score : float
        Sentiment score to bucket.

    Returns
    -------
    str
        '0.5 to 1.0'    when score >= 0.5         (strongly positive)
        '0.0 to 0.49'   when 0.0 <= score < 0.5   (mildly positive)
        '-0.49 to 0.0'  when -0.5 < score < 0.0   (mildly negative)
        '-1.0 to -0.5'  otherwise                 (strongly negative)

    Notes
    -----
    A NaN score fails every comparison and therefore lands in the last
    bucket.
    """
    if score >= 0.5:
        return '0.5 to 1.0'    # strongly positive sentiment
    if 0.0 <= score < 0.5:
        return '0.0 to 0.49'   # mildly positive sentiment
    # The lower bound is exclusive so that exactly -0.5 goes to the bucket
    # whose label names it, mirroring how +0.5 goes to the strong positive one.
    if -0.5 < score < 0.0:
        return '-0.49 to 0.0'  # mildly negative sentiment
    return '-1.0 to -0.5'      # strongly negative sentiment

In [18]:
# Score every review text and store the result in a new SentimentScore column
review_texts = df['ReviewText']
df['SentimentScore'] = review_texts.map(calculate_senti)
df

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText,SentimentScore
0,1,77,18,2023-12-23,3,"Average experience, nothing special.",-0.3089
1,2,80,19,2024-12-25,5,The quality is top-notch.,0.0000
2,3,50,13,2025-01-26,4,Five stars for the quick delivery.,0.0000
3,4,78,15,2025-04-21,3,"Good quality, but could be cheaper.",0.2382
4,5,64,2,2023-07-16,3,"Average experience, nothing special.",-0.3089
...,...,...,...,...,...,...,...
95,96,19,13,2023-09-02,3,"Good quality, but could be cheaper.",0.2382
96,97,64,6,2024-01-19,3,"The product is okay, but the instructions were...",-0.2617
97,98,96,3,2025-11-20,5,Exceeded my expectations!,0.0000
98,99,79,16,2025-01-29,2,"Average experience, nothing special.",-0.3089


In [19]:
# Label every review by pairing its sentiment score with its star rating
df['SentimentCategory'] = [
    categorize_sentimnet(score, rating)
    for score, rating in zip(df['SentimentScore'], df['Rating'])
]
df

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText,SentimentScore,SentimentCategory
0,1,77,18,2023-12-23,3,"Average experience, nothing special.",-0.3089,Mixed Negative
1,2,80,19,2024-12-25,5,The quality is top-notch.,0.0000,Positive
2,3,50,13,2025-01-26,4,Five stars for the quick delivery.,0.0000,Positive
3,4,78,15,2025-04-21,3,"Good quality, but could be cheaper.",0.2382,Mixed Postive
4,5,64,2,2023-07-16,3,"Average experience, nothing special.",-0.3089,Mixed Negative
...,...,...,...,...,...,...,...,...
95,96,19,13,2023-09-02,3,"Good quality, but could be cheaper.",0.2382,Mixed Postive
96,97,64,6,2024-01-19,3,"The product is okay, but the instructions were...",-0.2617,Mixed Negative
97,98,96,3,2025-11-20,5,Exceeded my expectations!,0.0000,Positive
98,99,79,16,2025-01-29,2,"Average experience, nothing special.",-0.3089,Negative


In [21]:
# Translate each sentiment score into the label of its score range
sentiment_scores = df['SentimentScore']
df['SentimentBucket'] = sentiment_scores.map(sentiment_bucket)
df

Unnamed: 0,ReviewID,CustomerID,ProductID,ReviewDate,Rating,ReviewText,SentimentScore,SentimentCategory,SentimentBucket
0,1,77,18,2023-12-23,3,"Average experience, nothing special.",-0.3089,Mixed Negative,-0.49 to 0.0
1,2,80,19,2024-12-25,5,The quality is top-notch.,0.0000,Positive,0.0 to 0.49
2,3,50,13,2025-01-26,4,Five stars for the quick delivery.,0.0000,Positive,0.0 to 0.49
3,4,78,15,2025-04-21,3,"Good quality, but could be cheaper.",0.2382,Mixed Postive,0.0 to 0.49
4,5,64,2,2023-07-16,3,"Average experience, nothing special.",-0.3089,Mixed Negative,-0.49 to 0.0
...,...,...,...,...,...,...,...,...,...
95,96,19,13,2023-09-02,3,"Good quality, but could be cheaper.",0.2382,Mixed Postive,0.0 to 0.49
96,97,64,6,2024-01-19,3,"The product is okay, but the instructions were...",-0.2617,Mixed Negative,-0.49 to 0.0
97,98,96,3,2025-11-20,5,Exceeded my expectations!,0.0000,Positive,0.0 to 0.49
98,99,79,16,2025-01-29,2,"Average experience, nothing special.",-0.3089,Negative,-0.49 to 0.0


In [22]:
# Save the enriched reviews. index=False keeps the positional row index that
# read_csv generated out of the file, so it does not reappear as a stray
# unnamed column when the CSV is loaded again.
df.to_csv('fact_customer_review_with_sentiment.csv', index=False)