# Sentiment Analysis

In [10]:
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize
import nltk
# Fetch the NLTK resources this notebook needs:
#   - vader_lexicon: lexicon used by SentimentIntensityAnalyzer
#   - punkt_tab:     sentence-tokenizer data that current NLTK releases load for sent_tokenize
#   - punkt:         sentence-tokenizer data used by older NLTK releases
# Downloading both tokenizer packages keeps sent_tokenize working on a fresh
# kernel regardless of the installed NLTK version.
nltk.download("vader_lexicon")
nltk.download("punkt_tab")
nltk.download("punkt")

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [4]:
# Mount Google Drive so the dataset is reachable from this runtime
# (google.colab is used here, so this cell expects a Colab environment).
from google.colab import drive
drive.mount('/content/drive')

# Path of the reviews CSV inside the mounted drive; the file is decoded as
# ISO-8859-1 when it is read into the DataFrame `df`.
file_path = "/content/drive/My Drive/DisneylandReviews.csv"
df = pd.read_csv(file_path, encoding='ISO-8859-1')
df.head()  # preview the first rows of the loaded data

Mounted at /content/drive


Unnamed: 0,Review_ID,Rating,Year_Month,Reviewer_Location,Review_Text,Branch
0,670772142,4,2019-4,Australia,If you've ever been to Disneyland anywhere you...,Disneyland_HongKong
1,670682799,4,2019-5,Philippines,Its been a while since d last time we visit HK...,Disneyland_HongKong
2,670623270,4,2019-4,United Arab Emirates,Thanks God it wasn t too hot or too humid wh...,Disneyland_HongKong
3,670607911,4,2019-4,Australia,HK Disneyland is a great compact park. Unfortu...,Disneyland_HongKong
4,670607296,4,2019-4,United Kingdom,"the location is not in the city, took around 1...",Disneyland_HongKong


In [25]:
# Create the VADER analyzer; get_vader_sentiment reads it through the global `sia`
sia = SentimentIntensityAnalyzer()

#Define vaderSentiment function
def get_vader_sentiment(text):
    """Score a piece of text with VADER and attach a sentiment label.

    The input is converted with ``str()`` first, so non-string values are
    scored as their string form. The compound score comes from the global
    analyzer ``sia``.

    Returns:
        pd.Series: two elements, ``[compound_score, label]``. ``label`` is
        "Positive" when the score is >= 0.05, "Negative" when it is <= -0.05,
        and "Neutral" otherwise.
    """
    compound = sia.polarity_scores(str(text))["compound"]

    # Map the compound score onto one of the three labels
    if compound >= 0.05:
        label = "Positive"
    else:
        label = "Negative" if compound <= -0.05 else "Neutral"

    return pd.Series([compound, label])

# Work on an independent copy of the first 100 reviews
df_subset = df.iloc[:100].copy()

# Each call returns a (score, label) pair; apply expands the pairs into two new columns
df_subset[["polarity_score", "sentiment"]] = df_subset["Review_Text"].apply(get_vader_sentiment)

# Preview each review next to its score and label
df_subset.loc[:, ["Review_Text", "polarity_score", "sentiment"]].head()

Unnamed: 0,Review_Text,polarity_score,sentiment
0,If you've ever been to Disneyland anywhere you...,0.7069,Positive
1,Its been a while since d last time we visit HK...,0.9901,Positive
2,Thanks God it wasn t too hot or too humid wh...,0.992,Positive
3,HK Disneyland is a great compact park. Unfortu...,0.8489,Positive
4,"the location is not in the city, took around 1...",0.2846,Positive


In [32]:
# Create the VADER analyzer (rebinds the global `sia` that extract_negative_sentences reads)
sia = SentimentIntensityAnalyzer()

# Function to extract only negative sentences from a review
def extract_negative_sentences(text):
    """Return the single most negative sentence of a review and its score.

    The review is converted with ``str()``, split into sentences with
    ``sent_tokenize``, and every sentence is scored exactly once with the
    global VADER analyzer ``sia``. A sentence counts as negative when its
    compound score is <= -0.05.

    Returns:
        pd.Series: ``[sentence, compound_score]`` for the lowest-scoring
        negative sentence (the earliest one wins ties), or ``["", 0.0]`` when
        the review has no sentences or none of them is negative.
    """
    sentences = sent_tokenize(str(text))  # split the review into sentences

    # Score each sentence once and reuse the value for both the filter and the
    # result, instead of calling the analyzer twice per sentence.
    scored = ((sent, sia.polarity_scores(sent)["compound"]) for sent in sentences)
    negative_sentences = [(sent, score) for sent, score in scored if score <= -0.05]

    # Covers both "no sentences at all" and "no negative sentence"
    if not negative_sentences:
        return pd.Series(["", 0.0])

    # min() keeps the first entry among equal scores
    most_negative_sentence, most_negative_score = min(
        negative_sentences, key=lambda pair: pair[1]
    )

    return pd.Series([most_negative_sentence, most_negative_score])

# Start again from a fresh copy of the first 100 reviews
df_subset = df.iloc[:100].copy()

# Add the most negative sentence of every review together with its compound score
df_subset[["most_negative_sentence", "most_negative_score"]] = df_subset["Review_Text"].apply(extract_negative_sentences)

# Preview 20 reviews next to their most negative sentence
df_subset.loc[:, ["Review_Text", "most_negative_sentence", "most_negative_score"]].head(20)

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Unnamed: 0,Review_Text,most_negative_sentence,most_negative_score
0,If you've ever been to Disneyland anywhere you...,,0.0
1,Its been a while since d last time we visit HK...,Turns into Star Wars!!,-0.6351
2,Thanks God it wasn t too hot or too humid wh...,,0.0
3,HK Disneyland is a great compact park. Unfortu...,Unfortunately there is quite a bit of maintena...,-0.1176
4,"the location is not in the city, took around 1...",,0.0
5,"Have been to Disney World, Disneyland Anaheim ...",forget trying to see one of the shows its a fr...,-0.4019
6,Great place! Your day will go by and you won't...,Too bad the parade got canceled though.,-0.5423
7,Think of it as an intro to Disney magic for th...,,0.0
8,"Feel so let down with this place,the Disneylan...","Walt Disney would be horrified, not enough eat...",-0.5423
9,I can go on talking about Disneyland. Whatever...,,0.0


In [37]:
# Names used by this cell: the VADER analyzer class, the sentence tokenizer, and pandas
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize
import pandas as pd

# Create the VADER analyzer; extract_all_negative_sentences reads it through the global `sia`
sia = SentimentIntensityAnalyzer()

# Function to extract ALL negative sentences from a review
def extract_all_negative_sentences(text):
    """Collect every negative sentence of a review and the lowest score among them.

    The review is converted with ``str()``, split into sentences with
    ``sent_tokenize``, and every sentence is scored exactly once with the
    global VADER analyzer ``sia``. A sentence counts as negative when its
    compound score is <= -0.05.

    Returns:
        pd.Series: ``[joined_sentences, most_negative_score]`` where
        ``joined_sentences`` is the negative sentences in their original
        order separated by " | ", or ``["", 0.0]`` when the review has no
        sentences or none of them is negative.
    """
    sentences = sent_tokenize(str(text))  # split the review into sentences

    # Score each sentence once; the same scores drive both the filter and the
    # minimum, so negative sentences are not scored a second time.
    scored = [(sent, sia.polarity_scores(sent)["compound"]) for sent in sentences]
    negatives = [(sent, score) for sent, score in scored if score <= -0.05]

    # Covers both "no sentences at all" and "no negative sentence"
    if not negatives:
        return pd.Series(["", 0.0])

    # " | " separates the individual sentences inside the single output string
    all_negative_sentences = " | ".join(sent for sent, _ in negatives)
    most_negative_score = min(score for _, score in negatives)

    return pd.Series([all_negative_sentences, most_negative_score])

# Start again from a fresh copy of the first 100 reviews (copying avoids SettingWithCopyWarning)
df_subset = df.iloc[:100].copy()

# Add the joined negative sentences of every review and the lowest compound score among them
df_subset[["negative_sentences", "most_negative_score"]] = df_subset["Review_Text"].apply(extract_all_negative_sentences)

# Preview 25 reviews next to their negative sentences
df_subset.loc[:, ["Review_Text", "negative_sentences", "most_negative_score"]].head(25)

Unnamed: 0,Review_Text,negative_sentences,most_negative_score
0,If you've ever been to Disneyland anywhere you...,,0.0
1,Its been a while since d last time we visit HK...,Turns into Star Wars!! | Seems more local like...,-0.6351
2,Thanks God it wasn t too hot or too humid wh...,,0.0
3,HK Disneyland is a great compact park. Unfortu...,Unfortunately there is quite a bit of maintena...,-0.1176
4,"the location is not in the city, took around 1...",,0.0
5,"Have been to Disney World, Disneyland Anaheim ...",forget trying to see one of the shows its a fr...,-0.4019
6,Great place! Your day will go by and you won't...,Too bad the parade got canceled though.,-0.5423
7,Think of it as an intro to Disney magic for th...,,0.0
8,"Feel so let down with this place,the Disneylan...","Walt Disney would be horrified, not enough eat...",-0.5423
9,I can go on talking about Disneyland. Whatever...,,0.0
