### Importing Scraped Files

In [None]:
import re

import numpy as np
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Load the scraped review tables, one CSV file per hotel (read in the order listed).
envue, hampton, myriad, sheraton, aloft_miami = map(
    pd.read_csv,
    [
        "envue_full.csv",
        "HamptonInn.csv",
        "myriad.csv",
        "sheraton.csv",
        "Aloft_Miami.csv",
    ],
)

#### Functions

In [36]:
def clean(text):
    """Strip URLs, bracketed spans, digit-bearing tokens and HTML-like tags from a review.

    Parameters
    ----------
    text : str
        Raw review text. Any non-string value (for example the ``NaN`` that
        pandas produces for an empty CSV cell, or ``None``) is treated as a
        missing review.

    Returns
    -------
    str
        The cleaned text, or an empty string when ``text`` is not a string.
    """
    if not isinstance(text, str):
        # re.sub raises TypeError on non-string input; map missing cells to ''.
        return ''
    # Raw-string patterns avoid invalid escape sequences such as '\S' and '\d'.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # URLs
    # Collapse every whitespace run (newlines included) to a single space, so
    # no separate newline-removal step is needed afterwards.
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'\[.*?\]', '', text)    # [bracketed] spans
    text = re.sub(r'\w*\d\w*', '', text)   # any token containing a digit
    text = re.sub(r'<.*?>+', '', text)     # <tag>-like markup
    return text

### Introductions

Introduction
Sentiment analysis refers to the use of natural language processing, text analysis, computational linguistics, and related techniques to identify and quantify the sentiment expressed in a piece of text or audio.

There are two major techniques for sentiment analysis:

• Supervised machine learning

• Unsupervised lexicon-based

Often, you may not have the convenience of a well-labeled training dataset. In those situations, you need to use unsupervised techniques that predict the sentiment from knowledge bases, ontologies, databases, and lexicons containing detailed information curated specifically for sentiment analysis.

Here we use an unsupervised, lexicon-based approach built on the VADER lexicon for sentiment analysis.

Let's look at an example.

In [37]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()


def sentiment_analyzer_scores(sentence):
    """Print ``sentence`` left-aligned and dash-padded to 40 columns, then its VADER scores."""
    polarity = analyser.polarity_scores(sentence)
    print("{:-<40} {}".format(sentence, polarity))


# The same sentence with one, two and three exclamation marks.
for example in (
    "The phone is super cool!",
    "The phone is super cool!!",
    "The phone is super cool!!!",
):
    sentiment_analyzer_scores(example)


The phone is super cool!---------------- {'neg': 0.0, 'neu': 0.316, 'pos': 0.684, 'compound': 0.7574}
The phone is super cool!!--------------- {'neg': 0.0, 'neu': 0.307, 'pos': 0.693, 'compound': 0.7772}
The phone is super cool!!!-------------- {'neg': 0.0, 'neu': 0.298, 'pos': 0.702, 'compound': 0.795}


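To better understand the overall sentiment, we focus on the "compound" score, which summarizes the polarity of the text in a single value between -1 (most negative) and +1 (most positive).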

In [38]:
# Put the reviews in chronological order and renumber the rows.
aloft_miami = aloft_miami.sort_values("date").reset_index(drop=True)

# Apply the clean function to strip unwanted characters before scoring.
aloft_miami['text'] = aloft_miami['text'].apply(clean)

# Score every review with VADER's SentimentIntensityAnalyzer and keep the compound value.
#
# What is a polarity score and why is it important?
# The key aspect of sentiment analysis is to analyze a body of text to understand
# the opinion it expresses. Typically this sentiment is quantified with a positive
# or negative value, called polarity. The overall sentiment is often inferred as
# positive, neutral or negative from the sign of the polarity score.
analyser = SentimentIntensityAnalyzer()
# Iterating over the values (instead of looking rows up by label 0..n-1) keeps
# the scoring correct whatever index the frame carries.
scores = [analyser.polarity_scores(review)['compound'] for review in aloft_miami['text']]

# Cut-offs recommended by the VADER authors:
#   compound >= 0.05  -> Positive
#   compound <= -0.05 -> Negative
#   otherwise         -> Neutral
sentiment = [
    'Positive' if compound >= 0.05 else ('Negative' if compound <= -0.05 else 'Neutral')
    for compound in scores
]
# A plain list is assigned by position, so there is no index alignment to go wrong.
aloft_miami['sentiment'] = sentiment

In [39]:
# Split the date-sorted reviews into the before-award and after-award frames.
# NOTE(review): 350 is a hard-coded row position, presumably the first review
# written after the award — confirm against the actual award date.
before_award, after_award = aloft_miami.iloc[:350], aloft_miami.iloc[350:]

In [42]:
# Preview the first rows of the before-award slice as a quick sanity check.
before_award.head() # for instance


Unnamed: 0,hotelID,user,rating,text,date,location,sentiment
0,Aloft_Miami,TheBear1998,4.0,I stayed here before an early morning flight r...,2017-11-01 00:00:00,"Orlando, Florida",Positive
1,Aloft_Miami,Kirsty B,1.0,Be careful when booking here! We stayed one ni...,2017-12-01 00:00:00,,Negative
2,Aloft_Miami,Kirsty B,1.0,Be careful when booking here! We stayed one ni...,2017-12-01 00:00:00,,Negative
3,Aloft_Miami,Ioan N,5.0,My first visit in Miami -two day accommodation...,2017-12-01 00:00:00,,Positive
4,Aloft_Miami,Kim H,5.0,This hotel was all that the pics on their webs...,2017-12-01 00:00:00,"Missouri City, Texas",Positive


In [43]:
# Share of reviews labelled "Positive" in each period (positive rows / all rows).
positive_before = len(before_award[before_award.sentiment == "Positive"])
positive_after = len(after_award[after_award.sentiment == "Positive"])
print("Before award, sentiment ratio was : {} in Aloft Miami Hotel".format(positive_before / len(before_award)))
print("After award, sentiment ratio was : {} in Aloft Miami Hotel".format(positive_after / len(after_award)))

Before award, sentiment ratio was : 0.8714285714285714 in Aloft Miami Hotel
After award, sentiment ratio was : 0.9 in Aloft Miami Hotel


In [44]:
# Clean the review text, then label each review from its VADER compound score.
hampton['text'] = hampton['text'].apply(clean)

analyser = SentimentIntensityAnalyzer()
# Iterate over the values rather than looking rows up by label 0..n-1, so the
# result does not depend on the frame having a default RangeIndex.
scores = [analyser.polarity_scores(review)['compound'] for review in hampton['text']]

# compound >= 0.05 -> Positive, compound <= -0.05 -> Negative, otherwise Neutral.
sentiment = [
    'Positive' if compound >= 0.05 else ('Negative' if compound <= -0.05 else 'Neutral')
    for compound in scores
]
# A plain list is assigned by position, so there is no index alignment to go wrong.
hampton['sentiment'] = sentiment

In [45]:
# Order the reviews by date and renumber the rows from zero.
hampton = (
    hampton
    .sort_values(by="date")
    .reset_index(drop=True)
)

In [46]:
# Split the date-sorted reviews into the before-award and after-award frames.
# NOTE(review): 62 is a hard-coded row position, presumably the first review
# written after the award — confirm against the actual award date.
before_award, after_award = hampton.iloc[:62], hampton.iloc[62:]

# Share of reviews labelled "Positive" in each period (positive rows / all rows).
positive_before = len(before_award[before_award.sentiment == "Positive"])
positive_after = len(after_award[after_award.sentiment == "Positive"])
print("Before award, sentiment ratio was : {} in Hampton Hotel".format(positive_before / len(before_award)))
print("After award, sentiment ratio was : {} in Hampton Hotel".format(positive_after / len(after_award)))

Before award, sentiment ratio was : 0.9354838709677419 in Hampton Hotel
After award, sentiment ratio was : 0.9375 in Hampton Hotel


In [49]:
# Inspect the last 15 rows of the before-award slice, i.e. the reviews just before the split position.
before_award.tail(15)

Unnamed: 0,hotelID,user,rating,text,date,location,sentiment
47,Hampton Inn,Saysha M,5.0,Was a great experience. The staff was amazing....,2020-02-01 00:00:00,,Positive
48,Hampton Inn,Ole_Roll,4.0,This is a quite large (for a Hampton Inn) prop...,2020-02-01 00:00:00,"Lakeville, Minnesota",Positive
49,Hampton Inn,Anne S,5.0,The staff was very friendly and helpful.Our ro...,2020-03-01 00:00:00,"San Francisco, California",Positive
50,Hampton Inn,Nschweigart,3.0,This is a nice property near the race track. T...,2020-03-01 00:00:00,,Positive
51,Hampton Inn,LifeIsBeachy6,5.0,This hotel is wonderful. The location is great...,2020-03-01 00:00:00,"Knoxville, Tennessee",Positive
52,Hampton Inn,Anne S,5.0,The staff was very friendly and helpful.Our ro...,2020-03-01 00:00:00,"San Francisco, California",Positive
53,Hampton Inn,Nschweigart,3.0,This is a nice property near the race track. T...,2020-03-01 00:00:00,,Positive
54,Hampton Inn,LifeIsBeachy6,5.0,This hotel is wonderful. The location is great...,2020-03-01 00:00:00,"Knoxville, Tennessee",Positive
55,Hampton Inn,Craig S,5.0,"The hotel was GREAT, and the location was incr...",2020-03-01 00:00:00,"Montgomery, Alabama",Positive
56,Hampton Inn,Craig S,5.0,"The hotel was GREAT, and the location was incr...",2020-03-01 00:00:00,"Montgomery, Alabama",Positive


In [50]:
# Clean the review text, then label each review from its VADER compound score.
envue['text'] = envue['text'].apply(clean)

analyser = SentimentIntensityAnalyzer()
# Iterate over the values rather than looking rows up by label 0..n-1, so the
# result does not depend on the frame having a default RangeIndex.
scores = [analyser.polarity_scores(review)['compound'] for review in envue['text']]

# compound >= 0.05 -> Positive, compound <= -0.05 -> Negative, otherwise Neutral.
sentiment = [
    'Positive' if compound >= 0.05 else ('Negative' if compound <= -0.05 else 'Neutral')
    for compound in scores
]
# A plain list is assigned by position, so there is no index alignment to go wrong.
envue['sentiment'] = sentiment

In [51]:
# Order the reviews by date and renumber the rows from zero.
envue = (
    envue
    .sort_values(by="date")
    .reset_index(drop=True)
)

In [52]:
# Split the date-sorted reviews into the before-award and after-award frames.
# NOTE(review): 114 is a hard-coded row position, presumably the first review
# written after the award — confirm against the actual award date.
before_award, after_award = envue.iloc[:114], envue.iloc[114:]

# Share of reviews labelled "Positive" in each period (positive rows / all rows).
positive_before = len(before_award[before_award.sentiment == "Positive"])
positive_after = len(after_award[after_award.sentiment == "Positive"])
print("Before award, sentiment ratio was : {} in Envue Hotel".format(positive_before / len(before_award)))
print("After award, sentiment ratio was : {} in Envue Hotel".format(positive_after / len(after_award)))

Before award, sentiment ratio was : 0.9298245614035088 in Envue Hotel
After award, sentiment ratio was : 0.76 in Envue Hotel


In [54]:
# Show the last rows of the Envue frame; after the sort by date these are the latest reviews.
envue.tail()

Unnamed: 0,hotelID,user,rating,text,date,location,sentiment
184,Envue,swtazn12,5.0,Front desk upgraded our room to floor with ro...,2022-04-01 00:00:00,Virginia,Positive
185,Envue,betsylev,1.0,This is the worst hotel Unless you like sleepi...,2022-05-01 00:00:00,"Pittsburgh, Pennsylvania",Negative
186,Envue,SouthwoodTruck,5.0,Stayed here for business. Actually the locatio...,2022-06-04 00:00:00,"Syracuse, New York",Positive
187,Envue,karpr1,2.0,My husband and I booked a luxury suite for Fri...,2022-06-04 00:00:00,"Haddonfield, New Jersey",Negative
188,Envue,Mariah T,1.0,Shame on Envue for not managing their restaura...,2022-06-11 00:00:00,,Negative


In [55]:
# Clean the review text, then label each review from its VADER compound score.
sheraton['text'] = sheraton['text'].apply(clean)

analyser = SentimentIntensityAnalyzer()
# Iterate over the values rather than looking rows up by label 0..n-1, so the
# result does not depend on the frame having a default RangeIndex.
scores = [analyser.polarity_scores(review)['compound'] for review in sheraton['text']]

# compound >= 0.05 -> Positive, compound <= -0.05 -> Negative, otherwise Neutral.
sentiment = [
    'Positive' if compound >= 0.05 else ('Negative' if compound <= -0.05 else 'Neutral')
    for compound in scores
]
# A plain list is assigned by position, so there is no index alignment to go wrong.
sheraton['sentiment'] = sentiment

In [56]:
# Order the reviews by date and renumber the rows from zero.
sheraton = (
    sheraton
    .sort_values(by="date")
    .reset_index(drop=True)
)

In [57]:
# Split the date-sorted reviews into the before-award and after-award frames.
# NOTE(review): 30 is a hard-coded row position, presumably the first review
# written after the award — confirm against the actual award date.
before_award, after_award = sheraton.iloc[:30], sheraton.iloc[30:]

# Share of reviews labelled "Positive" in each period (positive rows / all rows).
positive_before = len(before_award[before_award.sentiment == "Positive"])
positive_after = len(after_award[after_award.sentiment == "Positive"])
print("Before award, sentiment ratio was : {} in Sheraton Hotel".format(positive_before / len(before_award)))
print("After award, sentiment ratio was : {} in Sheraton Hotel".format(positive_after / len(after_award)))

Before award, sentiment ratio was : 0.9 in Sheraton Hotel
After award, sentiment ratio was : 0.92 in Sheraton Hotel


In [58]:
# Clean the review text, then label each review from its VADER compound score.
myriad['text'] = myriad['text'].apply(clean)

analyser = SentimentIntensityAnalyzer()
# Iterate over the values rather than looking rows up by label 0..n-1, so the
# result does not depend on the frame having a default RangeIndex.
scores = [analyser.polarity_scores(review)['compound'] for review in myriad['text']]

# compound >= 0.05 -> Positive, compound <= -0.05 -> Negative, otherwise Neutral.
sentiment = [
    'Positive' if compound >= 0.05 else ('Negative' if compound <= -0.05 else 'Neutral')
    for compound in scores
]
# A plain list is assigned by position, so there is no index alignment to go wrong.
myriad['sentiment'] = sentiment

In [59]:
# Order the reviews by date and renumber the rows from zero.
myriad = (
    myriad
    .sort_values(by="date")
    .reset_index(drop=True)
)

In [60]:
# Split the date-sorted reviews into the before-award and after-award frames.
# NOTE(review): 120 is a hard-coded row position, presumably the first review
# written after the award — confirm against the actual award date.
before_award, after_award = myriad.iloc[:120], myriad.iloc[120:]

# Share of reviews labelled "Positive" in each period (positive rows / all rows).
positive_before = len(before_award[before_award.sentiment == "Positive"])
positive_after = len(after_award[after_award.sentiment == "Positive"])
print("Before award, sentiment ratio was : {} in Myriad Hotel".format(positive_before / len(before_award)))
print("After award, sentiment ratio was : {} in Myriad Hotel".format(positive_after / len(after_award)))

Before award, sentiment ratio was : 0.9583333333333334 in Myriad Hotel
After award, sentiment ratio was : 1.0 in Myriad Hotel


## Booking Numbers

In [61]:
# Parse the "date" column of every hotel frame into datetimes, in place.
for reviews in (aloft_miami, myriad, sheraton, envue, hampton):
    reviews["date"] = pd.to_datetime(reviews["date"])


In [62]:
# Add a "year" column, taken from the "date" column, to every hotel frame.
for reviews in (aloft_miami, myriad, sheraton, envue, hampton):
    reviews["year"] = reviews["date"].dt.year


In [63]:
# Count the review rows dated 2021 and 2020 and print their ratio.
# NOTE(review): the printed figure is the 2021/2020 ratio of review counts
# (1.0 would mean no change), used as a stand-in for bookings — confirm intent.
reviews_in_2021 = aloft_miami.year.ge(2021) & aloft_miami.year.lt(2022)
reviews_in_2020 = aloft_miami.year.ge(2020) & aloft_miami.year.lt(2021)
aloft_miami_2021 = int(reviews_in_2021.sum())
aloft_miami_2020 = int(reviews_in_2020.sum())

print("Booking numbers of Aloft Miami have {} increased".format(aloft_miami_2021 / aloft_miami_2020))

Booking numbers of Aloft Miami have 1.6153846153846154 increased


In [64]:
# Count the review rows dated 2021 and 2020 and print their ratio.
# NOTE(review): the printed figure is the 2021/2020 ratio of review counts
# (1.0 would mean no change), used as a stand-in for bookings — confirm intent.
reviews_in_2021 = sheraton.year.ge(2021) & sheraton.year.lt(2022)
reviews_in_2020 = sheraton.year.ge(2020) & sheraton.year.lt(2021)
sheraton_2021 = int(reviews_in_2021.sum())
sheraton_2020 = int(reviews_in_2020.sum())

print("Booking numbers of Sheraton have {} increased".format(sheraton_2021 / sheraton_2020))

Booking numbers of Sheraton have 1.368421052631579 increased


In [65]:
# Count the review rows dated 2021 and 2020 and print their ratio.
# NOTE(review): the printed figure is the 2021/2020 ratio of review counts
# (1.0 would mean no change), used as a stand-in for bookings — confirm intent.
reviews_in_2021 = hampton.year.ge(2021) & hampton.year.lt(2022)
reviews_in_2020 = hampton.year.ge(2020) & hampton.year.lt(2021)
hampton_2021 = int(reviews_in_2021.sum())
hampton_2020 = int(reviews_in_2020.sum())

print("Booking numbers of Hampton have {} increased".format(hampton_2021 / hampton_2020))

Booking numbers of Hampton have 1.1842105263157894 increased


In [66]:
# Count the review rows dated 2021 and 2020 and print their ratio.
# NOTE(review): the printed figure is the 2021/2020 ratio of review counts
# (1.0 would mean no change), used as a stand-in for bookings — confirm intent.
reviews_in_2021 = envue.year.ge(2021) & envue.year.lt(2022)
reviews_in_2020 = envue.year.ge(2020) & envue.year.lt(2021)
envue_2021 = int(reviews_in_2021.sum())
envue_2020 = int(reviews_in_2020.sum())

print("Booking numbers of Envue have {} increased".format(envue_2021 / envue_2020))

Booking numbers of Envue have 1.4444444444444444 increased


In [67]:
# Count the review rows dated 2021 and 2020 and print their ratio.
# NOTE(review): the printed figure is the 2021/2020 ratio of review counts
# (1.0 would mean no change), used as a stand-in for bookings — confirm intent.
reviews_in_2021 = myriad.year.ge(2021) & myriad.year.lt(2022)
reviews_in_2020 = myriad.year.ge(2020) & myriad.year.lt(2021)
myriad_2021 = int(reviews_in_2021.sum())
myriad_2020 = int(reviews_in_2020.sum())

print("Booking numbers of myriad have {} increased".format(myriad_2021 / myriad_2020))

Booking numbers of myriad have 1.6944444444444444 increased
