In [7]:
import pandas as pd
import pickle
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.sentiment.util import *

import nltk
from nltk.probability import FreqDist
from nltk.tokenize import RegexpTokenizer
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import stopwords

In [8]:
def apply_stem(doc):
    
    doc_split = doc.split(' ')
    stem_doc = ''
    for word in doc_split:
        stem_doc += stemmer.stem(word) + ' '
        
    return stem_doc



def apply_lem(doc):
    
    doc_split = doc.split(' ')
    lem_v_doc = ''
    for word in doc_split:
        lem_v_doc += lemmy.lemmatize(word, pos='v') + ' '
        
    doc_split = lem_v_doc.split(' ')
    lem_doc = ''
    for word in doc_split:
        lem_doc += lemmy.lemmatize(word, pos='a') + ' '
        
    return lem_doc

In [5]:
data = {
    "positive": [
        "Staff and waiter & waitress are hospitable, big check √.",
        "Longanisa is excellent.",
        "Great variety! All are delicious!",
        "Keep up the quality of food and service.",
        "Excellent service! All of them.",
        "A perfect place to have breakfast! Great view and fantastic food! Jerby is very attentive & courteous, a true example of a good attendant! Keep up the good work!",
        "Great food & service.",
        "Everyone is courteous and attentive. Excellent customer service.",
        "Excellent. Grateful for the birthday greeting on the plate, Thanks Ma'am Norlyn.      Thank you.",
        "Staff commendation: Alexe"
    ],
    "negative": [
        "Seasoning is a little bland, miso soup and noodles. Dry cupcakes. Last weeks breakfast was better.",
        "Ham is too bland, pandesal should be kept warm, egg should be available everyday not on request.",
        "May I suggest to add Alaska Evap (not the creamer one) along with other milk choices for the Champorado.",
        "More choices, please.",
        "More food choices (Fil, American, Italian, etc.) Staff commendation: Deck",
        "More variety for drinks would be great.",
        "More Fruits.",
        "I want macaroons.",
        "Fried rice is not warm."
    ]
}

df = []
for k in data.keys():
    df.extend([(j,k) for j in data[k]])
    
df = pd.DataFrame(df, columns=['review', 'sentiment'])
df['review_summary'] = ''
df.head()

Unnamed: 0,review,sentiment,review_summary
0,"Staff and waiter & waitress are hospitable, bi...",positive,
1,Longanisa is excellent.,positive,
2,Great variety! All are delicious!,positive,
3,Keep up the quality of food and service.,positive,
4,Excellent service! All of them.,positive,


In [10]:
stemmer = PorterStemmer()
lemmy = WordNetLemmatizer()

In [11]:
df['review_stemmed'] = df['review'].apply(apply_stem)
df['review_lemmed'] = df['review'].apply(apply_lem)

df['review_summary_stemmed'] = df['review_summary'].apply(apply_stem)
df['review_summary_lemmed'] = df['review_summary'].apply(apply_lem)

In [14]:
with open('Vectorizer/review_tfidf_vectorizer.pkl', 'rb') as f:
    tf_idf_vect = pickle.load(f)

with open('Vectorizer/review_tfidf_summary_vectorizer.pkl', 'rb') as f:
    tf_idf_vect_summary = pickle.load(f)

In [15]:
df_tfidf_sparse = tf_idf_vect.transform(df['review_lemmed'])
df_tfidf = pd.DataFrame(
    df_tfidf_sparse.toarray(), 
    columns=tf_idf_vect.get_feature_names()
)

df_summary_tfidf_sparse = tf_idf_vect_summary.transform(df['review_summary_lemmed'])
df_summary_tfidf = pd.DataFrame(
    df_summary_tfidf_sparse.toarray(), 
    columns=tf_idf_vect_summary.get_feature_names()
)

df_final = df_summary_tfidf.join(df_tfidf, lsuffix='_sum')
df_final.head()

Unnamed: 0,average,awful,bad_sum,best_sum,comfortable_sum,convenient_sum,customer,disappoint,disappointing,excellent_sum,...,travel,try,upgrade,use,visit,wait,want,water,way,well
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
with open('Models/Random Forest.pkl', 'rb') as f:
    model = pickle.load(f)



In [25]:
df2 = df.copy()
df2['predictions'] = model.best_model.predict(df_final)
df2.head(100)

Unnamed: 0,review,sentiment,review_summary,review_stemmed,review_lemmed,review_summary_stemmed,review_summary_lemmed,predictions
0,"Staff and waiter & waitress are hospitable, bi...",positive,,"staff and waiter & waitress are hospitable, bi...","Staff and waiter & waitress be hospitable, big...",,,1
1,Longanisa is excellent.,positive,,longanisa is excellent.,Longanisa be excellent.,,,5
2,Great variety! All are delicious!,positive,,great variety! all are delicious!,Great variety! All be delicious!,,,5
3,Keep up the quality of food and service.,positive,,keep up the qualiti of food and service.,Keep up the quality of food and service.,,,1
4,Excellent service! All of them.,positive,,excel service! all of them.,Excellent service! All of them.,,,5
5,A perfect place to have breakfast! Great view ...,positive,,A perfect place to have breakfast! great view ...,A perfect place to have breakfast! Great view ...,,,5
6,Great food & service.,positive,,great food & service.,Great food & service.,,,5
7,Everyone is courteous and attentive. Excellent...,positive,,everyon is courteou and attentive. excel custo...,Everyone be courteous and attentive. Excellent...,,,5
8,Excellent. Grateful for the birthday greeting ...,positive,,excellent. grate for the birthday greet on the...,Excellent. Grateful for the birthday greet on ...,,,5
9,Staff commendation: Alexe,positive,,staff commendation: alex,Staff commendation: Alexe,,,1


In [24]:
pd.pivot_table(df2, index='sentiment', columns='predictions', aggfunc={'review':'count'})

Unnamed: 0_level_0,review,review,review
predictions,1,2,5
sentiment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
negative,7.0,1.0,1.0
positive,3.0,,7.0


In [3]:
reviews = pd.read_excel('s3a://smdatalabs-analytics/jay/smhcc-sample-data/hotel-reviews-v2.xlsx')

In [4]:
reviews.head()

Unnamed: 0,content-rating,content-text,content-title,date_of_stay,review_date,reviewer_loc,reviewer_name,source,Property
0,5,"Nice hotel , Nice room , Nice breakfast , Nice...",Nice Hotel”,2020-02-01 00:00:00,2020-07-07 00:00:00,Malaysia,Ivan,Agoda,TVH
1,5,It's just beside Skyranch so that's a plus.,It's a nice place with a nice view.”,2019-12-01 00:00:00,2020-05-26 00:00:00,Philippines,Rey,Agoda,TVH
2,5,Our family of 4 stayed here to celebrate my bi...,Best Place to Stay in Tagaytay with Kids”,2019-09-01 00:00:00,2020-03-19 00:00:00,Philippines,Maria,Agoda,TVH
3,4,My brother and his wife were in this hotel dur...,Honeymoon gift”,2019-12-01 00:00:00,2020-03-10 00:00:00,Philippines,ernani,Agoda,TVH
4,5,Newly renovated room,Awesome Breakfast”,2020-03-01 00:00:00,2020-03-10 00:00:00,Philippines,Ronald,Agoda,TVH


In [5]:
model = SentimentIntensityAnalyzer()

In [23]:
def get_sentiment(sentence):
    
    if not isinstance(sentence, str):
        return '' 
    
    score = model.polarity_scores(sentence)
    print(score)
    score.pop('compound')
    
    max_idx = list(score.values()).index(max(score.values()))
    return list(score.keys())[max_idx]

In [31]:
get_sentiment("This is by far the best hotel to stay at when visiting Tagaytay")

{'neg': 0.0, 'neu': 0.741, 'pos': 0.259, 'compound': 0.6369}


'neu'

In [16]:
reviews['prediction'] = reviews['content-text'].apply(lambda a: get_sentiment(a))

In [17]:
reviews.head(20)

Unnamed: 0,content-rating,content-text,content-title,date_of_stay,review_date,reviewer_loc,reviewer_name,source,Property,prediction
0,5,"Nice hotel , Nice room , Nice breakfast , Nice...",Nice Hotel”,2020-02-01 00:00:00,2020-07-07 00:00:00,Malaysia,Ivan,Agoda,TVH,pos
1,5,It's just beside Skyranch so that's a plus.,It's a nice place with a nice view.”,2019-12-01 00:00:00,2020-05-26 00:00:00,Philippines,Rey,Agoda,TVH,neu
2,5,Our family of 4 stayed here to celebrate my bi...,Best Place to Stay in Tagaytay with Kids”,2019-09-01 00:00:00,2020-03-19 00:00:00,Philippines,Maria,Agoda,TVH,neu
3,4,My brother and his wife were in this hotel dur...,Honeymoon gift”,2019-12-01 00:00:00,2020-03-10 00:00:00,Philippines,ernani,Agoda,TVH,neu
4,5,Newly renovated room,Awesome Breakfast”,2020-03-01 00:00:00,2020-03-10 00:00:00,Philippines,Ronald,Agoda,TVH,neu
5,5,Newly renovated rooms,Awesome Breakfast”,2020-03-01 00:00:00,2020-03-10 00:00:00,Philippines,Ronald,Agoda,TVH,neu
6,4,We had a great stay except for the hotel promi...,Great stay but....”,2019-03-01 00:00:00,2020-03-09 00:00:00,Singapore,Jinky,Agoda,TVH,neu
7,5,Really one of the best hotels in Tagaytay! Con...,Great Experience”,2020-03-01 00:00:00,2020-03-08 00:00:00,Philippines,Jennivie,Agoda,TVH,neu
8,5,Rooms need a little updating. Great location. ...,Exceptional”,2020-03-01 00:00:00,2020-03-08 00:00:00,Philippines,eric,Agoda,TVH,pos
9,4,Excellent hospitality,Clean and friendly ”,2020-03-01 00:00:00,2020-03-06 00:00:00,Canada,Restituto,Agoda,TVH,pos


In [30]:
reviews.iloc[16]['content-text']

'This is by far the best hotel to stay at when visiting Tagaytay'

In [21]:
reviews[reviews['prediction'] == 'neg'].head()

Unnamed: 0,content-rating,content-text,content-title,date_of_stay,review_date,reviewer_loc,reviewer_name,source,Property,prediction
371,5,.,Exceptional”,2018-08-01 00:00:00,2018-08-28 00:00:00,Philippines,Camille,Agoda,TVH,neg


In [86]:
df['prediction'] = df['comment'].apply(lambda a: get_sentiment(a))

In [91]:
pd.set_option('max_colwidth', 10000)
df

Unnamed: 0,comment,sentiment,prediction
0,"Staff and waiter & waitress are hospitable, big check √.",positive,neu
1,Longanisa is excellent.,positive,pos
2,Great variety! All are delicious!,positive,pos
3,Keep up the quality of food and service.,positive,neu
4,Excellent service! All of them.,positive,neu
5,"A perfect place to have breakfast! Great view and fantastic food! Jerby is very attentive & courteous, a true example of a good attendant! Keep up the good work!",positive,pos
6,Great food & service.,positive,pos
7,Everyone is courteous and attentive. Excellent customer service.,positive,pos
8,"Excellent. Grateful for the birthday greeting on the plate, Thanks Ma'am Norlyn. Thank you.",positive,pos
9,Staff commendation: Alexe,positive,neu
