# Pilot use case:

Reviews extracted from TripAdvisor for:

Case 1: local Cyprus restaurants.

Case 2: Hotels in Cyprus.

In [1]:
import pandas as pd
import numpy as np
import time
import torch
import spacy
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
from torch.utils.data import DataLoader, ConcatDataset
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from transformers import BertTokenizer
from ABSApip import load_model, predict_ABSA, predict_ATE, ABSA
from ATE_model import ATE
from Aspect_Polarity_model import AspectPolarityClassifier




In [2]:
# Checkpoint names for the tokenizer and for the two model backbones.
pretrained_Tokenizer = "bert-base-uncased" # currently using "bert-base-uncased" --- try other tokenizers
pretrained_model = "bert-base-uncased"

# Run on the first GPU when CUDA is available, otherwise on the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(device_name)

tokenizer = BertTokenizer.from_pretrained(pretrained_Tokenizer)

# Build both networks on DEVICE first, then hand each one to load_model together
# with its checkpoint file (presumably restores the trained weights -- confirm in ABSApip).
ATEmodel = ATE(pretrained_model).to(DEVICE)
Polaritymodel = AspectPolarityClassifier(pretrained_model).to(DEVICE)

ATEmodel = load_model(ATEmodel, 'ATE_mix_bert_base_3_lr2e.pkl')
Polaritymodel = load_model(Polaritymodel, 'Polarity_mix_bert_base_2_lr2e.pkl')

### CASE 1:

In [3]:
# Scraped TripAdvisor restaurant reviews (case 1); the file is decoded as latin-1.
restaurant_reviews_csv = "data/TripadvisorReviewScraper_25_restaurant_review_2023-09-12.csv"
restaurant_df = pd.read_csv(restaurant_reviews_csv, encoding='latin-1')
restaurant_df.head()

Unnamed: 0,Rating,Review Title,Review
0,5,Delicious italian style pizza,If you like thin italian style pizza then you ...
1,5,Great,Quick. Lovely pizza. Would not go anywhere els...
2,1,Rude staff,Very rude and unhelpful staff. Asking for some...
3,5,Best pizza in town,I built my own pizza using vegan cheese and a ...
4,4,Really good pizza,"Really good, oven baked, reasonably priced piz..."


In [4]:
# Number of restaurant reviews loaded.
print(restaurant_df.shape[0])

50


In [5]:
# Run the ABSA pipeline on every restaurant review and keep one record per review:
# the raw review text plus the list of (aspect term, polarity) tuples returned for it.
# Records are collected in a plain list and turned into a DataFrame once at the end,
# instead of enlarging the DataFrame one row at a time inside the loop.
absa_records_res = []

for review in restaurant_df["Review"]:
    termPol_tuples = ABSA(review, tokenizer, ATEmodel, Polaritymodel, DEVICE)
    new_row = {'Review': review, 'Output': termPol_tuples}
    print("row: ", new_row)
    absa_records_res.append(new_row)
    print("\n")
    print("new review")

results_df_res = pd.DataFrame(absa_records_res, columns=["Review", "Output"])

tokens:  ['if', 'you', 'like', 'thin', 'italian', 'style', 'pizza', 'then', 'you', 'are', 'in', 'the', 'right', 'place', '.', 'reasonable', 'prices', 'and', 'delicious', 'pizza', '.', 'sufficient', 'pizza', 'size', 'for', 'an', 'adult', 'to', 'eat', '.'] 

predicted ATE values:  [0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0] 

term: ['italian style'] sentiment: ['Positive']
term: ['prices'] sentiment: ['Positive']
term: ['pizza'] sentiment: ['Positive']
term: ['pizza size'] sentiment: ['Positive']
row:  {'Review': 'If you like thin italian style pizza then you are in the right place.\nReasonable prices and delicious pizza.\nSufficient pizza size for an adult to eat.', 'Output': [('italian style', 'Positive'), ('prices', 'Positive'), ('pizza', 'Positive'), ('pizza size', 'Positive')]}


new review
tokens:  ['quick', '.', 'lovely', 'pizza', '.', 'would', 'not', 'go', 'anywhere', 'else', '.', 'wide', 'selection', 'of', 'topping', '##s', 'and', '

term: ['staff'] sentiment: ['Negative']
term: ['staff'] sentiment: ['Negative']
term: ['pizza'] sentiment: ['Positive']
term: ['people'] sentiment: ['Positive']
row:  {'Review': 'We loved this pizza. It always tastes so good, but not all the staff is very welcoming (especially one woman - she is so rude) generally the staff never smile. The only reason we visit the place is because we love their pizza and our daughter is asking for it. Otherwise I would prefer to go somewhere where the people would be kind and smiling.', 'Output': [('pizza', 'Positive'), ('staff', 'Negative'), ('staff', 'Negative'), ('pizza', 'Positive'), ('people', 'Positive')]}


new review
tokens:  ['choose', 'as', 'many', 'as', 'you', 'wish', 'and', 'make', 'it', 'unique', '-', 'but', 'most', 'importantly', 'is', 'the', 'bread', '-', 'thin', 'and', 'right', '##ly', 'cooked', '.', 'it', 'is', 'worth', 'visiting', '.'] 

predicted ATE values:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0

predicted ATE values:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 

term: ['meals'] sentiment: ['Negative']
term: ['meal'] sentiment: ['Negative']
term: ['lasagne'] sentiment: ['Negative']
term: [

predicted ATE values:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 

term: ['manager'] sentiment: ['Negative']
term: ['manager'] sentiment: ['Negative']
term: ['guy'] sentiment: ['Negative']
term: ['client'] sentiment: ['Negative']
term: ['manager'

term: ['pizzas'] sentiment: ['Negative']
term: ['pastas'] sentiment: ['Negative']
row:  {'Review': 'We visited the restaurant about 10pm and it was very crowded. Lots of kids crying and no music at all. Bad service. Very bad service. Food came on time. Not so tasty pizzas. Dislike the pastas!', 'Output': [('music', 'Negative'), ('service', 'Negative'), ('service', 'Negative'), ('food', 'Negative'), ('pizzas', 'Negative'), ('pastas', 'Negative')]}


new review
tokens:  ['nice', 'class', '##y', 'environment', ',', 'pizza', 'is', 'very', 'ta', '##sty', ',', 'i', "'", 've', 'been', 'a', 'long', '-', 'term', 'customer', 'and', 'i', "'", 'm', 'very', 'satisfied', '.', 'they', 'have', 'a', 'lot', 'of', 'vegetarian', 'options', 'and', 'few', 'vega', '##n', 'options', 'as', 'well', '.', 'my', 'only', 'suggestion', 'is', 'to', 'make', 'pizza', 'with', 've', '##get', '##al', 'cheese', 'all', 'year', 'round', ',', 'and', 'not', 'only', '.', '.', '.', 'on', 'fast', '##ing', 'periods', '.', 'it', "'

term: ['pizza'] sentiment: ['Positive']
row:  {'Review': 'Hey TA. For more than 15 years pizza mia is my favorite pizza in Limassol ! I love it and i am a big fan!', 'Output': [('pizza mia', 'Positive'), ('pizza', 'Positive')]}


new review
tokens:  ['pleasant', 'place', 'to', 'go', 'for', 'lunch', 'or', 'dinner', '!', 'all', 'pizza', '##s', 'are', 'amazing', ',', 'and', 'if', 'you', 'are', 'a', 'big', 'group', 'you', 'can', 'order', 'their', 'biggest', 'pizza', '"', 'go', '##lia', '##th', '"', 'in', 'a', 'very', 'good', 'price', '!', '!', 'since', 'i', 'am', 'a', 'pizza', 'lover', 'i', 'truly', 'recommend', 'this', 'place', 'for', 'din', '##e', 'in', 'or', 'delivery', '!'] 

predicted ATE values:  [0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0] 

term: ['lunch'] sentiment: ['Positive']
term: ['dinner'] sentiment: ['Positive']
term: ['pizzas'] sentiment: ['Posi

term: ['menu'] sentiment: ['Positive']
row:  {'Review': 'One of the best restaurants in nicosia because of their Unique plant based menu that you wont find anywhere else.', 'Output': [('plant based', 'Positive'), ('menu', 'Positive')]}


new review
tokens:  ['i', 'don', '##t', 'usually', 'write', 'reviews', ',', 'but', 'when', 'i', 'meet', 'people', 'who', 'pour', 'their', 'hearts', 'into', 'a', 'healthy', 'concept', ',', 'looking', 'to', 'share', 'so', 'ta', '##sty', 'recipes', 'and', 'adding', 'value', 'into', 'peoples', 'lives', '.', '.', 'here', 'i', 'am', '.', 'if', 'you', 'are', 'looking', 'for', 'a', 'cozy', 'place', 'where', 'to', 'eat', ',', 'one', 'with', 'healthy', 'and', 'delicious', 'food', ',', 'i', 'recommend', 'you', 'ely', '##sian'] 

predicted ATE values:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0] 

term: ['recipes'] se

predicted ATE values:  [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 

term: ['food'] sentiment: ['Neutral']
term: ['pork'] sentiment: ['Negative']
row:  {'Review': 'I used to get this food delivered, but not anymore. The pork was always raw, and never got the order correct.', 'Output': [('food', 'Neutral'), ('pork', 'Negative')]}


new review
tokens:  ['wish', 'the', 'place', 'has', 'more', 'of', 'a', 'character', '(', 'but', 'it', 'doesn', "'", 't', ')', 'but', 'the', 'food', 'is', 'very', 'good', '.', 'went', 'on', 'a', 'friday', 'and', 'there', 'was', 'also', 'live', 'music', ';', 'just', 'a', 'guy', 'play', 'and', 'singing', 'but', 'he', 'was', 'great', 'and', 'helped', 'getting', 'you', 'in', 'the', 'mood', ';', 'we', 'ended', 'up', 'dancing', '!'] 

predicted ATE values:  [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 

term: ['p

In [6]:
# One result row is expected per input review; show the count and the first five rows.
print(results_df_res.shape[0])
results_df_res.iloc[:5]

50


Unnamed: 0,Review,Output
0,If you like thin italian style pizza then you ...,"[(italian style, Positive), (prices, Positive)..."
1,Quick. Lovely pizza. Would not go anywhere els...,"[(pizza, Positive), (selection of, Positive), ..."
2,Very rude and unhelpful staff. Asking for some...,"[(staff, Negative), (ingredients, Negative), (..."
3,I built my own pizza using vegan cheese and a ...,"[(pizza, Positive), (vegan, Positive), (variet..."
4,"Really good, oven baked, reasonably priced piz...","[(oven baked, Positive), (priced, Positive), (..."


In [7]:
# Flatten the per-review results into one row per (aspect, polarity) tuple so that
# aspects can be counted and visualised individually.
#
# Each review is paired with its own "Output" list directly. The previous version
# re-located the output by comparing the review text against the whole frame on
# every iteration, which is quadratic and always picks the first match when the
# same review text occurs more than once.
aspect_rows_res = []

for review_text, list_res in zip(results_df_res["Review"], results_df_res["Output"]):
    for tup in list_res:
        aspect, polarity = tup
        print(tup)
        new_row = {'Review': review_text, 'Tuple': tup, 'Aspect': aspect, 'Polarity': polarity}
        aspect_rows_res.append(new_row)

# Build the frame once; `columns=` keeps the expected layout even when no aspect was found.
df_vis_res = pd.DataFrame(aspect_rows_res, columns=["Review", "Tuple", "Aspect", "Polarity"])

('italian style', 'Positive')
('prices', 'Positive')
('pizza', 'Positive')
('pizza size', 'Positive')
('pizza', 'Positive')
('selection of', 'Positive')
('staff', 'Positive')
('staff', 'Negative')
('ingredients', 'Negative')
('staff', 'Negative')
('pizza', 'Positive')
('vegan', 'Positive')
('variety of', 'Neutral')
('oven', 'Neutral')
('vegan', 'Positive')
('oven baked', 'Positive')
('priced', 'Positive')
('pizza', 'Positive')
('chorizo', 'Neutral')
('cvase', 'Positive')
('selection of', 'Positive')
('toppings', 'Positive')
('pizza', 'Positive')
('brick oven', 'Positive')
('drinks', 'Positive')
('salads', 'Positive')
('price', 'Positive')
('pizza', 'Positive')
('price', 'Positive')
('charge', 'Positive')
('pizza', 'Positive')
('entertainment app', 'Negative')
('personnel', 'Negative')
('pizza', 'Positive')
('staff', 'Negative')
('staff', 'Negative')
('pizza', 'Positive')
('people', 'Positive')
('bread', 'Positive')
('hawajan', 'Positive')
('friendly service', 'Positive')
('pizza', 'Pos

In [8]:
# Preview the first five flattened (review, tuple, aspect, polarity) rows.
df_vis_res.iloc[:5]

Unnamed: 0,Review,Tuple,Aspect,Polarity
0,If you like thin italian style pizza then you ...,"(italian style, Positive)",italian style,Positive
1,If you like thin italian style pizza then you ...,"(prices, Positive)",prices,Positive
2,If you like thin italian style pizza then you ...,"(pizza, Positive)",pizza,Positive
3,If you like thin italian style pizza then you ...,"(pizza size, Positive)",pizza size,Positive
4,Quick. Lovely pizza. Would not go anywhere els...,"(pizza, Positive)",pizza,Positive


In [9]:
# pd.set_option("display.max_colwidth", None)
# print(df_vis_res)

In [10]:
# Preview the first ten flattened rows (covers the first three reviews).
df_vis_res.iloc[:10]

Unnamed: 0,Review,Tuple,Aspect,Polarity
0,If you like thin italian style pizza then you ...,"(italian style, Positive)",italian style,Positive
1,If you like thin italian style pizza then you ...,"(prices, Positive)",prices,Positive
2,If you like thin italian style pizza then you ...,"(pizza, Positive)",pizza,Positive
3,If you like thin italian style pizza then you ...,"(pizza size, Positive)",pizza size,Positive
4,Quick. Lovely pizza. Would not go anywhere els...,"(pizza, Positive)",pizza,Positive
5,Quick. Lovely pizza. Would not go anywhere els...,"(selection of, Positive)",selection of,Positive
6,Quick. Lovely pizza. Would not go anywhere els...,"(staff, Positive)",staff,Positive
7,Very rude and unhelpful staff. Asking for some...,"(staff, Negative)",staff,Negative
8,Very rude and unhelpful staff. Asking for some...,"(ingredients, Negative)",ingredients,Negative
9,Very rude and unhelpful staff. Asking for some...,"(staff, Negative)",staff,Negative


In [11]:
# Persist the (not yet lemmatized) restaurant aspects. This used to be written to
# "hotels1.csv", the file name the hotel case also writes to, so the restaurant
# export was mislabeled and then overwritten by the hotel export.
df_vis_res.to_csv("restaurants1.csv", index=False)

In [12]:
# Total number of extracted (aspect, polarity) pairs across all restaurant reviews.
print(df_vis_res.shape[0])

242


## Lemmatize predicted aspect terms

Many aspects appear in different surface forms but refer to the same aspect/feature; for example, *price* also appears as *prices*. It is good practice to find such cases and lemmatize them so that they all appear in the same root form (here, *price*). This gives a more accurate picture when accumulating the total frequency of each aspect.

In [13]:
# import nltk
# nltk.download('omw-1.4')

In [14]:
def lemmatize(text1):
    """Lemmatize every token of an aspect term and return the re-joined string.

    The text is split with NLTK's ``word_tokenize``; each token is passed to
    ``WordNetLemmatizer.lemmatize`` with its default part of speech, and the
    lemmas are joined back with single spaces (e.g. "pizzas slices" ->
    "pizza slice").

    Args:
        text1: Aspect term (one or more words) as a string.

    Returns:
        The lemmatized aspect term, or an empty string when ``text1`` contains
        no tokens (empty or whitespace-only input).
    """
    # Tokenize the aspect into words.
    tokens1 = word_tokenize(text1)

    if not tokens1:
        # Nothing to lemmatize. Indexing tokens1[0] here used to raise IndexError.
        return ""

    lemmatizer = WordNetLemmatizer()

    # Joining a single lemma yields that lemma unchanged, so single-word and
    # multi-word aspects share one code path.
    # NOTE(review): word_tokenize separates '#' characters, so word-piece
    # fragments such as "##cies" appear to come back as "# # cies" -- confirm
    # whether such fragments should be filtered before they reach this function.
    asp1 = " ".join(lemmatizer.lemmatize(token) for token in tokens1)

    return asp1

In [15]:
#temp = ["prices", "price", "pric", "pizza", "toppings", "waiter", "service", "area", "souvlaki", "souvlakia", "souvlak", "drink", "drinks"]

# Replace every predicted restaurant aspect with its lemmatized form, printing the
# value before and after so the effect of lemmatization can be inspected.
for i in df_vis_res.index:
    aspect = df_vis_res.at[i, "Aspect"]

    aspect_lemma = lemmatize(aspect)
    print(aspect)
    print(aspect_lemma)
    # Write through a single `.at` access. The chained form
    # `df_vis_res["Aspect"][i] = ...` triggers SettingWithCopyWarning and is not
    # guaranteed to modify the DataFrame itself.
    df_vis_res.at[i, "Aspect"] = aspect_lemma

    print("\n")

italian style
italian style


prices
price


pizza
pizza


pizza size
pizza size


pizza
pizza


selection of
selection of


staff
staff


staff
staff


ingredients
ingredient


staff
staff


pizza
pizza


vegan
vegan


variety of
variety of


oven
oven


vegan
vegan


oven baked
oven baked


priced
priced


pizza
pizza


chorizo
chorizo


cvase
cvase


selection of
selection of


toppings
topping


pizza
pizza


brick oven
brick oven


drinks
drink


salads
salad


price
price


pizza
pizza


price
price


charge
charge


pizza
pizza


entertainment app
entertainment app


personnel
personnel


pizza
pizza


staff
staff


staff
staff


pizza
pizza


people
people


bread
bread


hawajan
hawajan


friendly service
friendly service


pizza
pizza


service
service


online ordering
online ordering


staff
staff


delivery
delivery


food quality
food quality


pizza
pizza


pizza
pizza


pizza
pizza


food
food


pizza
pizza


prices
price


games
game


dinner
dinner


pizza delivery
pi

In [16]:
# Export the restaurant aspects (as they stand after the lemmatization cell) without the index column.
df_vis_res.to_csv(path_or_buf="Tableau_res1.csv", index=False)

In [17]:
# Quick manual check of lemmatize on a two-word aspect; each word is lemmatized separately.
lemm = lemmatize("pizzas slices")
print(lemm)

pizza slice


In [18]:
# Keep only the two columns needed for the frequency summaries below.
summary_columns = ["Aspect", "Polarity"]
df2 = df_vis_res[summary_columns]

In [19]:
# Preview the first five aspect/polarity pairs.
df2.iloc[:5]

Unnamed: 0,Aspect,Polarity
0,italian style,Positive
1,price,Positive
2,pizza,Positive
3,pizza size,Positive
4,pizza,Positive


In [20]:
# For each aspect, count how often each polarity label was predicted.
polarity_by_aspect = df2.groupby('Aspect')['Polarity']
unique_count = polarity_by_aspect.value_counts()

In [21]:
# Show the (aspect, polarity) -> count table computed in the previous cell.
print(unique_count)

Aspect             Polarity
# # cies           Positive    1
# # lakia          Negative    1
# # vlaki          Positive    1
-                  Negative    1
air                Negative    1
                              ..
vegetal            Positive    1
vegetarian option  Positive    1
waiter             Negative    1
water              Negative    1
                   Positive    1
Name: Polarity, Length: 133, dtype: int64


In [22]:
# Frequency of each (lemmatized) restaurant aspect, most frequent first.
restaurant_aspects = df2["Aspect"]
aspect_count_og = restaurant_aspects.value_counts()
print(aspect_count_og)

pizza        29
food         22
service      12
price        12
staff        10
             ..
pepperoni     1
mushroom      1
juice         1
waiter        1
meat          1
Name: Aspect, Length: 110, dtype: int64


In [23]:
# Frequency of each polarity label over all restaurant aspects.
# Stored under its own name: reusing `aspect_count_og` here overwrote the aspect
# frequencies computed in the previous cell with polarity frequencies.
sentiment_count_og = df2["Polarity"].value_counts()

print(sentiment_count_og)

Positive    141
Negative     86
Neutral      15
Name: Polarity, dtype: int64


### CASE 2:

In [24]:
# Hotel reviews (case 2); the file is decoded as latin-1, like the restaurant file.
hotel_reviews_csv = "data/hotel_reviews.csv"
hotel_df = pd.read_csv(hotel_reviews_csv, encoding='latin-1')
hotel_df.head()

Unnamed: 0,Review
0,Love everything about this hotel. Will come ba...
1,"luxury hotel, perfectly located at the beachfr..."
2,We stayed in the TOP NEW Signature Suite. Amaz...
3,"This hotel really is exceptional, from check-i..."
4,The most amazing hotel in Cyprus!!! The servic...


In [25]:
# Number of hotel reviews loaded.
print(hotel_df.shape[0])

32


In [26]:
# Run the ABSA pipeline on every hotel review and keep one record per review:
# the raw review text plus the list of (aspect term, polarity) tuples returned for it.
# Records are collected in a plain list and turned into a DataFrame once at the end,
# instead of enlarging the DataFrame one row at a time inside the loop.
absa_records_hotel = []

for review in hotel_df["Review"]:
    termPol_tuples = ABSA(review, tokenizer, ATEmodel, Polaritymodel, DEVICE)
    new_row = {'Review': review, 'Output': termPol_tuples}
    print("row: ", new_row)
    absa_records_hotel.append(new_row)
    print("\n")
    print("new review")

results_df_hotel = pd.DataFrame(absa_records_hotel, columns=["Review", "Output"])

tokens:  ['love', 'everything', 'about', 'this', 'hotel', '.', 'will', 'come', 'back', 'soon', '.', 'great', 'rooms', ',', 'great', 'location', ',', 'friendly', 'staff', ',', 'loved', 'the', 'chinese', 'restaurant', 'and', 'the', 'massage', 'was', 'awesome', '!', 'breakfast', 'was', 'amazing', '.', 'indoor', 'pool', 'with', 'sa', '##una', '!'] 

predicted ATE values:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 2, 0, 2, 2, 0] 

term: ['rooms'] sentiment: ['Positive']
term: ['location'] sentiment: ['Positive']
term: ['staff'] sentiment: ['Positive']
term: ['chinese'] sentiment: ['Positive']
term: ['massage'] sentiment: ['Positive']
term: ['breakfast'] sentiment: ['Positive']
term: ['indoor pool'] sentiment: ['Positive']
row:  {'Review': 'Love everything about this hotel. Will come back soon. Great rooms, great location, friendly staff, loved the chinese restaurant and the Massage was awesome! Breakfast was amazing. Indoor poo

predicted ATE values:  [0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 2, 0, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 

term: ['beds'] sentiment: ['Positive']
term: ['pillows'] sentiment: ['Positive']
term: ['bath robes'] sentiment: ['Positive']
term: ['room lighting'] sentiment: ['Positive']
term: ['breakfasts'] sentiment: ['Positive']
term: ['equipment'] sentiment: ['Positive']
term: ['sauna'] sentiment: ['Positive']
row:  {'Review': 'Rooms have very comfortable beds, pillows, very nice bath robes, room lighting, breakfasts are so reach and hard to resist. Gym has new and good equipment, sauna is very good. Plenty of flowers and plants around.', 'Output': [('beds', 'Positive'), ('pillows', 'Positive'), ('bath robes', 'Positive'), ('room lighting', 'Positive'), ('breakfasts', 'Positive'), ('equipment', 'Positive'), ('sauna', 'Positive')]}


new review
tokens:  ['could', 'not', 'be', 'happier', 'with', 'our', 'visit', '!', 'everything', 'was'

predicted ATE values:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 

term: ['bathroom'] sentiment: ['Negative']
term: ['ceiling'] sentiment: ['Negative']
term: ['tiles'] sentiment: ['Negative']
term: ['toilet'] sentiment: ['Negative']
term: ['shower'] sentiment: ['Negative']
term: ['floors'] sentiment: ['Negative']
term: ['sink'] sentiment: ['Negative']
row:  {'Review': "When we got to our room we were horrified at the poor state. The bathroom was the main issue, hairs left from the previous occupants, black ceiling, tiles were filthy, toilet didn't flush properly, shower was held in place by what looked like chewing gum.The floors needed a very good wash.It gave you black feet if walking bare footed. The sink in the kitchen had white marks in it and looked disgusting", 'Output

term: ['room'] sentiment: ['Negative']
term: ['room'] sentiment: ['Negative']
term: ['heater'] sentiment: ['Negative']
row:  {'Review': 'There was noise during the day and the heating did not work. The room was frozen and when we asked to change the room they answered that they will give us a small heater...\xa0', 'Output': [('heating', 'Negative'), ('room', 'Negative'), ('room', 'Negative'), ('heater', 'Negative')]}


new review
tokens:  ['1', '.', 'dirty', 'rooms', '.', '2', '.', 'uncle', '##an', 'sheets', '.', '3', '.', 'same', 'breakfast', 'everyday', 'which', 'is', 'always', 'cold', '.', '4', '.', 'hot', 'water', 'is', 'in', '##fr', '##e', '##quent', '.', '5', '.', 'the', 'air', 'conditioning', 'does', 'not', 'work', 'well', '.', 'the', 'rooms', 'get', 'very', 'hot', 'and', 'stuff', '##y', 'at', 'night', '.', 'you', 'cannot', 'sleep', 'with', 'a', 'window', 'open', 'since', 'it', 'is', 'a', 'very', 'busy', 'street', 'outside', '.'] 

predicted ATE values:  [0, 0, 0, 1, 0, 0, 0, 0,

In [27]:
# One result row is expected per hotel review; show the count and the first five rows.
print(results_df_hotel.shape[0])
results_df_hotel.iloc[:5]

32


Unnamed: 0,Review,Output
0,Love everything about this hotel. Will come ba...,"[(rooms, Positive), (location, Positive), (sta..."
1,"luxury hotel, perfectly located at the beachfr...","[(premises, Positive), (amenities, Positive), ..."
2,We stayed in the TOP NEW Signature Suite. Amaz...,"[(desing, Positive), (view, Positive), (ambien..."
3,"This hotel really is exceptional, from check-i...","[(departure, Positive), (meals, Positive), (br..."
4,The most amazing hotel in Cyprus!!! The servic...,"[(service, Positive), (staff, Positive)]"


In [28]:
# Flatten the per-review hotel results into one row per (aspect, polarity) tuple.
#
# Each review is paired with its own "Output" list directly. The previous version
# re-located the output by comparing the review text against the whole frame on
# every iteration, which is quadratic and always picks the first match when the
# same review text occurs more than once.
aspect_rows_hotel = []

for review_text, list_hotel in zip(results_df_hotel["Review"], results_df_hotel["Output"]):
    for tup in list_hotel:
        aspect, polarity = tup
        print(tup)
        new_row = {'Review': review_text, 'Tuple': tup, 'Aspect': aspect, 'Polarity': polarity}
        aspect_rows_hotel.append(new_row)

# Build the frame once; `columns=` keeps the expected layout even when no aspect was found.
df_vis_hotel = pd.DataFrame(aspect_rows_hotel, columns=["Review", "Tuple", "Aspect", "Polarity"])

('rooms', 'Positive')
('location', 'Positive')
('staff', 'Positive')
('chinese', 'Positive')
('massage', 'Positive')
('breakfast', 'Positive')
('indoor pool', 'Positive')
('premises', 'Positive')
('amenities', 'Positive')
('buffet breakfast', 'Positive')
('stuff', 'Positive')
('desing', 'Positive')
('view', 'Positive')
('ambience', 'Positive')
('service', 'Positive')
('performance', 'Positive')
('staff', 'Positive')
('departure', 'Positive')
('meals', 'Positive')
('breakfast', 'Positive')
('service', 'Positive')
('service', 'Positive')
('staff', 'Positive')
('lobby', 'Positive')
('rooms', 'Positive')
('restaurants', 'Positive')
('pools', 'Positive')
('gardens', 'Positive')
('beach', 'Positive')
('staff', 'Positive')
('time', 'Positive')
('room', 'Positive')
('staff', 'Positive')
('beaches', 'Positive')
('food', 'Positive')
('staff', 'Positive')
('facilities', 'Positive')
('beds', 'Positive')
('pillows', 'Positive')
('bath robes', 'Positive')
('room lighting', 'Positive')
('breakfasts',

In [29]:
# Preview the first ten flattened hotel rows.
df_vis_hotel.iloc[:10]

Unnamed: 0,Review,Tuple,Aspect,Polarity
0,Love everything about this hotel. Will come ba...,"(rooms, Positive)",rooms,Positive
1,Love everything about this hotel. Will come ba...,"(location, Positive)",location,Positive
2,Love everything about this hotel. Will come ba...,"(staff, Positive)",staff,Positive
3,Love everything about this hotel. Will come ba...,"(chinese, Positive)",chinese,Positive
4,Love everything about this hotel. Will come ba...,"(massage, Positive)",massage,Positive
5,Love everything about this hotel. Will come ba...,"(breakfast, Positive)",breakfast,Positive
6,Love everything about this hotel. Will come ba...,"(indoor pool, Positive)",indoor pool,Positive
7,"luxury hotel, perfectly located at the beachfr...","(premises, Positive)",premises,Positive
8,"luxury hotel, perfectly located at the beachfr...","(amenities, Positive)",amenities,Positive
9,"luxury hotel, perfectly located at the beachfr...","(buffet breakfast, Positive)",buffet breakfast,Positive


In [30]:
# Total number of extracted (aspect, polarity) pairs across all hotel reviews.
print(df_vis_hotel.shape[0])

130


In [31]:
#temp = ["prices", "price", "pric", "pizza", "toppings", "waiter", "service", "area", "souvlaki", "souvlakia", "souvlak", "drink", "drinks"]

# Replace every predicted hotel aspect with its lemmatized form, printing the
# value before and after so the effect of lemmatization can be inspected.
for i in df_vis_hotel.index:
    aspect = df_vis_hotel.at[i, "Aspect"]

    aspect_lemma = lemmatize(aspect)
    print(aspect)
    print(aspect_lemma)
    # Write through a single `.at` access. The chained form
    # `df_vis_hotel["Aspect"][i] = ...` triggers SettingWithCopyWarning and is not
    # guaranteed to modify the DataFrame itself.
    df_vis_hotel.at[i, "Aspect"] = aspect_lemma

    print("\n")

rooms
room


location
location


staff
staff


chinese
chinese


massage
massage


breakfast
breakfast


indoor pool
indoor pool


premises
premise


amenities
amenity


buffet breakfast
buffet breakfast


stuff
stuff


desing
desing


view
view


ambience
ambience


service
service


performance
performance


staff
staff


departure
departure


meals
meal


breakfast
breakfast


service
service


service
service


staff
staff


lobby
lobby


rooms
room


restaurants
restaurant


pools
pool


gardens
garden


beach
beach


staff
staff


time
time


room
room


staff
staff


beaches
beach


food
food


staff
staff


facilities
facility


beds
bed


pillows
pillow


bath robes
bath robe


room lighting
room lighting


breakfasts
breakfast


equipment
equipment


sauna
sauna


room
room


breakfast buffet
breakfast buffet


staff
staff


pool facilities
pool facility


restaurant
restaurant


staff
staff


room
room


coffee
coffee


bottles of
bottle of


served
served


breakfast
breakf

In [32]:
# Export the hotel aspects (as they stand after the lemmatization cell) without the index column.
df_vis_hotel.to_csv(path_or_buf="hotels1.csv", index=False)

In [33]:
# Frequency of each (lemmatized) hotel aspect, most frequent first.
hotel_aspects = df_vis_hotel["Aspect"]
aspect_count_hotel = hotel_aspects.value_counts()
print(aspect_count_hotel)

room            19
staff           12
breakfast       10
facility         3
bed              3
                ..
selection of     1
location         1
# # luten        1
dinner           1
place            1
Name: Aspect, Length: 77, dtype: int64


In [34]:
# Frequency of each polarity label over all hotel aspects.
hotel_polarities = df_vis_hotel["Polarity"]
sentiment_count_hotel = hotel_polarities.value_counts()
print(sentiment_count_hotel)

Positive    67
Negative    55
Neutral      8
Name: Polarity, dtype: int64
