## Import libraries

In [1]:
import os
from importlib import reload
from collections import Counter

In [2]:
import nltk
import numpy as np
import pandas as pd

In [3]:
from nltk.wsd import lesk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords as sw
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn

In [4]:
from utils import helper

## Load necessary NLTK resources

In [5]:
### Uncomment the lines below to download the NLTK resources if you don't have them
# nltk.download("punkt")
# nltk.download("averaged_perceptron_tagger")
# nltk.download("stopwords")
# nltk.download("wordnet")
# nltk.download("sentiwordnet")

### If you already have them, register their local directory with NLTK.
# The membership check keeps this cell idempotent: re-running it does not
# append the same directory to nltk.data.path a second time.
NLTK_DATA_DIR = os.path.abspath("./data/nltk/")
if NLTK_DATA_DIR not in nltk.data.path:
    nltk.data.path.append(NLTK_DATA_DIR)

# Shared stemmer / lemmatizer instances.
# NOTE(review): neither is referenced in the visible cells — confirm they are still needed.
ps = PorterStemmer()
wn_lemmatizer = nltk.WordNetLemmatizer()

# Single-letter tag -> WordNet part-of-speech constant.
# Presumably keyed by the first letter of a POS tag (J/V/N/R) — confirm against usage.
tag_to_pos = {"J": wn.ADJ, "V": wn.VERB, "N": wn.NOUN, "R": wn.ADV}

**User comments / reviews**

In [6]:
# Past comments/reviews written by the example user (one string per review).
# Long reviews are wrapped with implicit string concatenation; the resulting
# values are unchanged.
user_comments = [
    "My travel to the Festival was the best trip ever, the music was great.",
    (
        "I am never travelling to that Restaurant again, "
        "the food was awful and looked really bad."
    ),
    (
        "My tour round that Country was neither great not bad "
        "and the traffic was terrible."
    ),
    "I expected more from my trip to the Museum but the art on display was boring.",
]

print("Number of user comments/reviews: ", len(user_comments))

Number of user comments/reviews:  4


**Tourist attraction reviews**

In [7]:
# Past comments/reviews for each example tourist site, keyed by site id.
# Long reviews are wrapped with implicit string concatenation; the resulting
# values are unchanged.
sites_reviews = {
    "site_a": [
        "My travel to the Festival was the best trip ever, the music was great.",
        "The music, food, art, was amazing.",
    ],
    "site_b": [
        (
            "I am never travelling to that Restaurant again, "
            "the food was awful and looked really bad."
        ),
        "A waste of money and time, never coming back here.",
    ],
    "site_c": [
        (
            "My tour round that Country was neither great not bad "
            "and the traffic was terrible."
        ),
        (
            "I expected more from my trip to the Museum "
            "but the art on display were boring."
        ),
        (
            "Not sure if I want to come back here, "
            "neither the art not the journey was memorable"
        ),
    ],
}

print("Number of tourist sites: ", len(sites_reviews))

Number of tourist sites:  3


**Make recommendations**

In [8]:
# Extract the user's preferences from their past comments. Per the saved output
# of this cell, noun synsets are extracted from each comment, grouped into
# clusters, and every cluster is given a score.
# NOTE(review): the positional arguments `True` and `0.5` are not explained here —
# confirm their meaning in helper.extract_user_prefs (passing them by keyword
# would make this call self-documenting).
# NOTE(review): verbose=False is passed, yet the saved output still shows the
# extracted nouns / clusters / scores — confirm what `verbose` actually controls.
user_prefs = helper.extract_user_prefs(user_comments, True, 0.5, verbose=False)

EXTRACTED NOUNS:
=====

 [[Synset('travel.n.01'), Synset('festival.n.02'), Synset('trip.n.01'), Synset('music.n.04')], [Synset('restaurant.n.01'), Synset('food.n.01')], [Synset('tour.n.01'), Synset('round.n.11'), Synset('nation.n.02'), Synset('traffic.n.01')], [Synset('trip.n.07'), Synset('museum.n.01'), Synset('art.n.01'), Synset('display.n.02')]]

CLUSTERS:
=====

cluster 1: count=3, [Synset('art.n.01'), Synset('museum.n.01'), Synset('restaurant.n.01')]
cluster 2: count=6, [Synset('display.n.02'), Synset('festival.n.02'), Synset('tour.n.01'), Synset('travel.n.01'), Synset('trip.n.01'), Synset('trip.n.07')]
cluster 3: count=1, [Synset('food.n.01')]
cluster 4: count=1, [Synset('music.n.04')]
cluster 5: count=2, [Synset('nation.n.02'), Synset('traffic.n.01')]
cluster 6: count=1, [Synset('round.n.11')]


CLUSTERS SCORE:
=====

cluster: 1, score: -2.75
cluster: 2, score: -1.5
cluster: 3, score: -2.75
cluster: 4, score: 0.25
cluster: 5, score: -1.75
cluster: 6, score: -1.75


USER PREFEREN

In [9]:
# Extract features for every site from its reviews. The saved output of this
# cell shows a mapping of site id -> list of WordNet synsets.
# NOTE(review): presumably that mapping is also the return value — confirm in helper.
sites_feats = helper.extract_sites_feats(sites_reviews)

SITE FEATURES:

 {'site_a': [Synset('music.n.05'), Synset('travel.n.01'), Synset('festival.n.02'), Synset('tripper.n.04'), Synset('food.n.03')], 'site_b': [Synset('restaurant.n.01'), Synset('food.n.02'), Synset('waste.n.05'), Synset('money.n.02'), Synset('time.n.02')], 'site_c': [Synset('art.n.01'), Synset('tour.n.01'), Synset('turn.n.09'), Synset('nation.n.02'), Synset('traffic.n.03')]}


In [10]:
# Compare the user's preferences with each site's features. The saved output of
# this cell reports the best score with its site, plus all sites sorted by score.
# NOTE(review): the exact return value is not visible here (the name suggests a
# site/score pairing) — confirm in helper.make_recommendation.
site_and_score = helper.make_recommendation(user_prefs, sites_feats)

Best score and site with the best score:

 0.5 site_a


Sites sorted by score:

 [('site_a', 0.5), ('site_c', 0.42105263157894735), ('site_b', 0.375)]


## The END