In [1]:
import nltk
import tensorflow as tf
import matplotlib
import json
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from random import randrange
from nltk.corpus import stopwords
import string
import spacy
import pandas as pd
import os
import sys

In [2]:
# Keras text-preprocessing and model-building utilities for the sentiment model.
# `tokenizer` is the module-level Tokenizer that predict_sentiment() uses to turn
# raw review text into integer sequences.
# NOTE(review): nothing visible in this notebook fits this tokenizer
# (fit_on_texts) or restores a saved vocabulary -- confirm it matches the
# vocabulary the saved model was trained with.
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer() # no num_words cap is passed, so the vocabulary size is not limited here

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense, Dropout, SpatialDropout1D # TODO: work out why different kinds of dropout layers are used
from tensorflow.keras.layers import Embedding
from tensorflow import keras
from tensorflow.keras import layers

from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
# Use the trained model to classify the sentiment of one piece of text.
def predict_sentiment(text):
    """Classify ``text`` as ``"good"`` or ``"bad"`` with the saved Keras model.

    The model stored under ``'model'`` is loaded on the first call only and then
    cached on the function object; reloading it for every review made each call
    pay the full deserialisation cost. Re-run this cell (which redefines the
    function) to pick up a re-trained model.

    Args:
        text: Raw review text.

    Returns:
        ``"good"`` when the rounded model output is 1 (positive), otherwise
        ``"bad"`` (0 denotes negative).
    """
    model = getattr(predict_sentiment, "_model", None)
    if model is None:
        model = keras.models.load_model('model')
        predict_sentiment._model = model

    # Encode with the module-level tokenizer and pad to the fixed length of 200
    # that is fed to the model.
    # NOTE(review): the visible notebook never fits `tokenizer`; confirm it
    # carries the training vocabulary, otherwise every text encodes the same way.
    sequences = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequences, maxlen=200)

    prediction = int(model.predict(padded).round().item())
    return "good" if prediction == 1 else "bad"

# Elasticsearch

Search for reviews that match the user's input.

In [4]:
def _load_json_records(path):
    """Read a file holding JSON values written back to back and return them as a list.

    Values are decoded one after another with ``JSONDecoder.raw_decode``, so the
    separator between them may be any whitespace (``\n``, ``\r\n``, blank
    lines). The previous approach spliced commas between ``"}\n{"`` pairs, which
    fails for any other separator. The file is read as UTF-8 explicitly instead
    of relying on the platform default encoding.

    Args:
        path: Path of the file to read.

    Returns:
        A list with one decoded value per JSON document found in the file
        (empty when the file contains only whitespace).

    Raises:
        json.JSONDecodeError: if the file contains text that is not valid JSON.
    """
    with open(path, encoding='utf-8') as f:
        text = f.read()

    decoder = json.JSONDecoder()
    records = []
    pos, end = 0, len(text)
    while pos < end:
        # raw_decode does not skip leading whitespace itself.
        if text[pos].isspace():
            pos += 1
            continue
        record, pos = decoder.raw_decode(text, pos)
        records.append(record)
    return records


data = _load_json_records('reviewSelected100.json')

In [5]:
# Collect the distinct business ids that appear in the loaded reviews.
unique_businesses = {entry['business_id'] for entry in data}
print(f"Number of businesses: {len(unique_businesses)}")

Number of businesses: 153


In [6]:
from elasticsearch import Elasticsearch
from elasticsearch import helpers

In [7]:
# Connect to the local Elasticsearch node. The client takes the node address as
# its first argument (`hosts`); `HOST=` / `PORT=` are not connection options, so
# the previously intended host and port were never actually applied.
es = Elasticsearch("http://localhost:9200")

In [8]:
# Index definition passed to `es.indices.create`: one shard, no replicas,
# documents sorted by `date` ascending, and a custom analyzer for review text.
body = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
        "index": {
            "sort.field": "date",
            "sort.order": "asc",
        },
        # Custom analyzer: standard tokenizer, then lowercase, English
        # stop-word removal and Porter stemming.
        "analysis": {
            "analyzer": {
                "review_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": ["lowercase", "english_stop", "porter_stem"],
                },
            },
            "filter": {
                "english_stop": {
                    "type": "stop",
                    "stopwords": "_english_",
                },
            },
        },
    },
    "mappings": {
        "properties": {
            # The same analyzer is applied at index time and at search time.
            "text": {
                "type": "text",
                "analyzer": "review_analyzer",
                "search_analyzer": "review_analyzer",
            },
            "date": {
                "type": "date",
                "format": "yyyy-MM-dd HH:mm:ss",
            },
        },
    },
}

In [9]:
def review_generator(data, index=None):
    """Yield one bulk-index action per review, for ``helpers.bulk``.

    Args:
        data: Iterable of review dicts; each must contain ``'review_id'``.
        index: Target index name. When omitted, the module-level
            ``index_name`` is used (looked up when iteration starts), which
            keeps existing ``review_generator(data)`` calls working.

    Yields:
        Dicts with ``_index``, ``_id`` (the review id as a string) and
        ``_source`` (the review itself). No ``_type`` is emitted: mapping types
        are deprecated and typeless requests are the supported form.
    """
    target_index = index_name if index is None else index
    for review in data:
        yield {
            "_index": target_index,
            "_id": str(review['review_id']),
            "_source": review,
        }

In [10]:
# Create and populate the review index once; later runs reuse the existing index.
index_name = "review-index"
if not es.indices.exists(index=index_name):
    es.indices.create(index=index_name, body=body)
    try:
        helpers.bulk(es, review_generator(data))
    except Exception:
        # A failed bulk load would otherwise leave a partly filled index behind
        # that every re-run reports as "already exists"; drop it so the next run
        # rebuilds it from scratch.
        es.indices.delete(index=index_name)
        raise
    # Make the freshly indexed reviews visible to searches straight away rather
    # than waiting for the next periodic refresh.
    es.indices.refresh(index=index_name)
    print("Index created")
else:
    print("Index already exists")

Index already exists


In [11]:
def retrieveUniqueReviewsByInput(userInput, es, index_name):
    """Return one matching review per business for a free-text search.

    Runs ``userInput`` as a ``query_string`` query against ``index_name``,
    pages through every hit with the scroll API (100 hits per page) and keeps
    only the first hit seen for each ``business_id``, in the order the hits are
    returned. The scroll context is released when paging ends, including when
    an error interrupts it.

    Args:
        userInput: Query text, passed verbatim as the ``query_string`` query.
        es: Client object providing ``search``, ``scroll`` and ``clear_scroll``.
        index_name: Name of the index to search.

    Returns:
        List of ``_source`` dicts, one per distinct ``business_id``.
    """
    query = {
        "size": 100,
        "query": {
            "query_string": {
                "query": userInput
            }
        }
    }
    scroll_ttl = '2s'
    unique_reviews = []
    seen_businesses = set()

    res = es.search(index=index_name, body=query, scroll=scroll_ttl)
    scroll_id = res.get('_scroll_id')
    try:
        # An empty page marks the end of the result set.
        while res['hits']['hits']:
            for doc in res['hits']['hits']:
                review = doc['_source']
                # Keep only the first review seen for each business.
                if review['business_id'] not in seen_businesses:
                    seen_businesses.add(review['business_id'])
                    unique_reviews.append(review)
            res = es.scroll(scroll_id=scroll_id, scroll=scroll_ttl)
            scroll_id = res.get('_scroll_id', scroll_id)
    finally:
        if scroll_id is not None:
            try:
                es.clear_scroll(scroll_id=scroll_id)
            except Exception:
                # Best-effort cleanup: the context may already have expired, and
                # a cleanup failure must not hide the results or the real error.
                pass
    return unique_reviews

In [12]:
def retrieveBusinessReviews(businessID, es, index_name):
    """Return the text of every review that matches ``businessID``.

    Runs ``businessID`` as a ``query_string`` query against ``index_name`` and
    pages through all hits with the scroll API (100 hits per page). The scroll
    context is released when paging ends, including when an error interrupts it.

    NOTE(review): the query names no field, so this is not an exact match on the
    ``business_id`` field; confirm that ids containing query-syntax characters
    cannot pull in reviews of other businesses.

    Args:
        businessID: Business id, passed verbatim as the ``query_string`` query.
        es: Client object providing ``search``, ``scroll`` and ``clear_scroll``.
        index_name: Name of the index to search.

    Returns:
        List of review texts (``_source['text']``) in the order hits are returned.
    """
    query = {
        "size": 100,
        "query": {
            "query_string": {
                "query": businessID
            }
        }
    }
    scroll_ttl = '2s'
    reviews = []

    res = es.search(index=index_name, body=query, scroll=scroll_ttl)
    scroll_id = res.get('_scroll_id')
    try:
        # An empty page marks the end of the result set.
        while res['hits']['hits']:
            reviews.extend(doc['_source']['text'] for doc in res['hits']['hits'])
            res = es.scroll(scroll_id=scroll_id, scroll=scroll_ttl)
            scroll_id = res.get('_scroll_id', scroll_id)
    finally:
        if scroll_id is not None:
            try:
                es.clear_scroll(scroll_id=scroll_id)
            except Exception:
                # Best-effort cleanup: the context may already have expired, and
                # a cleanup failure must not hide the results or the real error.
                pass
    return reviews

In [13]:
def _positive_share(reviews):
    """Classify each review text and return ``(good_count, bad_count, share)``.

    ``share`` is ``good / (good + bad)``, or ``0.0`` when there are no reviews.
    """
    labels = [predict_sentiment(review) for review in reviews]
    good_count = labels.count("good")
    bad_count = labels.count("bad")
    total = good_count + bad_count
    share = good_count / total if total else 0.0
    return good_count, bad_count, share


def main():
    """Prompt for a search term and rank the matching businesses by sentiment.

    Steps:
      1. Read a query from the user and fetch one matching review per business.
      2. Fetch every review of each of those businesses.
      3. Classify each review and score the business by its share of "good"
         reviews (0.0 to 1.0).
      4. Print the per-business good/bad counts, then the
         ``(business_id, score)`` pairs sorted by score, highest first (ties
         broken by business id, descending).

    The scores come from the reviews actually retrieved; earlier hard-coded
    debugging fixtures and the fixed ``[2, 3, 1]`` ratios are gone. The score
    is the plain fraction of positive reviews, replacing
    ``(good / bad) / (good + bad)``, which did not order businesses
    consistently.
    """
    user_input = input("Search:")

    # One review per business that matches the query.
    reviews_input = retrieveUniqueReviewsByInput(user_input, es, index_name)

    business_sentiment = {}
    for review in reviews_input:
        business_id = review['business_id']
        # The id is sent as a query_string query, where a leading '-' is the
        # NOT operator, so it is stripped before searching.
        if business_id.startswith('-'):
            query_id = business_id[1:]
        else:
            query_id = business_id
        business_reviews = retrieveBusinessReviews(query_id, es, index_name)

        good_count, bad_count, share = _positive_share(business_reviews)
        print(good_count)
        print(bad_count)
        business_sentiment[business_id] = share

    # t[::-1] turns (business_id, score) into (score, business_id), so the sort
    # is by score first and by id second.
    sorted_sentiment = sorted(business_sentiment.items(), key=lambda t: t[::-1], reverse=True)
    print(sorted_sentiment)
# Run the interactive search; this blocks on input() until a query is entered.
main()

Search:asd


KeyboardInterrupt: 

In [14]:
# Smoke-test the classifier on a literal string.
# NOTE(review): the saved output for this cell is a JSONDecodeError, not a
# label; the failing call is not visible here -- presumably raised while loading
# the saved 'model'. Confirm before relying on predict_sentiment().
sentiment = predict_sentiment('text')

JSONDecodeError: Expecting value: line 1 column 1 (char 0)