In [1]:
import warnings
warnings.filterwarnings("ignore")

# import custom modules
from custom_libs import db
from custom_libs import preprocessing
from custom_libs import classification
from custom_libs import plotting
from custom_libs import utils
from custom_libs import dump
from custom_libs import db

In [2]:
# Column of the dataset that holds the raw review text.
text_column_name = 'review'
# Column that receives the cleaned review text; it is passed to
# preprocessing.preprocess_dataframe as the destination column.
cleaned_text_column_name = 'review_cleaned'

In [3]:
# Load the raw reviews, then enrich the frame step by step. The preprocessing
# helpers are called for their effect on `df` (their return values are
# discarded), so the call order below must be kept.
df = db.get_dataset("McDonald_s_Reviews")

# Adds the `id` column; presumably one id per (latitude, longitude) pair, i.e.
# per store location — TODO confirm against custom_libs.preprocessing.
preprocessing.add_id_column(df, columns=["latitude", "longitude"])

# Adds the numeric `rating_number` column next to the textual `rating`.
preprocessing.add_rating_number_column(df)

# Clean every review with preprocess_text and store the result in the
# cleaned-text column.
text_cleaner = preprocessing.preprocess_text
preprocessing.preprocess_dataframe(
    df,
    text_column_name,
    cleaned_text_column_name,
    text_cleaner,
)

# Quick visual check of the enriched frame.
df.head(2)

Number of unique ids:  39


Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,rating_number,id,review_cleaned
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1 star,1,9.0,look like someone spit food normal transaction...
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4 stars,4,9.0,far food atmosphere go staff make difference f...


In [4]:
# Column that receives the predictions of the model loaded via dump.load_model.
sentiment_our_column_name = 'sentiment_our'
# Column that receives the labels produced by
# classification.append_sentiment_for_each_row.
sentiment_auto_column_name = 'sentiment_auto'

In [5]:
# --- Our model -------------------------------------------------------------
# Load the persisted classifier and its matching vectorizer.
# NOTE(review): the load log shows `.pkl` files; if these are pickles, only
# load them from a trusted source.
model = dump.load_model("model_chat")
vectorizer = dump.load_model("vectorizer_chat")

# Vectorize the cleaned reviews, then predict one sentiment label per row.
review_features = vectorizer.transform(df[cleaned_text_column_name])
df[sentiment_our_column_name] = model.predict(review_features)

# --- SentimentIntensityAnalyzer model ---------------------------------------
# Adds a second sentiment column computed from the same cleaned text.
classification.append_sentiment_for_each_row(
    df,
    cleaned_text_column_name,
    new_column_name=sentiment_auto_column_name,
)

# --- Results ----------------------------------------------------------------
# Side-by-side view of the cleaned text and both predictions.
comparison_columns = [
    cleaned_text_column_name,
    sentiment_auto_column_name,
    sentiment_our_column_name,
]
df[comparison_columns]

Loading model from: dump_models/model_chat.pkl
Loading model from: dump_models/vectorizer_chat.pkl


Unnamed: 0,review_cleaned,sentiment_auto,sentiment_our
0,look like someone spit food normal transaction...,Positive,Neutral
1,far food atmosphere go staff make difference f...,Positive,Positive
2,make mobile order get speaker checked line mov...,Negative,Negative
3,mc crispy chicken sandwich customer service qu...,Neutral,Positive
4,repeat order time drive thru still manage mess...,Negative,Neutral
...,...,...,...
33391,treat badly,Negative,Neutral
33392,service good,Positive,Neutral
33393,remove hunger enough,Negative,Neutral
33394,good lately become expensive,Positive,Neutral


In [6]:
# Keep only the reviews on which the two classifiers disagree.
predictions_disagree = df[sentiment_our_column_name].ne(df[sentiment_auto_column_name])
df_differences = df[predictions_disagree]

# (rows, columns) of the disagreement subset.
df_differences.shape

(14652, 15)

In [7]:
# Export the disagreements for inspection: the original (uncleaned) review
# text together with both predicted labels.
difference_columns = [
    text_column_name,
    sentiment_auto_column_name,
    sentiment_our_column_name,
]
df_differences = df_differences[difference_columns]
db.save_dataset(df_differences, "McDonald_s_Reviews_differences")

In [8]:
# df[[cleaned_text_column_name, sentiment_auto_column_name, sentiment_our_column_name]].sample(10)

In [9]:
def plot_sentiment_word_clouds(df):
    """Plot word clouds of the cleaned reviews, per sentiment and per classifier.

    For every label returned by ``classification.Sentiment.get_all()`` the rows
    of ``df`` whose prediction equals that label are selected and passed to
    ``plotting.plot_word_cloud`` twice: first using the
    ``sentiment_auto_column_name`` predictions (``plt_name="Auto"``), then using
    the ``sentiment_our_column_name`` predictions (``plt_name="Our"``).

    The column names are read from the notebook-level variables
    ``sentiment_auto_column_name``, ``sentiment_our_column_name`` and
    ``cleaned_text_column_name``.

    Args:
        df: DataFrame containing the cleaned-text column and both sentiment
            columns.
    """
    # (prediction column, label handed to the plot) — order matters: the
    # "Auto" cloud is drawn before the "Our" cloud for each sentiment.
    labelled_columns = (
        (sentiment_auto_column_name, "Auto"),
        (sentiment_our_column_name, "Our"),
    )
    for sentiment in classification.Sentiment.get_all():
        for prediction_column, plot_label in labelled_columns:
            matching_rows = df[df[prediction_column] == sentiment]
            plotting.plot_word_cloud(
                matching_rows,
                sentiment,
                cleaned_text_column_name,
                plt_name=plot_label,
            )

# plot_sentiment_word_clouds(df)

In [10]:
# Search parameters. The position is [latitude, longitude] and equals the
# coordinates of the store at 13749 US-183 Hwy shown in the dataset preview.
# NOTE(review): the unit of max_distance is not visible here — confirm
# against custom_libs.utils.
current_position = [30.460718, -97.792874]
max_distance = 20

# Sentiment column used to rank restaurants by review sentiment.
best_clf_column = sentiment_auto_column_name

# Best restaurant within range according to the star ratings.
best_rated_restaurant = utils.select_best_restaurant_from_stars(
    df,
    current_position,
    max_distance,
)

# Best restaurant within range according to the predicted sentiment.
best_feeling_restaurant = utils.select_best_restaurant_from_sentiment(
    df,
    current_position,
    max_distance,
    sentiment_column=best_clf_column,
)

# Show both picks side by side.
best_rated_restaurant, best_feeling_restaurant

(                                    store_address   latitude  longitude
 id                                                                     
 8.0  8500 US-290, Austin, TX 78724, United States  30.329135 -97.660629,
                                          store_address   latitude  longitude
 id                                                                          
 9.0  13749 US-183 Hwy, Austin, TX 78750, United States  30.460718 -97.792874)

In [11]:
# Persist the full working frame. At this point `df` carries, besides the
# original columns, the `id`, `rating_number` and cleaned-text columns as well
# as both sentiment columns added earlier in the notebook.
db.save_dataset(df, "McDonald_s_Reviews_preprocessed")