In [1]:
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import PassiveAggressiveClassifier

# import custom modules
from custom_libs import db
from custom_libs import preprocessing
from custom_libs import classification
from custom_libs import plotting
from custom_libs import utils
from custom_libs import dump

## Caricamento del dataset e preprocessing

In [4]:
# Load the review dataset and add the helper columns used further down.
DATASET_NAME = "McDonald_s_Reviews"
STORE_COORDINATE_COLUMNS = ["latitude", "longitude"]

df = db.get_dataset(DATASET_NAME)
# Both helpers are called only for their effect on `df` (return values are
# discarded); the frame displayed afterwards shows the extra `rating_number`
# and `id` columns.
preprocessing.add_id_column(df, columns=STORE_COORDINATE_COLUMNS)
preprocessing.add_rating_number_column(df)
df.head(2)

Number of unique ids:  39


Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,rating_number,id
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1 star,1,9.0
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4 stars,4,9.0


In [5]:
# Clean the raw `review` text with preprocessing.preprocess_text; the frame
# displayed afterwards carries an additional `review_clean` column.
# NOTE(review): `proprocessing_function` is a misspelling of "preprocessing";
# the name is left unchanged here in case other cells reference it.
proprocessing_function = preprocessing.preprocess_text
preprocessing.preprocess_dataframe(df, 'review', proprocessing_function)
df.head(2)

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,rating_number,id,review_clean
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1 star,1,9.0,look like someone spit food normal transaction...
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4 stars,4,9.0,far food atmosphere go staff make difference f...


## Classificazione del sentiment: modello SVC a confronto con il sentiment automatico

In [6]:
# Restore the stored SVC classifier and the vectorizer that goes with it.
# NOTE(review): the log output shows `.pkl` files, presumably pickles; only
# load dumps that come from a trusted source.
SVC_MODEL_DUMP = "svc_model"
SVC_VECTORIZER_DUMP = "svc_vectorizer"

model = dump.load_model(SVC_MODEL_DUMP)
vectorizer = dump.load_model(SVC_VECTORIZER_DUMP)

Loading model from: dump_models/svc_model.pkl
Loading model from: dump_models/svc_vectorizer.pkl


In [7]:
# Vectorize the cleaned reviews, then let the loaded model label each of them.
review_features = vectorizer.transform(df['review_clean'])
res = model.predict(review_features)
# Store the prediction next to the review it belongs to.
df['sentiment_our'] = res
df.head(2)

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,rating_number,id,review_clean,sentiment_our
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1 star,1,9.0,look like someone spit food normal transaction...,Neutral
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4 stars,4,9.0,far food atmosphere go staff make difference f...,Positive


In [8]:
# Columns shown side by side to compare the two sentiment labels.
COMPARISON_COLUMNS = ['review_clean', 'sentiment_auto', 'sentiment_our']

# Adds a second label per review in `sentiment_auto`, computed from the
# cleaned text by classification.append_sentiment_for_each_row.
classification.append_sentiment_for_each_row(df, 'review_clean', new_column_name='sentiment_auto')
df[COMPARISON_COLUMNS]

Unnamed: 0,review_clean,sentiment_auto,sentiment_our
0,look like someone spit food normal transaction...,Positive,Neutral
1,far food atmosphere go staff make difference f...,Positive,Positive
2,made mobile got speaker checked line moving le...,Negative,Negative
3,mc crispy chicken sandwich customer service qu...,Neutral,Positive
4,repeat time drive thru still manage mess suppo...,Negative,Neutral
...,...,...,...
33391,treated badly,Negative,Neutral
33392,service good,Positive,Neutral
33393,remove hunger enough,Negative,Negative
33394,good lately become expensive,Positive,Negative


In [12]:
# Keep only the reviews on which the SVC prediction (`sentiment_our`) and the
# automatic label (`sentiment_auto`) disagree.
df2 = df[df['sentiment_our'] != df['sentiment_auto']]
# Fixed seed so that the two displayed rows are the same on every
# Restart & Run All instead of changing with each execution.
df2.sample(2, random_state=42)

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,rating_number,id,review_clean,sentiment_our,sentiment_auto
19739,19740,McDonald's,Fast food restaurant,"621 Broadway, Newark, NJ 07104, United States",40.77191,-74.161475,1564,5 years ago,Good poisoning,1 star,1,35.0,good poisoning,Neutral,Negative
22797,22798,McDonald's,Fast food restaurant,"5725 W Irlo Bronson Memorial Hwy, Kissimmee, F...",28.333508,-81.513738,5566,a month ago,A disappointing visit. We ordered using the ki...,2 stars,2,2.0,disappointing visit ordered using kiosk since ...,Neutral,Positive


In [15]:
# Narrow the disagreements down to the polar ones: one label says Positive
# while the other says Negative, in either direction.
our_positive = df2['sentiment_our'] == classification.Sentiment.POSITIVE
our_negative = df2['sentiment_our'] == classification.Sentiment.NEGATIVE
auto_positive = df2['sentiment_auto'] == classification.Sentiment.POSITIVE
auto_negative = df2['sentiment_auto'] == classification.Sentiment.NEGATIVE

# Explicit parentheses spell out the grouping the original one-liner obtained
# from `&` binding tighter than `|`.
polar_disagreement = (our_positive & auto_negative) | (our_negative & auto_positive)
df2 = df2[polar_disagreement]
df2.shape

(4060, 15)

In [None]:
def plot_sentiment_word_clouds(df, sentiment_column='sentiment', text_column='review_clean'):
    """Plot one word cloud per sentiment class.

    For every value returned by ``classification.Sentiment.get_all()`` the
    rows of ``df`` whose ``sentiment_column`` equals that value are selected
    and handed to ``plotting.plot_word_cloud`` together with the sentiment
    value and ``text_column``.

    Args:
        df: DataFrame holding the sentiment labels and the text to plot.
        sentiment_column: Name of the column with the sentiment labels.
            Defaults to ``'sentiment'`` (the column the function originally
            hard-coded). The frames displayed in this notebook only have
            ``'sentiment_our'`` and ``'sentiment_auto'``, so pass one of
            those explicitly.
        text_column: Name of the text column forwarded to the plotting
            helper. Defaults to ``'review_clean'``.

    Raises:
        KeyError: If ``sentiment_column`` is not a column of ``df``.
    """
    # Fail early with a readable message instead of an opaque pandas KeyError.
    if sentiment_column not in df.columns:
        raise KeyError(
            f"column {sentiment_column!r} not found; available columns: {list(df.columns)}"
        )
    for sentiment in classification.Sentiment.get_all():
        df_sentiment = df[df[sentiment_column] == sentiment]
        plotting.plot_word_cloud(df_sentiment, sentiment, text_column)

# plot_sentiment_word_clouds(df, sentiment_column='sentiment_our')

In [None]:
# do not delete this comment for now: [30.460718, -97.792874]  max_distance=100
# Reference position; these coordinates equal the latitude/longitude of the
# Austin, TX store shown in the dataset preview.
current_position = [30.460718, -97.792874]
# NOTE(review): the unit of `max_distance` is not visible here; confirm in utils.
best_rated_restaurant = utils.select_best_restaurant_from_stars(df, current_position, max_distance=100)

In [None]:
# Sentiment-based counterpart of the star-based selection, using the same
# position and max_distance. Relies on `current_position` being defined by an
# earlier cell.
# NOTE(review): which sentiment column the helper reads is not visible here; confirm in utils.
best_feeling_restaurant = utils.select_best_restaurant_from_sentiment(df, current_position, max_distance=100)
best_feeling_restaurant