### Importing libraries

In [1]:
import numpy as np
import pandas as pd

### Importing dataset

In [15]:
# Read the fresh review dump as tab-separated text; quoting=3 makes the parser
# treat quote characters as ordinary text.
dataset = pd.read_csv(
    'a2_RestaurantReviews_FreshDump.tsv',
    sep='\t',
    quoting=3,
)
# Show the last rows as a quick sanity check of what was loaded.
dataset.tail()

Unnamed: 0,Review
95,I think food should have flavor and texture an...
96,Appetite instantly gone.
97,Overall I was not impressed and would not go b...
98,"The whole experience was underwhelming, and I ..."
99,"Then, as if I hadn't wasted enough of my life ..."


### Data cleaning

In [3]:
import re
import nltk

# Make sure the NLTK stopword corpus is available before it is imported below.
nltk.download('stopwords')

from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords

# English stopword list with 'not' taken out, so that clean_review does not
# filter out negations.
all_stopwords = stopwords.words('english')
all_stopwords.remove('not')

# Stemmer instance shared by clean_review.
ps = PorterStemmer()

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/luiscruz/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [73]:
def clean_review(review):
    """Normalise one raw review string into a string of stemmed tokens.

    Every non-letter character is replaced by a space, the text is lowercased
    and split on whitespace, words present in the module-level
    ``all_stopwords`` list are dropped, and the remaining words are stemmed
    with the module-level stemmer ``ps``.

    Parameters
    ----------
    review : str
        Raw review text.

    Returns
    -------
    str
        The surviving stemmed words joined by single spaces; an empty string
        when no word survives.
    """
    # Build the lookup set once per call rather than once per word.
    stopword_set = set(all_stopwords)
    words = re.sub('[^a-zA-Z]', ' ', review).lower().split()
    return ' '.join(ps.stem(word) for word in words if word not in stopword_set)

# Clean every review in the 'Review' column. Iterating the column itself
# (instead of a hard-coded range(0, 100) with label lookups) keeps the corpus
# the same length as the dataset, whatever its size or index.
corpus = [clean_review(review) for review in dataset['Review']]

### Data transformation

In [74]:
# Loading BoW dictionary
from sklearn.feature_extraction.text import CountVectorizer
import pickle

cvFile = 'c1_BoW_Sentiment_Model.pkl'
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only load this file if it comes from a trusted source.
# The context manager closes the file handle once the object is loaded.
with open(cvFile, "rb") as cv_handle:
    cv = pickle.load(cv_handle)


In [75]:
# Vectorise the cleaned corpus with the loaded vectoriser and densify the result.
X_fresh = (
    cv
    .transform(corpus)
    .toarray()
)
# (rows, columns) of the resulting feature matrix.
X_fresh.shape

(100, 1420)

### Predictions (via sentiment classifier)

In [76]:
import joblib
# Load the serialised sentiment classifier from disk.
# NOTE(review): joblib files are pickle-based, so only load artefacts from a trusted source.
classifier = joblib.load('c2_Classifier_Sentiment_Model')

In [77]:
# Predict one label per row of X_fresh and print the resulting array.
y_pred = classifier.predict(X_fresh)
print(y_pred)

[0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0
 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0]


In [78]:
# Store the predictions next to the reviews as a new column.
dataset['predicted_label'] = y_pred.tolist()
# Display only the rows whose predicted label is 1.
dataset.loc[dataset['predicted_label'] == 1]

Unnamed: 0,Review,predicted_label
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
5,"Also, I feel like the chips are bought, not ma...",1
9,This is my new fav Vegas buffet spot.,1
10,I seriously cannot believe that the owner has ...,1
13,How can you call yourself a steakhouse if you ...,1
15,The only thing I wasn't too crazy about was th...,1
19,"What SHOULD have been a hilarious, yummy Chris...",1
22,"Every time I eat here, I see caring teamwork t...",1
38,"The meat was pretty dry, I had the sliced bris...",1


In [10]:
# Write `dataset` to a tab-separated file; the DataFrame index is not written.
dataset.to_csv(
    "c3_Predicted_Sentiments_Fresh_Dump.tsv",
    sep='\t',
    encoding='UTF-8',
    index=False,
)

### Predicting single inputs

In [68]:
# Prompt for a single review to classify; the typed text is stored in `review`.
review = input("Give me an input to perform a sentiment analysis.\n>")

Give me an input to perform a sentiment analysis.
>We are so glad we found this place.


In [69]:
# Sample inputs for quick manual testing (uncomment one to override `review`):
# review = "We are so glad we found this place."
# review = "I'm not sure I will ever come back."
# review = "Loved it...friendly servers, great food, wonderful and imaginative menu.!"

# Run the input through clean_review first, exactly as the batch corpus is
# before cv.transform; vectorising the raw text would skip stopword removal
# and stemming and so produce features inconsistent with the batch path.
processed_input = cv.transform([clean_review(review)]).toarray()[0]
# predict expects a 2-D batch, so wrap the single feature row and unwrap the result.
prediction = classifier.predict([processed_input])[0]

In [70]:
# Human-readable names for the two labels the classifier can return.
prediction_map = {0: "negative", 1: "positive"}
print(f"The model believes the review is {prediction_map[prediction]}.")

The model believes the review is positive.
