# Importing libraries

In [4]:
import numpy as np
import pandas as pd

# Importing dataset

In [5]:
# Read the fresh restaurant reviews from a tab-separated dump and preview
# the first rows.
dataset = pd.read_csv(
    './a2_RestaurantReviews_FreshDump.tsv',
    sep='\t',    # tab-separated file
    quoting=3,   # 3 == csv.QUOTE_NONE: quote characters in reviews are kept as plain text
)
dataset.head()

Unnamed: 0,Review
0,Spend your money elsewhere.
1,Their regular toasted bread was equally satisf...
2,The Buffet at Bellagio was far from what I ant...
3,"And the drinks are WEAK, people!"
4,-My order was not correct.


# Data cleaning

In [2]:
# Text-cleaning prerequisites: regex support plus NLTK's stop-word list and
# Porter stemmer.
import re
import nltk

# Download the NLTK 'stopwords' corpus that stopwords.words() reads below.
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Single stemmer instance, reused for every token in the cleaning loop.
ps = PorterStemmer()

# English stop words minus 'not': the cleaning loop drops every word found in
# this list, so removing 'not' here keeps negations in the cleaned reviews.
# list.remove raises ValueError if 'not' is absent from the list.
all_stopwords = stopwords.words('english')
all_stopwords.remove('not')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/leihuang/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
# Normalise every review into a space-joined string of stemmed tokens.
# `corpus` ends up with one cleaned string per row of dataset['Review'],
# in the same order.
corpus = []

# Build the stop-word lookup once. The previous version rebuilt
# set(all_stopwords) for every single word of every review.
stopword_set = set(all_stopwords)

for review in dataset['Review']:
    # Replace every character that is not an ASCII letter with a space,
    # then lowercase so matching against the stop-word list is case-insensitive.
    letters_only = re.sub('[^a-zA-Z]', ' ', review).lower()
    # Drop stop words first, then stem the surviving tokens.
    stems = [ps.stem(word) for word in letters_only.split() if word not in stopword_set]
    corpus.append(' '.join(stems))


# Data transformation

In [9]:
# Load the pickled bag-of-words (BoW) vectorizer used to turn cleaned reviews
# into count features (presumably a fitted CountVectorizer; confirm against
# the notebook that produced the pickle).
from sklearn.feature_extraction.text import CountVectorizer
import pickle

cvFile = './c1_BoW_Sentiment_model.pkl'
# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only load pickles from a trusted source.
# The `with` block closes the file handle as soon as loading finishes
# (the bare open() call used before left it open).
with open(cvFile, 'rb') as cv_handle:
    cv = pickle.load(cv_handle)

In [11]:
# Vectorise the cleaned reviews with the loaded BoW model, then densify the
# result into a regular array for the classifier.
fresh_counts = cv.transform(corpus)
X_fresh = fresh_counts.toarray()
# Display the feature-matrix dimensions as the cell output.
X_fresh.shape

(100, 1420)

# Predictions (via sentiment classifier)

In [12]:
import joblib

# Path of the serialised sentiment classifier, named like `cvFile` above.
classifierFile = './c2_Classifier_Sentiment_Model'
# NOTE: joblib.load is pickle-based; only load model files from a trusted source.
classifier = joblib.load(classifierFile)

In [13]:
# Run the loaded classifier on the fresh feature matrix; y_pred holds one
# predicted label per row of X_fresh.
# NOTE(review): labels appear to be 0/1; presumably 1 = positive sentiment —
# confirm against the training notebook.
y_pred = classifier.predict(X_fresh)
# Print the full label array as plain text.
print(y_pred)

[0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0
 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0]


In [14]:
# Attach the predictions to the reviews as a new 'predicted_label' column.
# The array is converted to a plain Python list first; rows and labels are
# matched by position.
predicted_labels = y_pred.tolist()
dataset['predicted_label'] = predicted_labels
# Preview the first rows with the new column in place.
dataset.head()

Unnamed: 0,Review,predicted_label
0,Spend your money elsewhere.,0
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
3,"And the drinks are WEAK, people!",0
4,-My order was not correct.,0
