## Importing libraries

In [1]:
import numpy as np
import pandas as pd

## Importing dataset

In [2]:
# Read the tab-separated reviews file into a DataFrame and preview the first rows.
reviews_path = 'Data/Fresh Restaurant Reviews.tsv'
dataset = pd.read_csv(reviews_path, sep='\t')
dataset.head()

Unnamed: 0,Review
0,Spend your money elsewhere.
1,Their regular toasted bread was equally satisf...
2,The Buffet at Bellagio was far from what I ant...
3,"And the drinks are WEAK, people!"
4,-My order was not correct.


## Data Pre-processing
#### same as training
1. Dropping special characters and numbers
2. Converting all words to lowercase
3. Dropping stop words
4. Stemming

In [3]:
import re
import nltk

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [4]:
# One-time setup: uncomment on a machine that does not yet have the NLTK
# stop-word corpus downloaded.
# nltk.download('stopwords')

# Porter stemmer used to reduce each token to its stem.
ps = PorterStemmer()

# English stop-word list.
# The assignment below rebinds the name `stopwords` from the imported corpus
# reader to a plain list. Reading the corpus through `nltk.corpus.stopwords`
# (instead of the bare `stopwords` name) keeps this cell safe to re-run:
# `stopwords.words(...)` would raise AttributeError on the second execution
# because a list has no `.words` method.
stopwords = nltk.corpus.stopwords.words('english')

In [5]:
# Build the cleaned corpus: one string of space-joined stems per review.
corpus = []

# Set membership is O(1) per token; the stop-word collection is only read here.
stop_words = set(stopwords)

# Iterate over every review in the column instead of a hard-coded
# range(0, 100): the loop then neither fails nor silently drops rows when the
# file has a different number of reviews or a non-default index.
for raw_review in dataset['Review']:
    # re.sub(pattern, repl, string)
    # [^a-zA-Z] matches any single character that is NOT an ASCII letter, so
    # digits, punctuation and other symbols are each replaced by a space.
    letters_only = re.sub('[^a-zA-Z]', ' ', raw_review)
    tokens = letters_only.lower().split()
    # Drop stop words, then stem what is left.
    stems = [ps.stem(token) for token in tokens if token not in stop_words]
    corpus.append(' '.join(stems))

## Data transformation
#### same as training
Transform the data into a bag-of-words representation.

In [6]:
# Load the pickled bag-of-words vectorizer from disk.
from sklearn.feature_extraction.text import CountVectorizer
import pickle

cvFile = 'BoW.pkl'

# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
# The context manager closes the file handle once loading is done; the
# original bare open() call left it open until garbage collection.
with open(cvFile, "rb") as bow_file:
    cv = pickle.load(bow_file)


In [7]:
# Vectorize the cleaned corpus with the loaded vectorizer, then convert the
# result to a dense array for display and prediction.
bow_matrix = cv.transform(corpus)
X_fresh = bow_matrix.toarray()
X_fresh

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

## Predictions (via sentiment classifier)

In [8]:
import joblib

# Restore the saved classifier from disk.
model_path = 'Models/LR_Classifier_Model'
classifier = joblib.load(model_path)

# Predict one label per row of the bag-of-words matrix and print them.
y_pred = classifier.predict(X_fresh)
print(y_pred)

[0 1 1 0 1 1 1 1 0 1 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1
 1 0 0 1 0 0 1 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0]


In [9]:
# Attach the predictions to the reviews as a `predicted_label` column
# (added, or overwritten if the cell is re-run) and preview the result.
dataset = dataset.assign(predicted_label=y_pred.tolist())
dataset.head()

Unnamed: 0,Review,predicted_label
0,Spend your money elsewhere.,0
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
3,"And the drinks are WEAK, people!",0
4,-My order was not correct.,1


In [None]:
# Save the reviews together with their predicted labels as a tab-separated
# file, without writing the DataFrame index as an extra column.
dataset.to_csv(
    "Fresh Reviews Predicted.tsv",
    sep='\t',
    encoding='UTF-8',
    index=False,
)