# 1. Import Necessary Packages

In [1]:
import pandas as pd
import numpy as np

import sklearn

import normalization as nm
import FeatureEngineering as fe
import trainTestData as tsplit
import NaiveBayesClassifier as nb

import warnings 
warnings.filterwarnings("ignore", category=DeprecationWarning)

# 2. Load Data into DataFrame

In [2]:
# Widen the column display so long review text is not cut off too early in previews.
pd.options.display.max_colwidth = 200

# Load the raw Amazon reviews. read_csv already builds the frame, so no empty
# placeholder DataFrame is created first.
df = pd.read_csv('AmazonReviews.csv')
df.head()

Unnamed: 0,Reviewer,DateOfReview,Colour,Size,VerifiedPurchase,Rating,ReviewTitle,ReviewDescription
0,Amazon Customer,2018-10-31,Silver,256GB,True,1.0,Amazon Return support is too poor,"I purchased iphone X on 26th Oct'18 and started using from 28th Oct'18. When i received the product, it was already opened which i informed Amazon representative but i was told that it is original..."
1,ShowrobDa,2018-10-12,Space Grey,64GB,True,5.0,"An amazing deal at INR 69,999/-","This is my 3rd iPhone after iPhone 4S and iPhone 6S Plus.Being an utter working middle class guy, I don't switch my iPhones very often (my android phones barely lasted me 9 months though). I do co..."
2,Ktron,2018-10-11,Space Grey,256GB,True,5.0,Amazing beast(iPhone X 256 GB) for discount!!,"Thanks to Appario Retail Private Ltd, I got this 256 GB beast during amazon great Indian sale for under 80k. Additionally received cash back of Rs.1000. On delivery, I checked at imei24 and apple ..."
3,Amazon Customer,2018-11-02,Space Grey,64GB,True,1.0,Battery charging is very bad in my new iPhone X,I ordered new iPhone X grey on Amazon sale. Phone looks absolutely fine but battery is very weak though it has fast charging option my phone requires 4-5 hours to get charged completely n also my ...
4,Amritpal Singh,2018-10-24,Space Grey,64GB,True,1.0,Shame don't buy,"Apple iPhone X (Space Grey, 3GB RAM, 64GB Storage)Poor quality productDidn’t turn on at all after unboxingSHAME and waste of money"


# 3. Preprocess Text: ReviewTitle, ReviewDescription, and a Combined Meta Corpus

In [3]:
# Build three raw corpora from the review frame: titles only, descriptions only,
# and a "meta" corpus drawn from reviewer, colour, size, title and description.
short_corpus, long_corpus, meta_corpus = (
    nm.build_corpus(df, columns)
    for columns in (
        ['ReviewTitle'],
        ['ReviewDescription'],
        ['Reviewer', 'Colour', 'Size', 'ReviewTitle', 'ReviewDescription'],
    )
)

# Normalize each corpus with the project's normalization helper, in the same
# order the corpora were built.
norm_short_corpus, norm_long_corpus, norm_meta_corpus = (
    nm.normalize_corpus(corpus)
    for corpus in (short_corpus, long_corpus, meta_corpus)
)

# Keep the normalized text alongside the raw columns for inspection.
df['title_processed'] = norm_short_corpus
df['desc_processed'] = norm_long_corpus
df['metaCorpus_processed'] = norm_meta_corpus

df.head()

Unnamed: 0,Reviewer,DateOfReview,Colour,Size,VerifiedPurchase,Rating,ReviewTitle,ReviewDescription,title_processed,desc_processed,metaCorpus_processed
0,Amazon Customer,2018-10-31,Silver,256GB,True,1.0,Amazon Return support is too poor,"I purchased iphone X on 26th Oct'18 and started using from 28th Oct'18. When i received the product, it was already opened which i informed Amazon representative but i was told that it is original...",amazon return support poor,purchase iphone x 26th oct18 start use 28th oct18 receive product already open inform amazon representative tell original product not issue within two day start give issue network wifi connectivit...,amazon customer silver 256 gb amazon return support poor purchase iphone x 26th oct18 start use 28th oct18 receive product already open inform amazon representative tell original product not issue...
1,ShowrobDa,2018-10-12,Space Grey,64GB,True,5.0,"An amazing deal at INR 69,999/-","This is my 3rd iPhone after iPhone 4S and iPhone 6S Plus.Being an utter working middle class guy, I don't switch my iPhones very often (my android phones barely lasted me 9 months though). I do co...",amazing deal inr 69999,3rd iphone iphone 4s iphone 6s plus utter work middle class guy not switch iphone often android phone barely last 9 month though consider extremely expensive would not go one unless feel worth mon...,showrobda space grey 64 gb amazing deal inr 69999 3rd iphone iphone 4s iphone 6s plus utter work middle class guy not switch iphone often android phone barely last 9 month though consider extremel...
2,Ktron,2018-10-11,Space Grey,256GB,True,5.0,Amazing beast(iPhone X 256 GB) for discount!!,"Thanks to Appario Retail Private Ltd, I got this 256 GB beast during amazon great Indian sale for under 80k. Additionally received cash back of Rs.1000. On delivery, I checked at imei24 and apple ...",amazing beast iphone x 256 gb discount,thank appario retail private ltd get 256 gb beast amazon great indian sale 80k additionally receive cash back rs 1000 delivery check imei24 apple site checkcoverage authenticity warranty product s...,ktron space grey 256 gb amazing beast iphone x 256 gb discount thank appario retail private ltd get 256 gb beast amazon great indian sale 80k additionally receive cash back rs 1000 delivery check ...
3,Amazon Customer,2018-11-02,Space Grey,64GB,True,1.0,Battery charging is very bad in my new iPhone X,I ordered new iPhone X grey on Amazon sale. Phone looks absolutely fine but battery is very weak though it has fast charging option my phone requires 4-5 hours to get charged completely n also my ...,battery charging bad new iphone x,order new iphone x grey amazon sale phone look absolutely fine battery weak though fast charge option phone require 4 5 hour get charge completely n also phone slow feel like get refurbish phone g...,amazon customer space grey 64 gb battery charging bad new iphone x order new iphone x grey amazon sale phone look absolutely fine battery weak though fast charge option phone require 4 5 hour get ...
4,Amritpal Singh,2018-10-24,Space Grey,64GB,True,1.0,Shame don't buy,"Apple iPhone X (Space Grey, 3GB RAM, 64GB Storage)Poor quality productDidn’t turn on at all after unboxingSHAME and waste of money",shame not buy,apple iphone x space grey 3 gb ram 64 gb storage poor quality productdidnt turn unboxingshame waste money,amritpal singh space grey 64 gb shame not buy apple iphone x space grey 3 gb ram 64 gb storage poor quality productdidnt turn unboxingshame waste money


# 4. Count Vectorizer: Get WordCount Distribution

In [10]:

# Only the first `sample_size` reviews are vectorized; the same cut-off is
# applied to the labels so features and ratings stay row-aligned.
sample_size = 500

cv, cv_features = fe.countVectorizer(
    norm_long_corpus[:sample_size],
    max_df=1.0,
    min_df=0.01,
    max_features=34,
)
X_cv = cv_features
y_cv = df['Rating'][:sample_size]

# Display the feature-matrix dimensions.
X_cv.shape

(500, 34)

# 5. Split Data into Train-Test set

In [11]:
# Partition the count features and ratings into train/test sets with the project helper.
# NOTE(review): the recorded shapes (350 training rows, 150 rows in the report) suggest a
# 70/30 split; the ratio and any random seed live inside trainTestData.split_data — confirm
# they are fixed so the run is reproducible.
X_train, X_test, y_train, y_test= tsplit.split_data(X_cv,y_cv)

In [12]:
# Inspect the dimensions of the training feature matrix.
X_train.shape

(350, 34)

In [13]:
# Inspect the dimensions of the training labels; the row count should match X_train.
y_train.shape

(350,)

# 6. (Multinomial) Naive Bayes Classifier for Sentiment Analysis

In [14]:
# Train the Naive Bayes model through the project helper and keep the returned object
# for the prediction cells. The helper receives both partitions; in the recorded run it
# printed a confusion matrix and a classification report covering 150 rows, presumably
# the test partition — confirm in NaiveBayesClassifier.
# NOTE(review): every score in the recorded report is 1.00, which is unusual for 34
# count features — worth checking for duplicated reviews leaking across the split.
model = nb.nbTrainer(X_train, X_test, y_train, y_test)


[[86  0  0]
 [ 0 14  0]
 [ 0  0 50]]
             precision    recall  f1-score   support

        1.0       1.00      1.00      1.00        86
        3.0       1.00      1.00      1.00        14
        5.0       1.00      1.00      1.00        50

avg / total       1.00      1.00      1.00       150



In [15]:

# Predict on all of X_cv, the matrix that was split into train and test above, so the
# rows the model was trained on are included in these predictions.
# NOTE(review): the recorded predictions are -1/0/1 while the classification report
# lists Rating classes 1.0/3.0/5.0 — presumably nbPredictor maps ratings to sentiment
# codes; confirm in NaiveBayesClassifier.
preds = nb.nbPredictor(model, X_cv)
preds

array([-1.,  1.,  1., -1., -1.,  1., -1., -1.,  0., -1., -1.,  1.,  1.,
       -1., -1.,  1., -1., -1.,  0., -1., -1.,  1.,  1., -1., -1.,  1.,
       -1., -1.,  0., -1., -1.,  1.,  1., -1., -1.,  1., -1., -1.,  0.,
       -1., -1.,  1.,  1., -1., -1.,  1., -1., -1.,  0., -1., -1.,  1.,
        1., -1., -1.,  1., -1., -1.,  0., -1., -1.,  1.,  1., -1., -1.,
        1., -1., -1.,  0., -1., -1.,  1.,  1., -1., -1.,  1., -1., -1.,
        0., -1., -1.,  1.,  1., -1., -1.,  1., -1., -1.,  0., -1., -1.,
        1.,  1., -1., -1.,  1., -1., -1.,  0., -1., -1.,  1.,  1., -1.,
       -1.,  1., -1., -1.,  0., -1., -1.,  1.,  1., -1., -1.,  1., -1.,
       -1.,  0., -1., -1.,  1.,  1., -1., -1.,  1., -1., -1.,  0., -1.,
       -1.,  1.,  1., -1., -1.,  1., -1., -1.,  0., -1., -1.,  1.,  1.,
       -1., -1.,  1., -1., -1.,  0., -1., -1.,  1.,  1., -1., -1.,  1.,
       -1., -1.,  0., -1., -1.,  1.,  1., -1., -1.,  1., -1., -1.,  0.,
       -1., -1.,  1.,  1., -1., -1.,  1., -1., -1.,  0., -1., -1

# Predict Sentiment for the Entire Dataset

In [18]:
# Project the full corpus onto the vocabulary of the vectorizer fitted in section 4.
# Fitting a fresh vectorizer on the full corpus (as before) can select a different set
# of 34 terms, so the columns would no longer mean what the trained model expects.
# NOTE(review): assumes `cv` is the fitted scikit-learn CountVectorizer returned by
# fe.countVectorizer, and that nbPredictor accepts the sparse matrix that
# `transform` returns — confirm against FeatureEngineering / NaiveBayesClassifier.
X = cv.transform(norm_long_corpus)

# Score every review and attach the predicted sentiment to the frame.
senti = nb.nbPredictor(model, X)
df['Sentiment'] = senti

# Convert DataFrame to JSON

In [19]:
# Serialize each row as one JSON object (orient='records').
# DataFrame.to_json returns None when it is given a path, which left reviewJSON empty;
# build the string first so the variable holds the JSON, then write that same string
# to disk.
reviewJSON = df.to_json(orient='records')
with open('sentimentAnalysis.json', 'w', encoding='utf-8') as json_file:
    json_file.write(reviewJSON)