# Sentiment Analysis for the Cell Phones and Accessories Category on Amazon



In [6]:
import nltk
import numpy as np
from sklearn.utils import shuffle
import model_evaluation_utils as meu
import utils
np.set_printoptions(precision=2, linewidth=80)
import warnings
import spacy
import numpy as np
import pandas as pd
import pickle
import multiprocessing
from multiprocessing import Process

# Notebook-wide setup: silence warnings and keep NumPy reprs compact.
warnings.filterwarnings("ignore")
np.set_printoptions(precision=2, linewidth=80)

# Load the spaCy pipeline registered as 'en_vecs' with the flags given here.
nlp = spacy.load('en_vecs', parse=False, tag=False, entity=False)

# Fetch the NLTK corpora requested by this notebook (same order as before).
for corpus_name in ('wordnet', 'sentiwordnet'):
    nltk.download(corpus_name)

# Location of the pre-processed reviews and the number of worker processes
# used by the multiprocessing cells further down.
PROCESSED_FILENAME = './data/amazon_reviews_processed.pickle'
NWORKERS = 16


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\rkaushik\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

# Load normalized data from processed file


In [None]:
# Load the pre-processed reviews from the pickle written by the
# preprocessing step.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only point PROCESSED_FILENAME at a file you produced yourself.
with open(PROCESSED_FILENAME, "rb") as f:  # 'with' guarantees the handle is closed
    dfdb = pickle.load(f)

#filter rows out that have less than 20 word tokens
dfdb = dfdb[dfdb['Clean_Review_Tokens'].apply(lambda x: len(x) >= 20)]


# Prune data for development if needed

In [None]:
# Rows to keep per rating bucket for quick local runs; 0 keeps every row.
trial=0

#subset for local runs, will remove on final runs or on server
# One boolean mask per value of the 'overall' rating column.
# The `== k` test already implies `< k + 1`, so the second clause on
# three/two/one/zero is redundant.
# NOTE(review): if half-open ranges were intended (as in `four`), these
# would need `>=` instead of `==` - confirm.
five=(dfdb['overall'] == 5.0)
four=(dfdb['overall'] >= 4.0) & (dfdb['overall'] < 5.0)
three=(dfdb['overall'] == 3.0) & (dfdb['overall'] < 4.0)
two=(dfdb['overall'] == 2.0) & (dfdb['overall'] < 3.0)
one=(dfdb['overall'] == 1.0) & (dfdb['overall'] < 2.0)
zero=(dfdb['overall'] == 0.0) & (dfdb['overall'] < 1.0)

# NOTE(review): `three` is deliberately left out of this list to mirror the
# original cell, so 3-star rows are dropped - presumably because they are
# neutral for a binary sentiment task; confirm.
selected_masks = [five, four, two, one, zero]

if trial > 0:
    bucket_frames = [dfdb[mask].iloc[0:trial] for mask in selected_masks]
else:
    bucket_frames = [dfdb[mask] for mask in selected_masks]

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the buckets
# in the same order and keeps the original index labels.
df = pd.concat(bucket_frames)

#randomize dataset
# NOTE(review): no random_state is passed, so the row order - and the
# train/test split derived from it - presumably changes on every run.
df = shuffle(df)


# Sample of the processed data (note the Clean_Review column)

In [None]:
# Report the size of the working frame, then preview its first 20 rows.
print(f'Total Rows on processed dataset: {len(df)}')
print('Sample of processed dataset. Notice the column named Clean_Review')
df.head(20)



# Split train and test data


In [None]:
# Pull the text, labels and token lists out of the shuffled frame as arrays.
reviews = np.array(df['Clean_Review'])
sentiments = np.array(df['sentiment'])
reviews_tokens = np.array(df['Clean_Review_Tokens'])

# The first 75% of the rows form the training set, the remainder the test set.
cutoff = round(len(df) * 0.75)

train_reviews, test_reviews = reviews[:cutoff], reviews[cutoff:]
train_reviews_tokens, test_reviews_tokens = reviews_tokens[:cutoff], reviews_tokens[cutoff:]

# Labels are cast to int for both partitions.
train_sentiments = sentiments[:cutoff].astype('int')
test_sentiments = sentiments[cutoff:].astype('int')

#sample_review_ids = [1000, 5000, 10000,15000,20000,25000,30000,35000,40000,45000,50000,60000]

# Train and test dataset sizes

In [None]:
print('Total Rows on train dataset: ' + str(len(train_reviews)))
print('Total Rows on test dataset: ' + str(len(test_reviews)))


# Sentiment Analysis with AFINN


In [9]:
# Lexicon-based scorer from the third-party `afinn` package.
from afinn import Afinn

# Shared scorer instance used by the prediction cell below.
# NOTE(review): emoticons=True presumably makes the scorer take emoticons
# into account as well - confirm against the afinn documentation.
afn = Afinn(emoticons=True) 


# Predict sentiment by AFINN for test dataset

In [11]:
# Score each held-out review with AFINN, then binarize the score:
# a score of at least 1.0 is labelled 1, anything lower is labelled 0.
sentiment_polarity = list(map(afn.score, test_reviews))
predicted_sentiments_afinn = [int(polarity >= 1.0) for polarity in sentiment_polarity]

# Evaluate model performance of AFINN

In [12]:
# Compare the AFINN predictions with the held-out labels.
meu.display_model_performance_metrics(
    true_labels=test_sentiments,
    predicted_labels=predicted_sentiments_afinn,
    classes=[1, 0],
)

Model Performance metrics:
------------------------------
Accuracy: 0.7054
Precision: 0.7212
Recall: 0.7054
F1 Score: 0.6993

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.66      0.84      0.74      7587
    negative       0.78      0.56      0.65      7413

    accuracy                           0.71     15000
   macro avg       0.72      0.70      0.70     15000
weighted avg       0.72      0.71      0.70     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       6405     1182
        negative       3237     4176


  labels=level_labels),
  labels=level_labels))


# Sentiment Analysis with SentiWordNet


## Build SentiWordnet model (multiprocessing)
## Predict SentiWordnet sentiment for test dataset

In [16]:
# Serial equivalent, kept for reference:
#predicted_sentiments = [analyze_sentiment_sentiwordnet_lexicon(review, verbose=False) for review in test_reviews]

# Fan the test reviews out over NWORKERS processes. Each worker gets one
# contiguous slice and reports back over its own pipe.

#number of observations
size=len(test_reviews)
#number of observations in each process
iterSize=round(size/NWORKERS)
#holds the processes
processes=[]

print('To process: ' + str(size) + ' across '+ str(NWORKERS) +' workers ')

# Slot k receives worker k's predictions, so concatenating the slots in
# order reproduces the order of test_reviews.
sentiments_holder=[None]*NWORKERS
parent_conn_holder=[]

for i in range(0,NWORKERS):
    start=i*iterSize
    # The last worker also takes whatever remainder the rounding left over.
    stop=size if i==(NWORKERS-1) else start+iterSize
    #split df for parallel proc
    reviews_proc=test_reviews[start:stop]
    # creating a pipe 
    parent_conn, child_conn = multiprocessing.Pipe()
    p = Process(target=utils.analyze_sentiment_sentiwordnet_lexicon_multiproc, args=(reviews_proc,child_conn,i))
    processes.append(p)
    p.start()
    # The worker now holds its own handle to the sending end. Closing the
    # parent's copy releases the handle and lets recv() raise EOFError -
    # instead of blocking forever - if the worker exits without sending.
    child_conn.close()
    parent_conn_holder.append(parent_conn)

# Drain every pipe BEFORE joining: a worker blocked on a full pipe would
# otherwise never exit and join() would deadlock.
for parent_conn in parent_conn_holder:
    try:
        review_iter=parent_conn.recv()
    finally:
        parent_conn.close()
    # First element is used as the worker index, the rest as its predictions.
    sentiments_holder[review_iter[0]]=review_iter[1:]

for p in processes:
    p.join()

# Flatten the per-worker results into one prediction list.
predicted_sentiments_sn=[review for item in sentiments_holder for review in item]


# Evaluate Sentiwordnet model performance

In [17]:
# Compare the SentiWordNet predictions with the held-out labels.
meu.display_model_performance_metrics(
    true_labels=test_sentiments,
    predicted_labels=predicted_sentiments_sn,
    classes=[1, 0],
)

Model Performance metrics:
------------------------------
Accuracy: 0.6776
Precision: 0.6804
Recall: 0.6776
F1 Score: 0.6758

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.66      0.75      0.70      7587
    negative       0.70      0.61      0.65      7413

    accuracy                           0.68     15000
   macro avg       0.68      0.68      0.68     15000
weighted avg       0.68      0.68      0.68     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       5679     1908
        negative       2928     4485


# Sentiment Analysis with VADER


## Build Vader model (multiprocessing)
## Predict Vader sentiment for test dataset

In [25]:

# Serial equivalent, kept for reference:
#predicted_sentiments = [analyze_sentiment_vader_lexicon(review, threshold=0.4, verbose=False) for review in test_reviews]

# Fan the test reviews out over NWORKERS processes. Each worker gets one
# contiguous slice and reports back over its own pipe.

#number of observations
size=len(test_reviews)
#number of observations in each process
iterSize=round(size/NWORKERS)
#holds the processes
processes=[]

print('To process: ' + str(size) + ' across '+ str(NWORKERS) +' workers ')

# Slot k receives worker k's predictions, so concatenating the slots in
# order reproduces the order of test_reviews.
sentiments_holder=[None]*NWORKERS
parent_conn_holder=[]

for i in range(0,NWORKERS):
    start=i*iterSize
    # The last worker also takes whatever remainder the rounding left over.
    stop=size if i==(NWORKERS-1) else start+iterSize
    #split df for parallel proc
    reviews_proc=test_reviews[start:stop]
    # creating a pipe 
    parent_conn, child_conn = multiprocessing.Pipe()
    # 0.4 matches the threshold used by the serial call shown above.
    p = Process(target=utils.analyze_sentiment_vader_multiproc, args=(reviews_proc,0.4,child_conn,i))
    processes.append(p)
    p.start()
    # The worker now holds its own handle to the sending end. Closing the
    # parent's copy releases the handle and lets recv() raise EOFError -
    # instead of blocking forever - if the worker exits without sending.
    child_conn.close()
    parent_conn_holder.append(parent_conn)

# Drain every pipe BEFORE joining: a worker blocked on a full pipe would
# otherwise never exit and join() would deadlock.
for parent_conn in parent_conn_holder:
    try:
        review_iter=parent_conn.recv()
    finally:
        parent_conn.close()
    # First element is used as the worker index, the rest as its predictions.
    sentiments_holder[review_iter[0]]=review_iter[1:]

for p in processes:
    p.join()

# Flatten the per-worker results into one prediction list.
predicted_sentiments_vader=[review for item in sentiments_holder for review in item]


# Evaluate Vader model performance

In [26]:
# Compare the VADER predictions with the held-out labels.
meu.display_model_performance_metrics(
    true_labels=test_sentiments,
    predicted_labels=predicted_sentiments_vader,
    classes=[1, 0],
)





Model Performance metrics:
------------------------------
Accuracy: 0.6964
Precision: 0.704
Recall: 0.6964
F1 Score: 0.6929

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.67      0.80      0.73      7587
    negative       0.74      0.59      0.66      7413

    accuracy                           0.70     15000
   macro avg       0.70      0.70      0.69     15000
weighted avg       0.70      0.70      0.69     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       6066     1521
        negative       3033     4380
