In [None]:
import numpy as np
import pandas as pd

import boto3
import json

import nltk
from nltk import data
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer

import matplotlib.pyplot as plt
import seaborn as sns

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
# Show the full text of every DataFrame cell instead of truncating it; the
# review bodies displayed later in the notebook are long.
# `None` is the supported "no limit" value: the legacy `-1` sentinel was
# deprecated in pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

# Fetch the NLTK resources the notebook relies on:
#   punkt     -> sentence tokenizer model loaded via `data.load(...)`
#   stopwords -> English stop-word list
#   wordnet   -> lexicon used by WordNetLemmatizer
# A loop (rather than a bare trailing call) also keeps the cell from echoing
# the return value of the last download.
for _resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(_resource, quiet=True)

In [None]:
# Shared NLP resources, built once here and reused by the cells below.

# English Punkt sentence splitter, loaded from the local NLTK data directory.
sentence_tokenizer = data.load('tokenizers/punkt/english.pickle')

# NLTK's English stop-word list (consumed by `preprocess`).
english_stop_words = stopwords.words('english')

# Amazon Comprehend client pinned to the us-west-2 region.
# NOTE(review): nothing in the visible cells references `comprehend`;
# confirm it is still needed before keeping the AWS dependency.
comprehend = boto3.client(service_name='comprehend', region_name='us-west-2')

# VADER analyzer used to score the topic-specific sentences.
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Topic phrases searched for inside each review sentence.
topics = [
    'material quality',
    'battery life',
    'sound quality',
    'volume control',
    'tech support',
]

# Stage 1: load the raw export and keep only rows that actually have review text.
reviews_raw = (
    pd.read_csv('review_details.csv', engine='python')
    .dropna(subset=['review_content_text'])
)

# Stage 2: one row per review id.
reviews_deduped = reviews_raw.drop_duplicates(subset=['review_id'])

# Stage 3: keep only reviews flagged as verified (is_verified == 1).
reviews_verified = reviews_deduped.loc[reviews_deduped['is_verified'] == 1]

# Subset restricted to two specific ASINs.
# NOTE(review): `reviews_bluetooth` is not used by the visible cells; the
# working frame below is built from *all* verified reviews. Confirm intent.
reviews_bluetooth = reviews_verified.loc[
    reviews_verified['review_asin'].isin(['B074QLB1Y7', 'B00P24XKS8'])
]

# Working copy that later cells extend with per-topic columns; copying keeps
# `reviews_verified` untouched.
reviews = reviews_verified.copy()

# Distinct product ids, used as the ASIN choices of the interactive filter.
asins = list(reviews_verified['review_asin'].unique())

In [None]:
def preprocess(corpus):
    """Normalize documents: lower-case, drop stop words, lemmatize.

    Each document is split on whitespace. Every token is lower-cased *before*
    the stop-word check, so capitalised stop words such as "The" or "It" are
    removed as well (the stop-word entries are compared in lower case).
    Remaining tokens are lemmatized with WordNet and re-joined with single
    spaces. Punctuation attached to a token is left in place.

    Args:
        corpus: iterable of strings (e.g. the sentences of one review).

    Returns:
        list[str]: one normalized string per input document, in input order.
    """
    lemmatizer = WordNetLemmatizer()
    # Set membership is O(1); the module-level stop-word list is O(n) per token.
    stop_words = set(english_stop_words)

    normalized_docs = []
    for doc in corpus:
        lowered_tokens = (word.lower() for word in doc.split())
        kept_tokens = [lemmatizer.lemmatize(token)
                       for token in lowered_tokens
                       if token not in stop_words]
        normalized_docs.append(' '.join(kept_tokens))
    return normalized_docs

def sentiment(score, threshold=0.05):
    """Map a compound polarity score to a sentiment label.

    The cutoffs are inclusive, following the thresholds documented for VADER's
    compound score (positive: >= 0.05, negative: <= -0.05, otherwise neutral).
    The in-between band is labelled 'Mixed' in this notebook.

    Args:
        score: compound polarity score, expected in [-1, 1].
        threshold: absolute score at which a label stops being 'Mixed'.

    Returns:
        str: 'Positive', 'Negative' or 'Mixed'.
    """
    if score >= threshold:
        return 'Positive'
    if score <= -threshold:
        return 'Negative'
    return 'Mixed'

In [None]:
# Tag every review with, per topic, the sentences that mention the topic and
# the sentiment label of those sentences.
#
# `sentiment` and `preprocess` come from the helper cell directly above; this
# cell deliberately does not re-define `sentiment`, so there is a single
# definition to maintain.

# Create (or reset) both columns for every topic up front. This guarantees the
# columns exist even for a topic that no review mentions, and makes re-running
# the cell start from a clean slate instead of keeping stale values.
for topic in topics:
    reviews['topic_{}'.format(topic)] = None
    reviews['topic_{}_sentiment'.format(topic)] = None

# Normalize the topic phrases the same way as the sentences they are matched
# against (lower-cased, stop words removed, lemmatized).
topic_keys = dict(zip(topics, preprocess(topics)))

for index, review in reviews['review_content_text'].items():
    sentences = sentence_tokenizer.tokenize(review)
    sentences_preprocessed = preprocess(sentences)

    for topic in topics:
        # Match on the normalized sentence so that case and inflection do not
        # hide a mention, but keep the *raw* sentence: it is what gets shown
        # to the user and what VADER scores.
        sentences_topic = [raw
                           for raw, normalized in zip(sentences, sentences_preprocessed)
                           if topic_keys[topic] in normalized]
        if not sentences_topic:
            continue

        sentence_topic = ' '.join(sentences_topic)
        reviews.at[index, 'topic_{}'.format(topic)] = sentence_topic

        # Best effort: a scoring failure on one review is reported and skipped
        # so that it does not abort the whole pass.
        try:
            vs = analyzer.polarity_scores(sentence_topic)
        except Exception as ex:
            print('review {!r}, topic {!r}: {}'.format(index, topic, ex))
            continue

        reviews.at[index, 'topic_{}_sentiment'.format(topic)] = sentiment(vs['compound'])

# Amazon Review Sentiments by Topics

In [None]:
@interact
def filter_by_sentiment(ASIN = asins,
                        Topic=topics,
                        Sentiment = ['Positive','Negative','Mixed']):
    """Interactive view of topic sentiment for one product.

    Draws a bar chart with the number of reviews per sentiment label for the
    selected ASIN and topic, then returns the reviews of that ASIN whose
    topic sentiment equals the selected label.

    Both the chart and the table are restricted to the selected ASIN, so the
    two views always describe the same set of reviews.

    Args:
        ASIN: product id to inspect (dropdown built from `asins`).
        Topic: topic phrase to inspect (dropdown built from `topics`).
        Sentiment: sentiment label to list reviews for.

    Returns:
        pandas.DataFrame with columns `review_content_text` and
        `topic_<Topic>`, re-indexed from 0. Empty when nothing matches.
    """
    topic_col = 'topic_{}'.format(Topic)
    sentiment_col = 'topic_{}_sentiment'.format(Topic)
    result_cols = ['review_content_text', topic_col]

    # The per-topic columns may be absent if the tagging cell never found the
    # topic; show an empty result instead of failing with a KeyError.
    if topic_col not in reviews.columns or sentiment_col not in reviews.columns:
        return pd.DataFrame(columns=result_cols)

    reviews_asin = reviews[reviews.review_asin == ASIN]
    reviews_topic = reviews_asin[reviews_asin[topic_col].notnull()]

    # Reindex to the full, alphabetically ordered label set so the chart always
    # has the same three bars and an empty selection plots zeros rather than
    # raising on an empty Series.
    counts = (reviews_topic[sentiment_col]
              .value_counts()
              .reindex(['Mixed', 'Negative', 'Positive'], fill_value=0))

    fig, ax = plt.subplots()
    counts.plot(kind='bar', title=Topic, ax=ax)
    ax.set(xlabel='Sentiment', ylabel='Reviews')
    # Render now so the figure appears above the returned table and a fresh
    # figure is used on every widget change.
    plt.show()

    matches = reviews_asin.loc[reviews_asin[sentiment_col] == Sentiment, result_cols]
    return matches.reset_index(drop=True)