In [4]:
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 26 23:02:39 2018

@author: takalyan
"""

import pandas as pd

# Import various modules for string cleaning
from bs4 import BeautifulSoup
import re
from nltk.corpus import stopwords
import string
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from afinn import Afinn
from textblob import TextBlob


import textacy
import spacy
import numpy as np

data_df = pd.read_csv('../data/amazon_echo_10pgs.csv')
nlp = spacy.load('en_core_web_sm')



def cleanup_review( review,stop_words=False,stemmer=False,lemma=False):
    # Function to convert a document to a sequence of words,
    # optionally removing stop words.  Returns a list of words.
    #
    # Remove HTML
    words = BeautifulSoup(review,"lxml").get_text()


    #Remove special chars in this scrape output for reviews
    #reviews are contained in ['...']
    words = re.sub("(\[\')|(\'\])","", words) 

    # Remove non-letters. Keep , and .
    words = re.sub("[^a-zA-Z,.']"," ", words) 
    #
    
    #return
    nomarkup  = words

    # Convert words to lower case and split them
    #words= words.lower()
    
    if lemma:
        lemmatizer = WordNetLemmatizer()
        words = lemmatizer.lemmatize(words)
        
    #tokenize
    words = words.split()
    #
    # .remove punctuation from each word
    #table = str.maketrans('', '', string.punctuation)
    #words = [w.translate(table) for w in words]

    # Optionally remove stop words (false by default)
    if stop_words:
        stops = set(stopwords.words("english"))
        words = [w for w in words if not w in stops]
    

    #stemming of words
    if stemmer:
        porter = PorterStemmer()
        words = [porter.stem(word) for word in words]

   

    clean_review = "".join([" "+i if not i.startswith("'") and i not in string.punctuation else i for i in words]).strip()
    
 
          
    return(clean_review,nomarkup)

i=0
j=0

def get_vader_sentiment(text,threshold=0.1,verbose=False):
    
        # analyze the sentiment for review
    analyzer = SentimentIntensityAnalyzer()
    scores = analyzer.polarity_scores(text)
    
   
    
    # get aggregate scores and final sentiment
    agg_score = scores['compound']
    
    #final_sentiment:
    # 1 'positive' if agg_score > 0
    # 0 neutral 
    #-1 negative 
            
    if (agg_score >= threshold):
        sentiment = 1
    elif (agg_score < threshold and agg_score >= 0) :
        sentiment = 0
    else :
        sentiment = -1
        
    if (verbose and i <= sentence_print_count) :
        print(text)
        print ("Vader Sentiment:")
        print(scores,"\nDetected sentiment:%s\n"%sentiment)
   
    return sentiment



afinn = Afinn()

def get_afinn_sentiment(text,verbose=False):
    
    afinn_score = afinn.score(text)
    
    
    #Score is negative for negative reviews and positive for positive reviews
    if (afinn_score > 0):
        sentiment = 1
    elif (afinn_score == 0) :
        sentiment = 0
    else : 
        sentiment = -1
    
    if (verbose and j <= sentence_print_count):
            print ("Afinn sentiment:")
            print (afinn_score,"\n","Detectec sentiment:%i\n"%sentiment)
            
    return sentiment

def get_textblob_sentiment (text,verbose=False):
    
    textblob_score = TextBlob(text).sentiment
    
  
    if (textblob_score.polarity > 0):
        sentiment = 1
    elif (textblob_score.polarity == 0) :
        sentiment = 0
    else: 
        sentiment = -1
    
    if (verbose and j <= sentence_print_count):
            print ("Textblob sentiment:")
            print (textblob_score.polarity,"\n","Detected sentiment:%i\n"%sentiment)
    
    
    return sentiment


reviews = ""

#Iterate through dataframe of reviews.
#Each row is a review
#Create one large corpus of all reviews
for index,row  in data_df.iterrows():
    clean_review,nomarkup_review = cleanup_review(row.body)
    reviews = reviews + clean_review 


#Create spacy doc of reviews
#doc = textacy.Doc(reviews,lang=u'en_core_web_sm')
spacy_doc = nlp(reviews)

pos_reviews = ""
neg_reviews = ""
neut_reviews = ""
sentence_print_count = 50

#Iterate through sentences in doc and get sentiment
for sentence in spacy_doc.sents :
    
    #extract sentence string for doc sent
    sentence_str = sentence.string.strip()
    
    #Add period to end of each sentence if not present
    #we will be appending all sentences 
    if (re.search('[^\.]$',sentence_str)) :
        sentence_str = sentence_str + "."
        
    #get sentiment using different lexical algorithms
    vader_sentiment = get_vader_sentiment(sentence_str,0.1,True)
    afinn_sentiment = get_afinn_sentiment(sentence_str,True)
    textblob_sentiment = get_textblob_sentiment(sentence_str,True)
    #increment counters for printing
    i=i+1
    j=j+1
    
    
    #assign sentences to positive/negative/neutral word groups
    if (textblob_sentiment == 1) :
        pos_reviews = pos_reviews + sentence_str
    elif (textblob_sentiment == 0) :
        neut_reviews = neut_reviews + sentence_str
    else :
        neg_reviews = neg_reviews + sentence_str
        
    

It's very useful but wish she could do more things.
Vader Sentiment:
{'neg': 0.0, 'neu': 0.57, 'pos': 0.43, 'compound': 0.722} 
Detected sentiment:1

Afinn sentiment:
3.0 
 Detectec sentiment:1

Textblob sentiment:
0.445 
 Detected sentiment:1

but it's really coolLove it.
Vader Sentiment:
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0} 
Detected sentiment:0

Afinn sentiment:
0.0 
 Detectec sentiment:0

Textblob sentiment:
0.2 
 Detected sentiment:1

New to smart technology for the home.
Vader Sentiment:
{'neg': 0.0, 'neu': 0.69, 'pos': 0.31, 'compound': 0.4019} 
Detected sentiment:1

Afinn sentiment:
1.0 
 Detectec sentiment:1

Textblob sentiment:
0.17532467532467533 
 Detected sentiment:1

but now I am addicted to using it for alarms, need briefing in the morning, and controlling my tech of course.
Vader Sentiment:
{'neg': 0.122, 'neu': 0.878, 'pos': 0.0, 'compound': -0.3919} 
Detected sentiment:-1

Afinn sentiment:
0.0 
 Detectec sentiment:0

Textblob sentiment:
-0.4 
 Detecte

{'neg': 0.0, 'neu': 0.537, 'pos': 0.463, 'compound': 0.7207} 
Detected sentiment:1

Afinn sentiment:
4.0 
 Detectec sentiment:1

Textblob sentiment:
0.16 
 Detected sentiment:1

Sorry I'm having trouble understanding right nowGood productThe echo dot as small compact and pretty neat, the amazon alexa voice assistant integrates really well with your home smart appliances and instantly converts your home to a smart home.
Vader Sentiment:
{'neg': 0.082, 'neu': 0.596, 'pos': 0.322, 'compound': 0.8932} 
Detected sentiment:1

Afinn sentiment:
0.0 
 Detectec sentiment:0

Textblob sentiment:
0.02380952380952381 
 Detected sentiment:1

I admit alexa needs to be worked on a bit to be an even more amazing product but for the price you can't complain.
Vader Sentiment:
{'neg': 0.0, 'neu': 0.742, 'pos': 0.258, 'compound': 0.682} 
Detected sentiment:1

Afinn sentiment:
1.0 
 Detectec sentiment:1

Textblob sentiment:
0.55 
 Detected sentiment:1

I loved my first gen Echo.
Vader Sentiment:
{'neg': 0.0,