In [3]:
# To put the data into the prettytable
from prettytable import PrettyTable
from collections import Counter
from matplotlib import pyplot as plt
import pandas as pd

from textblob import TextBlob
import re
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords

# Fetch the NLTK data files the imports above rely on at call time:
# 'stopwords' is read through nltk.corpus.stopwords and 'vader_lexicon'
# is loaded by SentimentIntensityAnalyzer.
import nltk
nltk.download('stopwords')
nltk.download('vader_lexicon')

In [4]:
def getTopWordsForAllReviews(clean_reviews):
    """Print the ten most frequent non-stopword tokens and return the kept tokens.

    Every review is split on whitespace, tokens found in NLTK's English
    stopword list are dropped, and a two-column table (Word / Count) of the
    ten most common remaining tokens is printed.

    Args:
        clean_reviews: iterable of review strings.

    Returns:
        list of all non-stopword tokens in input order (duplicates kept).
    """
    # Build the stopword set once. Evaluating stopwords.words('english')
    # inside the filter rebuilt the list for every token and searched it
    # linearly; a set gives constant-time membership tests.
    english_stopwords = set(stopwords.words('english'))

    filter_words = []
    for text in clean_reviews:
        filter_words.extend(
            token for token in text.split() if token not in english_stopwords
        )

    word_freq = Counter(filter_words)
    top_word = PrettyTable(field_names=['Word', 'Count'])  # Header
    for item in word_freq.most_common(10):
        top_word.add_row(item)  # Adding rows
    # Set column alignment: words centred, counts right-aligned.
    top_word.align['Word'], top_word.align['Count'] = 'c', 'r'
    print(top_word)
    return filter_words
    
def getPolarityOfAReview(review):
    """Classify a review as 'positive', 'neutral' or 'negative'.

    The label is derived from the polarity score TextBlob computes for the
    review text: above zero is positive, exactly zero is neutral, anything
    else is negative.
    """
    # Read the TextBlob polarity once and branch on it.
    polarity = TextBlob(review).sentiment.polarity

    if polarity > 0:
        return 'positive'
    if polarity == 0:
        return 'neutral'
    return 'negative'
    
def getSentimentOfAReview(clean_reviews):
    """Pair every review with its sentiment label in a DataFrame.

    Args:
        clean_reviews: iterable of review strings.

    Returns:
        pandas.DataFrame with columns 'Review' and 'Sentiment', one row per
        review in input order; 'Sentiment' holds the label returned by
        getPolarityOfAReview. An empty input yields an empty frame that
        still has both columns.
    """
    # Collect the rows first and build the frame in one go:
    # DataFrame.append was removed in pandas 2.0, and appending row by row
    # copied the whole frame on every iteration.
    rows = [
        {'Review': review, 'Sentiment': getPolarityOfAReview(review)}
        for review in clean_reviews
    ]
    return pd.DataFrame(rows, columns=['Review', 'Sentiment'])


def getPositiveAndNegativeReviews(reviewAndSentiment):
    """Split reviews by sentiment and print the positive/negative percentages.

    Args:
        reviewAndSentiment: pandas.DataFrame with a 'Review' column and a
            'Sentiment' column holding 'positive' / 'negative' / other labels.

    Returns:
        (previews, nreviews): two lists with the review texts labelled
        'positive' and 'negative' respectively, in row order.
    """
    reviews = reviewAndSentiment['Review']
    sentiments = reviewAndSentiment['Sentiment']

    # Positional boolean masks instead of reviews[i] label lookups, so frames
    # whose index is not 0..n-1 (e.g. after filtering) are handled as well.
    previews = reviews[(sentiments == 'positive').values].tolist()
    nreviews = reviews[(sentiments == 'negative').values].tolist()

    # An empty frame has no meaningful ratio; report 0 instead of raising
    # ZeroDivisionError.
    total = len(reviewAndSentiment)
    percentagePositiveReviews = 100*len(previews)/total if total else 0.0
    percentageNegativeReviews = 100*len(nreviews)/total if total else 0.0

    print("Percentage of positive reviews =",percentagePositiveReviews)
    print("Percentage of negative reviews =",percentageNegativeReviews)

    return previews, nreviews
#print(get_review_sentiment('today is not a good day'))    


def getWords(text):
    """Return every run of word characters (regex ``\\w+``) found in text, in order."""
    # Raw string: '\w' in a normal literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    return re.findall(r'\w+', text)


def getPositiveNegativeWordList(clean_reviews):
    """Bucket the distinct words of all reviews by their VADER compound score.

    Words are extracted with getWords, de-duplicated, and scored one at a
    time with NLTK's SentimentIntensityAnalyzer.

    Args:
        clean_reviews: iterable of review strings.

    Returns:
        (pos_word_list, neu_word_list, neg_word_list): words whose compound
        score is >= 0.3, in between, and <= -0.1 respectively. Each list is
        in first-seen order; all three are empty for empty input.
    """
    sid = SentimentIntensityAnalyzer()
    pos_word_list = []
    neu_word_list = []
    neg_word_list = []

    # Gather the distinct words of EVERY review. The previous code indexed
    # [0] after collecting, which kept only the first review's words and
    # raised IndexError on empty input. dict.fromkeys de-duplicates while
    # keeping first-seen order, so the output is stable across runs.
    unique_words = dict.fromkeys(
        word for review in clean_reviews for word in getWords(review)
    )

    for word in unique_words:
        # Score each word once and reuse the value for both threshold checks.
        compound = sid.polarity_scores(word)['compound']
        if compound >= 0.3:
            pos_word_list.append(word)
        elif compound <= -0.1:
            neg_word_list.append(word)
        else:
            neu_word_list.append(word)

    return pos_word_list, neu_word_list, neg_word_list