In [1]:
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
import itertools  
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import operator
import math
from IPython.display import display
from io import BytesIO
import base64

In [2]:

def fig_to_uri(in_fig, close_all=True, **save_args):
    """
    Render a figure to PNG and return it as a base64 ``data:`` URI.

    :param in_fig: figure-like object exposing ``savefig`` (and ``clf``,
        which is called when ``close_all`` is true).
    :param close_all: when true, clear ``in_fig`` and close every pyplot
        figure once the image has been written.
    :param save_args: extra keyword arguments forwarded to ``in_fig.savefig``.
    :return: string of the form ``data:image/png;base64,<payload>``.
    """
    png_buffer = BytesIO()
    in_fig.savefig(png_buffer, format='png', **save_args)

    if close_all:
        in_fig.clf()
        plt.close('all')

    # getvalue() yields the whole buffer regardless of the current position.
    payload = base64.b64encode(png_buffer.getvalue()).decode("ascii")
    payload = payload.replace("\n", "")
    return "data:image/png;base64,{}".format(payload)

In [43]:
# Maps each dropdown label to the slice of the frequency-ranked phrase list
# that should be plotted for it.
_PHRASE_WINDOWS = {
    'Top 10': slice(0, 10),
    '10-20': slice(10, 20),
    '20-30': slice(20, 30),
    '30-40': slice(30, 40),
    '40-50': slice(40, 50),
    'Last 10-20': slice(-20, -10),
    'Last 10': slice(-10, None),
}


def _ranked_phrase_counts(phrases):
    """Return (phrase, count) pairs ordered by descending count, then phrase."""
    return sorted(Counter(phrases).items(), key=lambda pair: (-pair[1], pair[0]))


def _window_of(ranked, dropdown_value):
    """Return a dict of the ranked (phrase, count) pairs picked by the dropdown label."""
    if dropdown_value is None:
        dropdown_value = 'Top 10'
    # Unrecognised labels select every phrase.
    window = _PHRASE_WINDOWS.get(dropdown_value, slice(None))
    return dict(ranked[window])


def _draw_sentiment_panel(ax, counts, label, percentage, color):
    """Draw one phrase-frequency bar chart with the shared labels and font sizes."""
    ax.bar(list(counts.keys()), list(counts.values()), color=color)
    ax.set(xlabel='Key-Phrases', ylabel='Frequency')
    ax.set_title(label + " [" + str(percentage) + "% ]", fontdict={'fontsize': 20})
    plt.setp(ax.get_xticklabels(), rotation=50, horizontalalignment='right', fontsize=14)
    plt.setp(ax.get_yticklabels(), fontsize=14)
    ax.xaxis.label.set_size(20)
    ax.yaxis.label.set_size(20)


def show_plots(values, key, total_percentage_of_kwd, dropdown_value):
    """
    Plot positive / negative / neutral key-phrase frequencies for one keyword.

    Each phrase in ``values`` is scored with VADER and assigned to the
    sentiment whose score is largest. Phrases are then counted, ranked by
    descending frequency (ties broken by phrase), and the window chosen by
    ``dropdown_value`` is drawn as three side-by-side bar charts. Each chart
    title carries the rounded percentage of all phrases in that sentiment.

    :param values: iterable of phrase strings; may be empty, in which case
        all percentages are 0 and the charts are blank.
    :param key: keyword the phrases belong to; its upper-cased form is returned.
    :param total_percentage_of_kwd: accepted for interface compatibility;
        not used by this function.
    :param dropdown_value: one of ``'Top 10'``, ``'10-20'``, ``'20-30'``,
        ``'30-40'``, ``'40-50'``, ``'Last 10-20'``, ``'Last 10'``. ``None``
        means ``'Top 10'``; any other value plots every phrase.
    :return: ``(out_url, title)`` where ``out_url`` is the value returned by
        ``fig_to_uri`` for the figure and ``title`` is ``key.upper()``.
    """
    # Materialise once: the phrases are iterated and then counted.
    values = list(values)
    title = key.upper()
    analyzer = SentimentIntensityAnalyzer()

    buckets = {'pos': [], 'neg': [], 'neu': []}
    for sentence in values:
        scores = analyzer.polarity_scores(sentence)
        scores.pop('compound')
        # NOTE(review): the VADER README suggests classifying on the
        # 'compound' score (>= 0.05 positive, <= -0.05 negative). Taking the
        # largest of neg/neu/pos is kept here so existing results do not
        # change -- confirm this is the intended rule.
        senti = max(scores.items(), key=operator.itemgetter(1))[0]
        buckets[senti if senti in ('pos', 'neg') else 'neu'].append(sentence)

    total = len(values)

    def percentage_of(bucket):
        # Guard against an empty phrase list instead of dividing by zero.
        return round((len(bucket) / total) * 100) if total else 0

    panels = (
        ('Positive', 'pos', '#32CD32'),
        ('Negative', 'neg', '#FF6347'),
        ('Neutral', 'neu', '#bbbbbb'),
    )

    fig, axes = plt.subplots(1, 3, figsize=(23, 10))
    for ax, (label, bucket_name, color) in zip(axes, panels):
        phrases = buckets[bucket_name]
        shown = _window_of(_ranked_phrase_counts(phrases), dropdown_value)
        _draw_sentiment_panel(ax, shown, label, percentage_of(phrases), color)
    plt.tight_layout()

    out_url = fig_to_uri(fig)
    return out_url, title

In [44]:
def resulting_charts(result, dropdown_value):
    """
    Build the sentiment chart for the first keyword in ``result``.

    Only the first keyword (in the mapping's iteration order) is charted;
    the remaining keywords contribute solely to the phrase total used for
    the percentage passed to ``show_plots``.
    NOTE(review): if a chart per keyword was intended, the return shape has
    to change and callers must be updated accordingly.

    :param result: mapping of keyword -> collection of phrases.
    :param dropdown_value: phrase-window label forwarded to ``show_plots``.
    :return: ``(plot, title)`` as produced by ``show_plots``, or ``None``
        when ``result`` is empty.
    """
    tot_phrase_per_kwd = {item: len(list(result[item])) for item in result}
    if not tot_phrase_per_kwd:
        return None

    total_phrases = sum(tot_phrase_per_kwd.values())
    key = next(iter(result.keys()))
    values = result[key]
    # Every keyword may have an empty phrase list; avoid dividing by zero.
    if total_phrases:
        total_percentage_of_kwd = round((tot_phrase_per_kwd[key] / total_phrases) * 100)
    else:
        total_percentage_of_kwd = 0

    plot, title = show_plots(values, key, total_percentage_of_kwd, dropdown_value)
    return plot, title
        

In [None]:
def all_words(result):
    """
    Return the distinct phrases and upper-cased name of the first keyword.

    :param result: mapping of keyword -> iterable of phrases.
    :return: ``(set_of_phrases, keyword.upper())`` for the first entry of
        ``result``, or ``None`` when ``result`` is empty.
    """
    first_entry = next(iter(result.items()), None)
    if first_entry is None:
        return None
    keyword, phrases = first_entry
    return set(phrases), keyword.upper()