In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
import seaborn as sns

from settings import *
from utils import *
from quoteAnalysis import *

In [None]:
#Run the pipeline to prepare the dataframe for the plots.
#quotePipeline is not defined in this notebook; presumably it is pulled in by one of
#the star imports above (likely quoteAnalysis) - TODO confirm.
#The cells below read `documents` as a global and use these columns:
#articleTopic, quoteTopic, quoteSim, quoteeType and quotee.
documents = quotePipeline()

In [None]:
#Show how many rows of `documents` carry each distinct quoteTopic value
documents['quoteTopic'].value_counts()

In [None]:
def plotNumOfQuotes(countLimit=0):
    """Plot quote counts per article topic as three overlaid bar layers.

    Reads the notebook-level ``documents`` dataframe and draws, for every
    ``articleTopic``: all rows ("total"), the rows whose ``quoteeType`` is
    PERSON or ORG ("organizations"), and the rows whose ``quoteeType`` is
    PERSON ("persons"). Each later layer is painted over the previous one on
    the same axes, so the part of a bar that stays visible is the difference
    between two layers. The figure is saved to ``plots/CountQuotes.png`` and
    then displayed.

    Parameters
    ----------
    countLimit : int, optional
        Only topics whose total row count is strictly greater than this value
        are plotted (aesthetic filter). Defaults to 0.

    Returns
    -------
    None

    Notes
    -----
    The ``plots`` directory is not created here; ``savefig`` needs it to exist.
    """
    #Data manipulation
    total = documents.groupby('articleTopic').size().sort_values().rename('count').reset_index()
    total = total[total['count'] > countLimit]

    #One category order shared by every layer. Without an explicit order each
    #barplot call derives the row positions from its own dataframe, so bars that
    #belong to different topics end up on the same row and the tick labels only
    #describe the layer that was drawn last.
    topics = total['articleTopic'].tolist()
    person = (documents.query("quoteeType=='PERSON' & articleTopic in @topics")
              .groupby('articleTopic').size().rename('count').reset_index())
    personorg = (documents.query("quoteeType in ['PERSON', 'ORG'] & articleTopic in @topics")
                 .groupby('articleTopic').size().rename('count').reset_index())

    #Aesthetics
    sns.set(style="whitegrid")
    f, ax = plt.subplots(figsize=(6, 6))

    #Barplots: the same colour code 'b' is resolved against a progressively darker
    #palette, so the palette has to be switched right before each layer is drawn.
    layers = [
        ("pastel", total, "total"),
        ("muted", personorg, "organizations"),
        ("dark", person, "persons"),
    ]
    for palette, frame, layerLabel in layers:
        sns.set_color_codes(palette)
        sns.barplot(x="count", y="articleTopic", data=frame, order=topics, ax=ax, label=layerLabel, color='b')

    #Labels
    ax.set(ylabel="Topic of Article", xlabel="Number of Quotes")
    ax.legend(ncol=1, loc="upper right", frameon=True)
    sns.despine()
    #Pin the current tick positions and relabel them through human_format
    locs = ax.get_xticks()
    ax.set_xticks(locs)
    ax.set_xticklabels([human_format(l) for l in locs])

    #Depiction
    plt.savefig('plots/CountQuotes.png', dpi=1200, bbox_inches = 'tight')
    plt.show()
#Draw the quote-count chart; this also writes plots/CountQuotes.png
plotNumOfQuotes()

In [None]:
def plotHeatMap():
    """Draw a heatmap of quote counts per (article topic, quote topic) pair.

    Only rows of the notebook-level ``documents`` dataframe whose ``quoteSim``
    exceeds 0.6 are counted. Rows of the heatmap are ``articleTopic`` values,
    columns are ``quoteTopic`` values, and pairs that never occur are shown
    as 0. The figure is saved to ``plots/TopicsHeatMap.png`` and then displayed.
    """
    #Data manipulation: filter, count per topic pair, then spread into a matrix
    keptColumns = ['articleTopic', 'quoteTopic', 'quoteSim']
    similarQuotes = documents.loc[documents['quoteSim'] > 0.6, keptColumns]
    pairCounts = similarQuotes.groupby(['articleTopic', 'quoteTopic']).count().reset_index()
    topicMatrix = pairCounts.pivot(index='articleTopic', columns='quoteTopic', values='quoteSim')
    topicMatrix = topicMatrix.fillna(0)

    #Aesthetics
    sns.set(style="whitegrid")
    fig, axes = plt.subplots(figsize=(8, 8))
    sns.set_color_codes("pastel")

    #Plot on the axes created above
    heat = sns.heatmap(topicMatrix, ax=axes)

    #Labels
    heat.set(ylabel="Topic of Article", xlabel="Topic of Quote")

    #Depiction
    plt.savefig('plots/TopicsHeatMap.png', dpi=1200, bbox_inches = 'tight')
    plt.show()
#Draw the topic heatmap; this also writes plots/TopicsHeatMap.png
plotHeatMap()

In [None]:
def plotTopQuotees():
    """Plot the 15 most quoted persons with their number of quotes.

    Uses the rows of the notebook-level ``documents`` dataframe whose
    ``quoteeType`` is PERSON, counts them per ``quotee`` and keeps the 15
    highest counts. One bar is drawn per quotee with the count on the x axis.
    The figure is saved to ``plots/TopQuotees.png`` and then displayed.
    """
    #Data manipulation: count rows per person, most quoted first
    isPerson = documents['quoteeType'] == 'PERSON'
    persons = documents.loc[isPerson, ['quoteeType', 'quotee']]
    perQuotee = persons.groupby(['quotee']).count().rename(columns={'quoteeType': 'count'})
    ranking = perQuotee.sort_values(by='count', ascending=False).reset_index()
    topQuotees = ranking.head(15)

    #Aesthetics
    sns.set(style="whitegrid")
    fig, axes = plt.subplots(figsize=(6, 6))

    #Barplots
    sns.set_color_codes("pastel")
    sns.barplot(x="count", y="quotee", data=topQuotees, ax=axes, label="total", color='b')

    #Labels
    axes.set(ylabel="Quotee Name", xlabel="Number of Quotes")
    sns.despine()
    #Keep the tick positions and relabel them through human_format
    tickLocs, _ = plt.xticks()
    plt.xticks(tickLocs, [human_format(loc) for loc in tickLocs])

    #Depiction
    plt.savefig('plots/TopQuotees.png', dpi=1200, bbox_inches = 'tight')
    plt.show()
#Draw the top-quotee chart; this also writes plots/TopQuotees.png
plotTopQuotees()