# In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from PIL import Image
import plotly.offline as py

# Substrings that mark a source name as an organization.
#
# source_type() lowercases both the keyword and the name before testing, so
# the casing used here is cosmetic. Matching is by plain substring, which
# means short entries such as 'tax', 'list', 'NC' or 'NJ' also match inside
# longer words.
# NOTE(review): that substring behaviour may label some personal names as
# organizations (e.g. 'nc' occurs inside "Vincent") -- confirm whether
# word-boundary matching is wanted before relying on the labels.
#
# Each keyword is listed once (in first-seen order), and 'politicians' is
# spelled correctly so it can actually match.
org_keywords = [
    'associate', 'committee', 'the other 98%', 'party', 'republicans',
    'physicians', 'USA', 'association', 'league', 'post', 'partnership',
    'think big', 'illinois', 'children at risk', 'union', 'leaders',
    'republican', 'coalition', '.org', 'services', '.com', 'fund', 'campaign',
    'amendments', 'americans', 'unite', 'entertainment', 'initiative',
    'austin', 'pundit', 'senate', 'agency', 'office', 'PAC', 'voice', 'news',
    'chamber', 'headlines', 'project', 'fellowship', 'politics', '.info',
    'liberty', 'report', 'university', 'press', 'institute', 'times', 'daily',
    'portal', 'revolution', 'world', 'department', 'network', 'school',
    'resistance', 'administration', 'council', 'taxpayers', 'foundation',
    'afscme', 'district', 'american', 'A Stronger Wisconsin', 'aclu',
    'new jersey', 'AFL-CIO', 'aarp', 'government', 'activist', 'actionaid',
    'information', 'wisconsin', 'america', 'alliance', 'list', 'house',
    'democrats', 'policy', 'constitution', 'tax', 'Ax The Bev Tax', 'oregon',
    'group', 'NJ', 'lives', 'NC', '.net', 'ohio', 'burger king', 'cnn',
    'catpac', 'californian', 'healthcare', 'hospital', 'action', 'citizen',
    'city', 'county', 'portland', 'clean water', 'club', 'college',
    'common sense', 'conservative', 'consumer',
    'deeds', 'dccc', 'street', 'service', 'social', 'democratic', 'doctors',
    'florida', 'society', 'espn', 'conference',
    'democracy', 'floridians', 'forecast', 'advocates', 'partners',
    '.us', 'freedom', 'friends', 'future45', 'generation', 'georgia', 'bureau',
    'owners', 'lottery', 'politicians', 'senators', 'georgian', 'texas',
    'humanity'
]

# Load the dataset. The path is relative, so it is resolved against the
# current working directory; the code below reads the "source" and
# "renamed_veracity" columns from it.
data = pd.read_csv("cleaned_data.csv")


def source_type(x, keywords=None) -> str:
    """Label a source name as ``'Organization'`` or ``'Individual'``.

    A name is an organization when it contains any keyword as a
    case-insensitive substring. Names containing "facebook" are always
    labelled ``'Individual'``, whatever else they contain.

    Args:
        x: The source name. Anything that is not a ``str`` (for example the
            ``NaN`` pandas uses for a missing CSV cell) is labelled
            ``'Individual'`` instead of raising ``AttributeError``.
        keywords: Optional iterable of keyword strings to match against.
            Defaults to the module-level ``org_keywords`` list.

    Returns:
        ``'Organization'`` or ``'Individual'``.
    """
    if not isinstance(x, str):
        return 'Individual'

    # Lowercase once; the facebook exclusion does not depend on the keyword,
    # so it is decided before scanning the keyword list.
    name = x.lower()
    if 'facebook' in name:
        return 'Individual'

    if keywords is None:
        keywords = org_keywords
    if any(keyword.lower() in name for keyword in keywords):
        return 'Organization'
    return 'Individual'


# Work on a copy holding only the two columns the charts use, tag every row
# with its source type, and remember the five most frequent source names
# (most frequent first).
df = data.loc[:, ["source", "renamed_veracity"]].copy()
df['source_type'] = df['source'].apply(source_type)
source_frequencies = df['source'].value_counts()
top_5_sources = source_frequencies.iloc[:5].index
# Slice name and observation count for the overview donut, in drawing order.
source_counts = [
    ("Other Individuals", 11896),
    ("Donald Trump", 816),
    ("Barack Obama", 600),
    ("Bloggers", 502),
    ("Facebook posts", 339),
    ("Hillary Clinton", 297),
    ("Organizations", 2300),
]
labels = [slice_name for slice_name, _ in source_counts]
values = [slice_count for _, slice_count in source_counts]

# Same data as a two-column frame: column 0 is the label, column 1 the count.
fig_df = pd.DataFrame(source_counts, columns=['labels', 'values'])

# Overview donut: how many observations came from each kind of source.
fig = go.Figure()
fig.add_trace(
    go.Pie(
        labels=labels,
        values=values,
        # Both columns of fig_df travel with each slice; the hover template
        # reads them back as customdata[0][0] (label) and customdata[0][1]
        # (count).
        customdata=fig_df,
        hovertemplate='Source: %{customdata[0][0]}' +
        '<br>Number of observations: %{customdata[0][1]}' +
        '<extra></extra>',  # empty <extra> keeps "trace 0" out of the hover box
        # A d3-format specifier takes a single type character: ".1%" means
        # "percentage with one decimal". The former ".1%f" carried two type
        # characters, which is not a valid specifier.
        texttemplate='%{percent:.1%}',
        textposition='inside',
        textfont={'size': 20},
        hole=0.4,  # leaves room in the middle for the headline annotation
        # Keep the slices in the order listed instead of sorting by size.
        sort=False,
        direction='counterclockwise',
        legendgroup='',
        marker={
            'colors': [
                '#c6dbef', '#9ecae1', '#6baed6', '#4292c6', '#2171b5',
                '#084594', '#addd8e'
            ]
        }))

# Headline in the hole of the donut: the share of observations that did not
# come from organizations. It is derived from `labels`/`values` so it cannot
# drift from the chart data (it evaluates to "86.3%" for the current counts).
organization_count = values[labels.index("Organizations")]
individual_share = 1 - organization_count / sum(values)

# (text, vertical position in paper coordinates, font size)
for callout_text, callout_y, callout_size in (
        ("{:.1%}".format(individual_share), 0.55, 45),
        ("of total data<br>came from individuals", 0.45, 16),
):
    fig.add_annotation(text=callout_text,
                       xref="paper",
                       yref="paper",
                       x=0.5,
                       y=callout_y,
                       showarrow=False,
                       font={
                           'size': callout_size,
                           'color': "black"
                       })

# Pictures placed next to the named slices of the overview donut.
# Each entry is (image file, x, y, size); x/y are paper coordinates of the
# image's bottom-right corner and `size` is used for both sizex and sizey.
slice_icons = [
    ("trump.png", 0.51, 0.94, 0.13),
    ("obama.png", 0.36, 0.89, 0.13),
    ("blog.png", 0.25, 0.86, 0.13),
    ("fb.png", 0.13, 0.81, 0.08),
    ("hillary_clinton.png", 0.15, 0.71, 0.25),
]
for icon_path, icon_x, icon_y, icon_size in slice_icons:
    # plotly converts a PIL image to a data URI as soon as it is assigned,
    # so the file can be closed right after the call.
    with Image.open(icon_path) as icon:
        fig.add_layout_image(source=icon,
                             xref="paper",
                             yref="paper",
                             x=icon_x,
                             y=icon_y,
                             sizex=icon_size,
                             sizey=icon_size,
                             xanchor="right",
                             yanchor="bottom")
# Hide grid lines, axis lines and the axes themselves.
hidden_axis = {'showgrid': False, 'showline': False, 'visible': False}
fig.update_xaxes(**hidden_axis)
fig.update_yaxes(**hidden_axis)

# Fully transparent colour used for both the plot and the paper background.
transparent = 'rgba(0, 0, 0, 0)'

# Title centred at the very top of the figure.
chart_title = {
    "text": "Source of Data",
    "font_size": 35,
    "xanchor": "center",
    "yanchor": "top",
    "x": 0.5,
    "y": 1,
    "pad": {'b': 50, 't': 10},
}

# Legend anchored by its top-right corner, to the right of the plot area.
legend_position = {"xanchor": "right", "yanchor": "top", "x": 1.2, "y": 0.9}

fig.update_layout(plot_bgcolor=transparent,
                  paper_bgcolor=transparent,
                  title=chart_title,
                  legend=legend_position,
                  height=900,
                  width=800)

# Render the figure through plotly.offline.
py.plot(fig)

# Second figure: one donut per top source, stacked in a single column.
fig = make_subplots(rows=5,
                    cols=1,
                    specs=[[{"type": "pie"}] for _ in range(5)])
veracity_levels = [
    "true", "mostly-true", "half-true", "barely-true", "mostly-false", "false",
    "pants-fire"
]

# NOTE(review): `imgs` is not read anywhere in the visible code; it is kept
# in case a later cell relies on it.
imgs = ["trump.png", "obama.png", "blog.png", "fb.png", "hillary_clinton.png"]

# top_5_sources holds at most five names, so the row number never exceeds
# the five subplot rows; iterating it directly also copes with a dataset
# that has fewer than five distinct sources.
for row, source_name in enumerate(top_5_sources, start=1):
    # Count every veracity level for this source in one pass; levels that
    # never occur are reported as 0 so all donuts share the same slices.
    level_counts = (df.loc[df.source == source_name, "renamed_veracity"]
                    .value_counts()
                    .reindex(veracity_levels, fill_value=0))
    values = level_counts.tolist()

    # Column 0 is the level name, column 1 the count; the hover template
    # reads them back as customdata[0][0] and customdata[0][1].
    custom_data = pd.DataFrame({
        'veracity_level': veracity_levels,
        'num_obs': values
    })
    fig.add_trace(
        go.Pie(
            labels=veracity_levels,
            values=values,
            customdata=custom_data,
            hovertemplate='Veracity: %{customdata[0][0]}' +
            '<br>Number of observations: %{customdata[0][1]}' +
            '<extra></extra>',  # empty <extra> keeps "trace 0" out of the hover box
            # A d3-format specifier takes a single type character: ".1%"
            # means "percentage with one decimal" (".1%f" is not valid).
            texttemplate='%{percent:.1%}',
            textposition='inside',
            textfont={'size': 15},
            hole=0.55,
            # Keep the slices in veracity order instead of sorting by size.
            sort=False,
            direction='counterclockwise',
            legendgroup='',
            marker={
                'colors': [
                    '#08519c', '#3182bd', '#6baed6', '#bdd7e7', '#fdbe85',
                    '#fd8d3c', '#e6550d'
                ]
            }),
        row=row,
        col=1)

# Pictures placed over the five stacked donuts, listed top to bottom.
# Each entry is (image file, x, y, size); x/y are paper coordinates of the
# image's bottom-right corner and `size` is used for both sizex and sizey.
# NOTE(review): the files are in a fixed order; presumably it matches the
# order of top_5_sources -- confirm against the data.
donut_icons = [
    ("trump.png", 0.59, 0.91, 0.18),
    ("obama.png", 0.57, 0.685, 0.15),
    ("blog.png", 0.57, 0.48, 0.15),
    ("fb.png", 0.545, 0.27, 0.1),
    ("hillary_clinton.png", 0.67, 0.049, 0.35),
]
for icon_path, icon_x, icon_y, icon_size in donut_icons:
    # plotly converts a PIL image to a data URI as soon as it is assigned,
    # so the file can be closed right after the call.
    with Image.open(icon_path) as icon:
        fig.add_layout_image(source=icon,
                             xref="paper",
                             yref="paper",
                             x=icon_x,
                             y=icon_y,
                             sizex=icon_size,
                             sizey=icon_size,
                             xanchor="right",
                             yanchor="bottom")
# Hide grid lines, axis lines and the axes themselves.
hidden_axis = {'showgrid': False, 'showline': False, 'visible': False}
fig.update_xaxes(**hidden_axis)
fig.update_yaxes(**hidden_axis)

# Fully transparent colour used for both the plot and the paper background.
transparent = 'rgba(0, 0, 0, 0)'

# Title centred at the very top of the figure.
chart_title = {
    "text": "Veracity Levels Across Top 5 Sources",
    "font_size": 35,
    "xanchor": "center",
    "yanchor": "top",
    "x": 0.5,
    "y": 1,
    "pad": {'b': 50, 't': 10},
}

# Legend anchored by its top-right corner, to the right of the plot area.
legend_position = {"xanchor": "right", "yanchor": "top", "x": 1.2, "y": 0.9}

fig.update_layout(plot_bgcolor=transparent,
                  paper_bgcolor=transparent,
                  title=chart_title,
                  legend=legend_position,
                  height=1800,
                  width=800)

# Render the figure through plotly.offline.
py.plot(fig)

# Output: 'temp-plot.html'