Objective:

Create Plotly charts from data viz 1.

In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [17]:
# Substrings whose presence in a source name marks that source as an
# organization rather than an individual. `source_type` lower-cases both the
# keyword and the source name before comparing, so the casing here is cosmetic.
# Exact duplicates from the earlier version of this list ('fund', 'voice',
# 'project', 'fellowship', 'university', 'group') were removed; they had no
# effect on matching.
# NOTE(review): 'polititions' looks like a typo for 'politicians'. It is kept
# unchanged because correcting it could reclassify sources and change the
# counts printed (and hard-coded) further down -- confirm and fix deliberately.
org_keywords = [
    'associate', 'committee', 'the other 98%', 'party', 'republicans',
    'physicians', 'USA', 'association', 'league', 'post', 'partnership',
    'think big', 'illinois', 'children at risk', 'union', 'leaders',
    'republican', 'coalition', '.org', 'services', '.com', 'fund', 'campaign',
    'amendments', 'americans', 'unite', 'entertainment', 'initiative',
    'austin', 'pundit', 'senate', 'agency', 'office', 'PAC', 'voice', 'news',
    'chamber', 'headlines', 'project', 'fellowship', 'politics', '.info',
    'liberty', 'report', 'university', 'press', 'institute', 'times', 'daily',
    'portal', 'revolution', 'world', 'department', 'network', 'school',
    'resistance', 'administration', 'council', 'taxpayers', 'foundation',
    'afscme', 'district', 'american', 'A Stronger Wisconsin', 'aclu',
    'new jersey', 'AFL-CIO', 'aarp', 'government', 'activist', 'actionaid',
    'information', 'wisconsin', 'america', 'alliance', 'list', 'house',
    'democrats', 'policy', 'constitution', 'tax', 'Ax The Bev Tax', 'oregon',
    'group', 'NJ', 'lives', 'NC', '.net', 'ohio', 'burger king', 'cnn',
    'catpac', 'californian', 'healthcare', 'hospital', 'action', 'citizen',
    'city', 'county', 'portland', 'clean water', 'club', 'college',
    'common sense', 'conservative', 'consumer',
    'deeds', 'dccc', 'street', 'service', 'social', 'democratic', 'doctors',
    'florida', 'society', 'espn', 'conference',
    'democracy', 'floridians', 'forecast', 'advocates', 'partners',
    '.us', 'freedom', 'friends', 'future45', 'generation', 'georgia', 'bureau',
    'owners', 'lottery', 'polititions', 'senators', 'georgian', 'texas',
    'humanity'
]

In [18]:
# Read the cleaned statements table from the working directory and preview the
# first rows to confirm the expected columns are present.
csv_path = "cleaned_data.csv"
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,statement,source,link,veracity,year,renamed_veracity,statement_mod
0,"Says that in 1770 ""British parliament banned l...",Facebook posts,/facebook-fact-checks/statements/2019/oct/25/f...,Pants on Fire!,2019,pants-fire,"that in 1770 ""British parliament banned lipsti..."
1,"Says Ann Landers said, ""At age 20, we worry ab...",Viral image,/facebook-fact-checks/statements/2019/oct/25/v...,False,2019,false,"Ann Landers said, ""At age 20, we worry about w..."
2,"""General Motors is making record profits.""",Glenn Kage,/missouri/statements/2019/oct/25/glenn-kage/ge...,Half-True,2019,half-true,"""General Motors is making record profits."""
3,"""14,000 abandoned wind turbines litter the Uni...",Chain email,/missouri/statements/2019/oct/25/chain-email/n...,Pants on Fire!,2019,pants-fire,"""14,000 abandoned wind turbines litter the Uni..."
4,"Says Congress gave Wall Street ""trillions of d...",Bernie Sanders,/truth-o-meter/statements/2019/oct/25/bernie-s...,Half-True,2019,half-true,"Congress gave Wall Street ""trillions of dollar..."


In [19]:
def _has_keyword(text, keyword, whole_word=False):
    """Return True if ``keyword`` occurs in ``text`` (both already lower-cased).

    With ``whole_word=True`` an occurrence only counts when it is not glued to
    another letter or digit on a side where the keyword itself starts/ends
    with a letter or digit (so '.org' still matches inside 'moveon.org').
    """
    if not whole_word or not keyword:
        return keyword in text
    start = text.find(keyword)
    while start != -1:
        end = start + len(keyword)
        left_ok = (not keyword[0].isalnum() or start == 0
                   or not text[start - 1].isalnum())
        right_ok = (not keyword[-1].isalnum() or end == len(text)
                    or not text[end].isalnum())
        if left_ok and right_ok:
            return True
        start = text.find(keyword, start + 1)
    return False


def source_type(x, keywords=None, whole_word=False):
    """Classify a source name as 'Organization' or 'Individual'.

    Parameters
    ----------
    x : str
        Source name to classify.
    keywords : iterable of str, optional
        Keywords that indicate an organization. Defaults to the notebook-level
        ``org_keywords`` list (looked up at call time).
    whole_word : bool, default False
        If False (the original behavior), a keyword matches anywhere inside the
        name, case-insensitively. That produces false positives for short
        keywords: e.g. 'NC' matches "Nancy Pelosi" and 'USA' matches "Susan".
        Pass True to require the keyword to stand on its own. The default is
        left at False so existing counts in this notebook do not change.

    Returns
    -------
    str
        'Organization' if any keyword matches and the name does not contain
        'facebook'; otherwise 'Individual'.
    """
    if keywords is None:
        keywords = org_keywords
    # Lower-case once instead of on every loop iteration.
    text = x.lower()
    # Names mentioning Facebook are never treated as organizations.
    if 'facebook' in text:
        return 'Individual'
    for keyword in keywords:
        if _has_keyword(text, keyword.lower(), whole_word):
            return 'Organization'
    return 'Individual'

# Keep only the two columns used below and tag every row with the type of its
# source ('Organization' or 'Individual') as decided by `source_type`.
df = data.loc[:, ["source", "renamed_veracity"]].assign(
    source_type=lambda frame: frame["source"].apply(source_type)
)

In [31]:
# Five most frequent sources, printed with the number of rows each contributes.
source_counts = df["source"].value_counts()
top_5_sources = source_counts.index[:5]
for name in top_5_sources:
    print(f"{name}: {source_counts[name]}")

Donald Trump: 816
Barack Obama: 600
Bloggers: 502
Facebook posts: 339
Hillary Clinton: 297


In [32]:
# Row count per source type, leaving out rows that belong to the top-5 sources.
outside_top_5 = ~df["source"].isin(top_5_sources)
for kind in df["source_type"].unique():
    matching_rows = outside_top_5 & (df["source_type"] == kind)
    print(f"{kind}: {matching_rows.sum()}")

Individual: 11896
Organization: 2300


In [85]:
# Donut chart of where the statements come from: the five most frequent
# sources shown individually, plus all remaining sources grouped by type.
# NOTE(review): `values` are copied by hand from the counts printed by the two
# cells above (top-5 sources, and per-type totals excluding them). They will go
# stale if cleaned_data.csv or org_keywords changes -- consider deriving them
# from `df` instead.
labels = [
    "Other Individuals", "Donald Trump", "Barack Obama", "Bloggers",
    "Facebook posts", "Hillary Clinton", "Organizations"
]
values = [11896, 816, 600, 502, 339, 297, 2300]
# One image file name per slice ("" where there is none). In this cell it is
# only carried along as the third column of `fig_df`.
imgs = ["", "trump.png", "obama.png", "blog.png", "fb.png", "hillary_clinton.png", ""]
fig_df = pd.DataFrame({'labels': labels, 'values': values, 'imgs': imgs})

fig = go.Figure()
fig.add_trace(
    go.Pie(
        labels=labels,
        values=values,
        # Columns of fig_df: 0 = label, 1 = value, 2 = image file name.
        customdata=fig_df,
        # The trailing <extra></extra> prevents "trace 0" from appearing next
        # to the hover label.
        # NOTE(review): the leading [0] in customdata[0][...] is kept from the
        # original; presumably pie traces wrap each slice's customdata row in
        # an outer list -- verify against the installed Plotly version.
        hovertemplate='Source: %{customdata[0][0]}' +
        '<br>Number of observations: %{customdata[0][1]}' +
        '<extra></extra>',
        # d3-format percentage with one decimal place. The previous '.1%f' was
        # not a valid d3-format specifier (stray trailing 'f').
        texttemplate='%{percent:.1%}',
        textposition='outside',
        hole=0.5,
        # Keep the slices in the order given above instead of sorting by size.
        sort=False,
        direction='counterclockwise',
        legendgroup='',
        marker={
            # Blues for the individual sources, green for organizations.
            'colors':
            ['#c6dbef', '#9ecae1', '#6baed6', '#4292c6', '#2171b5', '#084594', '#addd8e']
        })
)

fig.update_layout(
    {
        # Fully transparent plot and paper backgrounds.
        'plot_bgcolor': 'rgba(0, 0, 0, 0)',
        'paper_bgcolor': 'rgba(0, 0, 0, 0)'
    },
    title={
        "text": "Source of data",
        "font_size": 18,
        "xanchor": "center",
        "yanchor": "top",
        "x": 0.5
    },
    height=700,
    width=800)
fig.show()