In [1]:
# Importing libraries
import pandas as pd
import numpy as np
import geopandas as gpd

In [2]:
import json
import pandas_bokeh
from bokeh.io import output_notebook, show
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
                          CustomJS, CustomJSFilter, Div,
                          GeoJSONDataSource, HoverTool,
                          LinearColorMapper, Slider)
from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer
from bokeh.plotting import figure

from bokeh.plotting import figure
from bokeh.embed import file_html
from bokeh.models import Div, Paragraph, Row, Column
from bokeh.resources import CDN
from bokeh.util.browser import view
from jinja2 import Template

In [3]:
import os, glob
current_location = os.getcwd()
os.chdir(current_location)

In [4]:
# Read in shapefile
contiguous_usa = gpd.read_file("../shapefiles/cb_2018_us_state_20m.shp")

In [5]:
# Read csv files
df_covid = pd.read_csv("../data/raw_2_covid_latest.csv")
# Keep states which are in the shapefile contiguous_usa
df_covid = df_covid.loc[df_covid["State/Territory"].isin(contiguous_usa["NAME"])]

In [6]:
df_state = pd.read_csv("../data/raw_0_states.csv")
df_election_2016 = pd.read_csv("../data/raw_5_2016election.csv")

In [7]:
df_mail_2016 = pd.read_csv("../data/raw_6_1_2016mail.csv")
df_mail_2020 = pd.read_csv("../data/raw_8_2020_votebymail.csv")
df_mail_2020["Number absentee ballots returned"] = parse_str_int(df_mail_2020["Number absentee ballots returned"])

NameError: name 'parse_str_int' is not defined

In [None]:
df_mail = pd.merge(left=df_mail_2016, right=df_mail_2020, left_on='State', right_on='State')

In [None]:
df_mail = pd.merge(left=df_mail, right=df_election_2020[["States", "total_2020"]], left_on='State', right_on='States')

In [None]:
df_mail = df_mail.drop("States", axis = 1)

In [None]:
df_mail.columns = ["state", "total_2016", "mail_2016", "percent_mail_2016", 
                   "mail_2020", "total_2020"]
df_mail["percent_mail_2020"] = (df_mail["mail_2020"]/df_mail["total_2020"])

In [None]:
df_mail

In [None]:
def parse_str_int(col):
    new_col = col.str.replace(',', '').astype(int)
    return new_col

df_election_2016["TRUMP_votes"] = parse_str_int(df_election_2016["TRUMP_votes"])
df_election_2016["CLINTON_votes"] = parse_str_int(df_election_2016["CLINTON_votes"])
df_election_2016["total_votes"] = parse_str_int(df_election_2016["total_votes"])

df_election_2016["2016_win"] = np.where(df_election_2016['TRUMP_votes'] > df_election_2016['CLINTON_votes'], 'Republican', "Democratic")
df_election_2016["2016_percent"] = (df_election_2016["CLINTON_votes"]-df_election_2016["TRUMP_votes"])/df_election_2016["total_votes"]
df_election_2016.head()

In [None]:
df_election_2020 = pd.read_csv("../data/raw_3_2020election.csv")
df_election_2020["2020_win"] = np.where(df_election_2020['BIDEN_percent'] > df_election_2020['TRUMP_percent'], 'Democratic', 'Republican')
df_election_2020["2020_percent"] = df_election_2020['BIDEN_percent'] - df_election_2020['TRUMP_percent']


In [None]:
df_election_2020["total_2020"] = df_election_2020["BIDEN_votes"]/df_election_2020["BIDEN_percent"]
df_election_2020["total_2020"] = df_election_2020["total_2020"].astype(int)

In [None]:
df_election_2020

In [None]:
def color_category(col):
    
    r_25 = col[col<0].quantile(0.25)
    r_50 = col[col<0].quantile(0.50)
    r_75 = col[col<0].quantile(0.75)
    d_25 = col[col>=0].quantile(0.25)
    d_50 = col[col>=0].quantile(0.50)
    d_75 = col[col>=0].quantile(0.75)
    
    conditions = [
        (col<r_25),
        (col>=r_25) & (col<r_50),
        (col>=r_50) & (col<r_75),
        (col>=r_75) & (col<0),
        (col>=0) & (col<d_25),
        (col>=d_25) & (col<d_50),
        (col>=d_50) & (col<d_75),
        (col>=d_75)
    ]
    
    choices = [-8, -6, -4, -2, 2, 4, 6, 8]
    
    new_col = np.select(condlist=conditions, choicelist=choices)
    
    return new_col

In [None]:
df_election_2016["color_2016"] = color_category(df_election_2016["2016_percent"])
df_election_2020["color_2020"] = color_category(df_election_2020["2020_percent"])

In [None]:
df_election_2016 = pd.merge(left = df_election_2016[["state_code", "2016_win", "2016_percent", "color_2016"]], 
                            right = df_state[["state_code", "state"]],
                            left_on = 'state_code',
                            right_on = 'state_code')

In [None]:
df_merge = pd.merge(left=df_covid, right=df_election_2020, left_on='State/Territory', right_on='States')
df_merge = pd.merge(left=df_merge, right=df_election_2016, left_on='State/Territory', right_on='state')

In [None]:
# Merge shapefile with covid data
map_info = contiguous_usa.merge(df_merge, left_on = "NAME", right_on = "State/Territory")
# Drop Alaska and Hawaii
map_info = map_info.loc[~map_info['NAME'].isin(['Alaska', 'Hawaii'])]

In [None]:
df_merge.head()

In [None]:
# Input GeoJSON source that contains features for plotting
geosource = GeoJSONDataSource(geojson = map_info.to_json())

In [None]:
#output_notebook()

In [None]:
base_colors = ["#cb181d","#fb6a4a","#fcae91","#fee5d9","#eff3ff","#bdd7e7","#6baed6","#2171b5"]

# Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = base_colors,
                                 low = df_merge["color_2020"].min(),
                                 high = df_merge["color_2020"].max())

# Define custom tick labels for color bar.
tick_labels = {'-8': 'Trump wins',
               '-6':'',
               '-4':'',
               '-2':'', 
               '2':'',
               '4':'',
               '6':'',
               '8':'Biden wins'}
# Create color bar.
color_bar = ColorBar(color_mapper = color_mapper, 
                     label_standoff = 10,
                     width = 500, height = 20,
                     border_line_color = None,
                     location = (0,0), 
                     orientation = 'horizontal',
                     major_label_overrides = tick_labels)


# Create figure object
p = figure(title = 'COVID-19 cases & 2020 election', 
           plot_height = 600 ,
           plot_width = 950, 
           toolbar_location = 'below',
           tools = "pan, wheel_zoom, box_zoom, reset")
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Add patch renderer to figure
states = p.patches('xs','ys', source = geosource,
                   fill_color = {'field' :'color_2020',
                                 'transform' : color_mapper},
                   line_color = "gray", 
                   line_width = 0.25, 
                   fill_alpha = 1)
# Create hover tool
p.add_tools(HoverTool(renderers = [states],
                      tooltips = [('State','@NAME'),
                                  #('B_win_percent', '@B_win_percent'),
                                  #('Win', '@win'),
                                  ('Case Rate per 100000','@{Case Rate per 100000}'),
                                  ('Confirmed cases','@{Total Cases}'),
                                  ('Total deaths','@{Total Deaths}')]))
# Specify layout
p.add_layout(color_bar, 'below')

#show(p)

In [None]:
# scatter plot
#Create Div with DataFrame:
div_df = Div(text=df_merge.head(10).to_html(index=False), 
             width=550)

#Create Scatterplot:
p_scatter = df_merge.plot_bokeh(
    kind="scatter",
    x="Total Cases",
    y="Total Deaths",
    category="2020_win",
    title="test",
    show_figure=False)
'''
#Combine Div and Scatterplot via grid layout:
pandas_bokeh.plot_grid([[div_df, p_scatter]], 
                       plot_width=400, 
                       plot_height=350)
'''

In [None]:
template = Template(
    """
    <!DOCTYPE html>
    <html lang="en">
        <head>
            <meta charset="utf-8">
            <title>{{ title if title else "Bokeh Plot" }}</title>
            {{ bokeh_css | safe }}
            {{ bokeh_js | safe }}
        </head>
        <body>
            {{ plot_div | safe }}
            {{ plot_script | safe }}
        </body>
    </html> 
    """)

#p1 = figure(plot_width = 400, plot_height = 400)
#p2 = figure(plot_width = 400, plot_height = 400)
p3 = figure(plot_width = 800, plot_height = 400)
#p1.circle([1, 2, 3], [4, 5, 6])
#p2.line([1, 2, 3], [4, 5, 6])
p3.line([1, 2, 3], [4, 5, 6])

html = file_html(Column(Row(p), Row(p_scatter), Row(p3)), template = template, resources = CDN)

output_file = 'test_interaction.html'
with open(output_file, 'w') as f:
    f.write(html)
view(output_file)

In [None]:
swing_states = ["Arizona", "Colorado", "Florida", "Georgia", "Iowa", 
"Michigan", "Minnesota", "Nevada", "New Hampshire", 
"North Carolina", "Ohio", "Pennsylvania", "Texas", "Wisconsin"]