In [1]:
# Importing libraries
import pandas as pd
import numpy as np
import geopandas as gpd
import json
import pandas_bokeh
from bokeh.io import output_notebook, show, reset_output
from bokeh.models import (CDSView, ColorBar, ColumnDataSource,
                          CustomJS, CustomJSFilter, Div,
                          GeoJSONDataSource, HoverTool,
                          LinearColorMapper, Slider,
                          LogColorMapper, Legend, Title)
from bokeh.layouts import column, row, widgetbox
from bokeh.palettes import brewer
from bokeh.plotting import figure
from bokeh.transform import dodge, factor_cmap
from bokeh.plotting import figure
from bokeh.embed import file_html
from bokeh.models import Div, Paragraph, Row, Column
from bokeh.resources import CDN
from bokeh.util.browser import view
from jinja2 import Template
import os, glob
# Remember the working directory; the relative "../data" and "../shapefiles"
# paths used below resolve against it. The former os.chdir(current_location)
# call was removed: changing into the directory that is already current
# does nothing.
current_location = os.getcwd()

In [2]:
# States treated as swing states by every filter and plot below
swing_states = [
    "Arizona",
    "Colorado",
    "Florida",
    "Georgia",
    "Iowa",
    "Michigan",
    "Minnesota",
    "Nevada",
    "New Hampshire",
    "North Carolina",
    "Ohio",
    "Pennsylvania",
    "Texas",
    "Wisconsin",
]

In [3]:
# Load the state shapes and the raw COVID / election / state lookup tables
contiguous_usa = gpd.read_file("../shapefiles/cb_2018_us_state_20m.shp")
df_covid = pd.read_csv("../data/raw_2_covid_latest.csv")
df_covid_daily = pd.read_csv("../data/raw_1_covid_daily.csv")
df_election = pd.read_csv("../data/use_election.csv")
df_state = pd.read_csv("../data/raw_0_states.csv")

# Only keep rows whose state name also appears in the shapefile
df_covid = df_covid[df_covid["State/Territory"].isin(contiguous_usa["NAME"])]
df_election = df_election[df_election["state"].isin(contiguous_usa["NAME"])]

# Attach the latest COVID figures to every election row (right join keeps all
# election rows), then add colour columns that are filled in for swing states
# only and NaN everywhere else.
df_covid_election = df_covid.merge(
    df_election, how="right", left_on="State/Territory", right_on="state"
).assign(
    swing_state_2020=lambda d: np.where(d["state"].isin(swing_states),
                                        d["color_2020"], np.nan),
    swing_state_2016=lambda d: np.where(d["state"].isin(swing_states),
                                        d["color_2016"], np.nan),
)

# Daily COVID data: look up the state name from its code and parse the dates
df_covid_daily = df_covid_daily.merge(df_state[["state_code", "state"]],
                                      on="state_code")
df_covid_daily["date"] = pd.to_datetime(df_covid_daily["date"], format="%m/%d/%Y")

In [4]:
# Daily COVID rows for the swing states, with the win_2016 / win_2020 columns
# from the election table joined on by state name
df_covid_daily_swing = (
    df_covid_daily
    .loc[df_covid_daily["state"].isin(swing_states)]
    .merge(df_election[["state", "win_2016", "win_2020"]], on="state")
)

In [5]:
def make_plot_map(source_df, shapefile, field, range_col, hover_list, title):
    """Draw a state map filled by an election-result column.

    source_df:  (df) data joined onto the shapes through its "state" column
    shapefile:  (GeoDataFrame) state outlines with a "NAME" column
    field:      (str) column of source_df used to fill each state
    range_col:  (str) column of source_df whose min and max bound the colour scale
    hover_list: (list) tooltip (label, value) pairs for the hover tool
    title:      (str) title of the map

    Returns the bokeh figure. Alaska and Hawaii are left out of the map.
    """
    # Colour scale: eight steps from dark red to dark blue, stretched linearly
    # over the full range of `range_col`.
    base_colors = ["#cb181d", "#fb6a4a", "#fcae91", "#fee5d9",
                   "#eff3ff", "#bdd7e7", "#6baed6", "#2171b5"]
    color_mapper = LinearColorMapper(palette=base_colors,
                                     low=source_df[range_col].min(),
                                     high=source_df[range_col].max())

    # Join the data onto the shapes, drop the two excluded states and hand the
    # result to bokeh as GeoJSON.
    joined = shapefile.merge(source_df, left_on="NAME", right_on="state")
    joined = joined[~joined['NAME'].isin(['Alaska', 'Hawaii'])]
    geosource = GeoJSONDataSource(geojson=joined.to_json())

    # Colour bar: blank every tick label except the two ends.
    tick_labels = {str(tick): '' for tick in (-8, -6, -4, -2, 2, 4, 6, 8)}
    tick_labels['-8'] = 'Trump wins'
    tick_labels['8'] = 'Biden wins'
    color_bar = ColorBar(color_mapper=color_mapper,
                         label_standoff=5,
                         width=200, height=10,
                         border_line_color=None,
                         location=(0, 0),
                         orientation='horizontal',
                         major_label_overrides=tick_labels)

    # Figure without toolbar or grid lines
    p = figure(title=title, plot_height=400, plot_width=600, toolbar_location=None)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    # One patch per state, filled through the colour mapper
    fill_spec = {'field': field, 'transform': color_mapper}
    states = p.patches('xs', 'ys', source=geosource,
                       fill_color=fill_spec,
                       line_color="gray",
                       line_width=0.25,
                       fill_alpha=1)

    # Tooltips apply to the state patches only
    p.add_tools(HoverTool(renderers=[states], tooltips=hover_list))

    # Layout: colour bar first, then an empty title line underneath it
    p.add_layout(color_bar, 'below')
    p.title.text_font_size = "15px"
    p.background_fill_color = "beige"
    spacer = Title(text="", align='left', text_font_size='12px', text_color="#A6ACAF")
    p.add_layout(spacer, 'below')
    return p

In [6]:
"""
function: make a scatter plot to show the relationship among COVID-19 positive cases, 
          deaths and election results in the swing states
          
source_df: (dataframe)
category_list: (list)
color_col, x_col, y_col:
color_palette
hover_list:
x_label, y_label:
title
subtitle
"""
def make_plot_scatter(source_df, category_list, color_col, color_palette,
                      x_col, y_col, hover_list, x_label, y_label, title, subtitle):
    """Scatter two columns against each other, coloured by a categorical column.

    source_df:        (df) data to plot; passed to bokeh as the glyph source
    category_list:    (list) the category values found in `color_col`
    color_col:        (str) categorical column that picks each point's colour
                      and labels the legend entries
    color_palette:    (list) one colour per entry of `category_list`
    x_col, y_col:     (str) columns for the x and y coordinates
    hover_list:       (list) tooltip (label, value) pairs for the hover tool
    x_label, y_label: (str) axis labels
    title:            (str) main title
    subtitle:         (str) smaller grey line shown above the plot

    Returns the bokeh figure.
    """
    p = figure(plot_height=400, toolbar_location=None)

    # `legend_field` replaces the deprecated generic `legend=` keyword: the
    # legend gets one entry per distinct value of `color_col`, which is what
    # `legend=<column name>` used to resolve to.
    p.scatter(x=x_col, y=y_col,
              source=source_df,
              color=factor_cmap(color_col, palette=color_palette, factors=category_list),
              size=10, legend_field=color_col)

    p.title.text = title
    p.title.text_font_size = "15px"
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label
    p.background_fill_color = "beige"

    hover = HoverTool()
    hover.tooltips = hover_list
    p.add_tools(hover)

    p.legend.location = "top_left"

    # Subtitle above the plot, and an empty title line below it
    subtitle_text = Title(text=subtitle, align='left', text_font_size='12px', text_color="#A6ACAF")
    p.add_layout(subtitle_text, 'above')
    spacer = Title(text="", align='left', text_font_size='12px', text_color="#A6ACAF")
    p.add_layout(spacer, 'below')

    return p

In [7]:
def make_plot_bar(source_df, x_axis_list, title, y1, y2, y1_label, y2_label, hover_list):
    """Draw a grouped bar chart with two bars per state.

    source_df:          (df) data to plot; must contain a "state" column plus
                        the `y1` and `y2` columns
    x_axis_list:        (list) categories for the x axis, in display order
    title:              (str) title of the bar chart
    y1, y2:             (str) columns giving the height of the left and right bar
    y1_label, y2_label: (str) legend labels for the two bars
    hover_list:         (list) tooltip (label, value) pairs for the hover tool

    The y axis is fixed to the range 0..1. The two bar renderers are named
    'win_2016' and 'win_2020', so a tooltip value of '@$name' looks up the
    column with that name for whichever bar is hovered.

    Returns the bokeh figure.
    """
    # change dataframe to ColumnDataSource for Bokeh
    source = ColumnDataSource(data=source_df)

    # The figure creates the (single) hover tool itself from `tooltips`.
    # Adding a second HoverTool with the same tooltips, as was done before,
    # makes every tooltip show up twice.
    p = figure(x_range=x_axis_list, y_range=(0, 1), plot_height=400, plot_width=1200, title=title,
               toolbar_location=None, tools="hover", tooltips=hover_list)

    # Shift the two bars left/right of the category centre so they sit side by side
    p.vbar(x=dodge("state", -0.15, range=p.x_range), top=y1, width=0.2, source=source,
           color="#CAAD8D", legend_label=y1_label, name='win_2016')

    p.vbar(x=dodge("state",  0.15,  range=p.x_range), top=y2, width=0.2, source=source,
           color="#F4D03F", legend_label=y2_label, name='win_2020')

    p.xgrid.grid_line_color = None
    p.legend.location = "top_right"
    p.legend.orientation = "horizontal"

    p.title.text_font_size = "15px"
    p.background_fill_color = "beige"

    # Empty title line below the plot
    sub_text = Title(text="", align='left', text_font_size='12px', text_color="#A6ACAF")
    p.add_layout(sub_text, 'below')
    return p

In [8]:
def make_plot_time_series(source_df, group_col, use_col, y_label, title, hover_list):
    """Plot one line per party of a daily column summed over that party's states.

    source_df:  (df) daily data with a "date" column plus `group_col` and `use_col`
    group_col:  (str) column holding "Republican" / "Democratic"; rows are
                summed per date and per value of this column
    use_col:    (str) numeric column to sum and plot
    y_label:    (str) label of the y axis
    title:      (str) title of the time series chart
    hover_list: (list) tooltip (label, value) pairs; '@date' is formatted as a
                datetime

    The summed values are divided by 1000 before plotting.

    Returns the bokeh figure.
    """
    # Aggregate the data that was passed in. Earlier versions read the global
    # df_covid_daily_swing here and silently ignored `source_df`.
    grouped = source_df.groupby(['date', group_col])[use_col].sum().reset_index()
    grouped[use_col] = grouped[use_col] / 1000
    r_group = grouped[grouped[group_col] == "Republican"]
    d_group = grouped[grouped[group_col] == "Democratic"]

    # Create a ColumnDataSource object for each group
    r_cds = ColumnDataSource(r_group)
    d_cds = ColumnDataSource(d_group)

    # Create and configure the figure
    p = figure(x_axis_type='datetime',
               plot_height=400, plot_width=600,
               title=title,
               x_axis_label='Date', y_axis_label=y_label,
               toolbar_location=None)

    # One line per party
    p.line('date', use_col, line_width=3,
           color="#5DADE2", legend_label='Democratic',
           source=d_cds)
    p.line('date', use_col, line_width=3,
           color="#EC7063", legend_label='Republican',
           source=r_cds)

    hover = HoverTool(tooltips=hover_list,
                      formatters={'@date': 'datetime'})
    p.add_tools(hover)

    # Move the legend to the upper left corner
    p.legend.location = 'top_left'
    p.title.text_font_size = "15px"
    p.background_fill_color = "beige"
    return p

In [9]:
#output_notebook()

In [10]:
# Map 1: swing states filled by the 2020 colour column
hover_list = [("State", "@NAME")]
plot_1 = make_plot_map(
    source_df=df_covid_election,
    shapefile=contiguous_usa,
    field="swing_state_2020",
    range_col="color_2020",
    hover_list=hover_list,
    title="2020 Election Result of Swing States",
)

In [11]:
# Map 2: swing states filled by the 2016 colour column
hover_list = [("State", "@NAME")]
plot_2 = make_plot_map(
    source_df=df_covid_election,
    shapefile=contiguous_usa,
    field="swing_state_2016",
    range_col="color_2016",
    hover_list=hover_list,
    title="2016 Election Result of Swing States",
)

In [12]:
# Scatter plot 1: total cases vs. total deaths in the swing states,
# coloured by the win_2020 column
source_df = df_covid_election.loc[df_covid_election["state"].isin(swing_states)]

# What is plotted and how the points are coloured
x_col, y_col = "Total Cases", "Total Deaths"
color_col = "win_2020"
category_list = ["Democratic", "Republican"]
color_palette = ["#5DADE2", "#EC7063"]

# Text shown on the figure
title = "Total cases and total deaths in swing states"
subtitle = "colors from 2020 election results"
x_label = "the number of total cases"
y_label = "the number of total deaths"
hover_list = [
    ("State", "@state"),
    ("Total Cases", "@{Total Cases}"),
    ("Total Deaths", "@{Total Deaths}"),
]

plot_3 = make_plot_scatter(
    source_df=source_df,
    category_list=category_list,
    color_col=color_col,
    color_palette=color_palette,
    x_col=x_col,
    y_col=y_col,
    hover_list=hover_list,
    x_label=x_label,
    y_label=y_label,
    title=title,
    subtitle=subtitle,
)



In [13]:
# Scatter plot 2: total cases vs. total deaths in the swing states,
# coloured by the win_2016 column
source_df = df_covid_election.loc[df_covid_election["state"].isin(swing_states)]

# What is plotted and how the points are coloured
x_col, y_col = "Total Cases", "Total Deaths"
color_col = "win_2016"
category_list = ["Democratic", "Republican"]
color_palette = ["#5DADE2", "#EC7063"]

# Text shown on the figure
title = "Total cases and total deaths in swing states"
subtitle = "colors from 2016 election results"
x_label = "the number of total cases"
y_label = "the number of total deaths"
hover_list = [
    ("State", "@state"),
    ("Total Cases", "@{Total Cases}"),
    ("Total Deaths", "@{Total Deaths}"),
]

plot_4 = make_plot_scatter(
    source_df=source_df,
    category_list=category_list,
    color_col=color_col,
    color_palette=color_palette,
    x_col=x_col,
    y_col=y_col,
    hover_list=hover_list,
    x_label=x_label,
    y_label=y_label,
    title=title,
    subtitle=subtitle,
)



In [14]:
# Bar chart: mail turnout columns for 2016 and 2020, one pair of bars per swing state.
# Rows are the ones with a non-null swing_state_2020 value; only the columns the
# chart and its tooltips need are kept.
source_df = df_covid_election.loc[
    df_covid_election["swing_state_2020"].notnull(),
    ["state", "percent_turnout_mail_2016", "percent_turnout_mail_2020",
     "Total Cases", "Total Deaths", "win_2020", "win_2016"],
]
x_axis_list = swing_states
title = "the percentage of turnout by mail in 2016 and 2020 election"

# Bar heights and their legend labels
y1, y1_label = "percent_turnout_mail_2016", "2016"
y2, y2_label = "percent_turnout_mail_2020", "2020"

hover_list = [
    ("win", "@$name"),
    ("Total Cases", "@{Total Cases}"),
    ("Total Deaths", "@{Total Deaths}"),
]
plot_5 = make_plot_bar(
    source_df=source_df,
    x_axis_list=x_axis_list,
    title=title,
    y1=y1,
    y2=y2,
    y1_label=y1_label,
    y2_label=y2_label,
    hover_list=hover_list,
)

In [15]:
# Daily chart 1: tot_cases summed per date, grouped by the win_2020 column
source_df = df_covid_daily_swing
group_col, use_col = "win_2020", "tot_cases"
title = "total cumulative cases for states where each party won in 2020"
y_label = "total cases (thousands)"
hover_list = [
    ("Date", "@date{%F}"),
    ("Total Cases (thousands)", "@{tot_cases}{int}"),
]
plot_6 = make_plot_time_series(
    source_df=source_df,
    group_col=group_col,
    use_col=use_col,
    y_label=y_label,
    title=title,
    hover_list=hover_list,
)

In [16]:
# Daily chart 2: tot_cases summed per date, grouped by the win_2016 column
source_df = df_covid_daily_swing
group_col, use_col = "win_2016", "tot_cases"
title = "total cumulative cases for states where each party won in 2016"
y_label = "total cases (thousands)"
hover_list = [
    ("Date", "@date{%F}"),
    ("Total Cases (thousands)", "@{tot_cases}{int}"),
]
plot_7 = make_plot_time_series(
    source_df=source_df,
    group_col=group_col,
    use_col=use_col,
    y_label=y_label,
    title=title,
    hover_list=hover_list,
)

In [17]:

# Page skeleton handed to bokeh's file_html: bokeh resources go in the head,
# the plot markup and its script in the body.
template = Template(
    """
    <!DOCTYPE html>
    <html lang="en">
        <head>
            <meta charset="utf-8">
            <title>{{ title if title else "Bokeh Plot" }}</title>
            {{ bokeh_css | safe }}
            {{ bokeh_js | safe }}
        </head>
        <body>
            {{ plot_div | safe }}
            {{ plot_script | safe }}
        </body>
    </html> 
    """)


# Page layout, top to bottom: the two maps, the two scatter plots,
# the bar chart, the two time series.
html = file_html(Column(Row(plot_1, plot_2), 
                        Row(plot_3, plot_4),
                        Row(plot_5), 
                        Row(plot_6, plot_7)), 
                 template = template, resources = CDN)

output_file = 'plot_swingstate.html'
# Write the page as UTF-8 explicitly: the template declares charset="utf-8",
# and the platform default encoding is not guaranteed to match it.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(html)
# Open the generated page in the browser
view(output_file)
