#### import libraries

In [1]:
import os
import pandas as pd
import numpy as np
import geopandas as gpd
from bokeh.embed import file_html
from bokeh.models import Row, Column
from bokeh.resources import CDN
from bokeh.util.browser import view
from jinja2 import Template
from pathlib import Path
# Switch the working directory to the parent of the directory this was started
# from. NOTE(review): presumably this is the project root, so that the
# CovidVoting imports and the "./data/..." / "./example/..." relative paths
# used below resolve -- confirm against the repository layout.
current_location = os.getcwd()
os.chdir(Path(current_location).parent)
from CovidVoting.add_data import (add_data_csv, add_data_shapefile)
from CovidVoting.make_plot_2020_and_2016 import (make_plot)
from CovidVoting.make_plot_swingstate import (make_plot_map, make_plot_scatter,
                                              make_plot_bar, make_plot_time_series)

#### read data

In [2]:
# Tabular inputs: the election results and the per-state reference table.
df_election = pd.read_csv("./data/use_election.csv")
df_state = pd.read_csv("./data/raw_0_states.csv")
# State geometries; passed to the map-plotting helpers further down.
contiguous_usa = gpd.read_file("./data/shapefiles/cb_2018_us_state_20m.shp")
# The two COVID CSVs (raw_1_covid_daily.csv, raw_2_covid_latest.csv) are not
# loaded here: their paths are handed to add_data_csv in the "add data" cells.

#### add data

In [3]:
# States treated as swing states throughout this notebook.
swing_states = [
    "Arizona", "Colorado", "Florida",
    "Georgia", "Iowa", "Michigan",
    "Minnesota", "Nevada", "New Hampshire",
    "North Carolina", "Ohio",
    "Pennsylvania", "Texas", "Wisconsin",
]

# Every state name / state code listed in df_state, in file order.
all_states = df_state["state"].tolist()
all_states_code = df_state["state_code"].tolist()

# State codes of the rows whose name appears in swing_states.
swing_states_code = df_state.loc[
    df_state["state"].isin(swing_states), "state_code"
].tolist()

In [4]:
# Combine the latest COVID figures with the election results.
# NOTE(review): add_data_csv is a project helper; its arguments appear to be
# (base csv, new csv, state column in each file, states to keep, join type)
# -- confirm against CovidVoting.add_data.
base_data, new_data = "./data/raw_2_covid_latest.csv", "./data/use_election.csv"
base_state_col, new_state_col = 'State/Territory', 'state'
use_state, how_join = all_states, 'right'
df_covid_election = add_data_csv(
    base_data, new_data,
    base_state_col, new_state_col,
    use_state, how_join,
)
# Notebook preview of the first rows.
df_covid_election.head(3)

Unnamed: 0,State/Territory,Total Cases,Confirmed Cases,Probable Cases,Cases in Last 7 Days,Case Rate per 100000,Total Deaths,Confirmed Deaths,Probable Deaths,Deaths in Last 7 Days,...,total_2020,turnout_by_mail_2020,percent_turnout_mail_2020,win_2020,percent_2020,color_2020,BIDEN_percent,BIDEN_votes,TRUMP_percent,TRUMP_votes
0,Alabama,234080,195887.0,38193.0,14848,4789,3459,3155.0,304.0,210,...,2321377,300000,0.129234,Republican,-0.254,-6,0.366,849624,0.62,1441170
1,Alaska,27085,,,3845,3673,102,,,4,...,358649,152000,0.423813,Republican,-0.1,-2,0.428,153502,0.528,189543
2,Arizona,302324,293574.0,8750.0,25412,4216,6464,5992.0,472.0,162,...,3384904,2471000,0.730006,Democratic,0.003,2,0.494,1672143,0.491,1661686


In [5]:
# Attach the daily COVID series to the state reference table for every state
# code in all_states_code; both files are keyed on 'state_code'.
# NOTE(review): the exact join semantics live in add_data_csv -- confirm there.
base_data, new_data = "./data/raw_0_states.csv", "./data/raw_1_covid_daily.csv"
base_state_col, new_state_col = 'state_code', 'state_code'
use_state, how_join = all_states_code, 'inner'
df_covid_daily = add_data_csv(
    base_data, new_data,
    base_state_col, new_state_col,
    use_state, how_join,
)
# Notebook preview of the first rows.
df_covid_daily.head(3)

Unnamed: 0,state,state_code,region,division,date,tot_cases,new_case,tot_death,new_death
0,Alaska,AK,West,Pacific,01/22/2020,0,0,0,0
1,Alaska,AK,West,Pacific,01/23/2020,0,0,0,0
2,Alaska,AK,West,Pacific,01/24/2020,0,0,0,0


In [6]:
# Same inputs and key columns as the all-states daily table, but restricted to
# the state codes in swing_states_code.
# NOTE(review): the exact join semantics live in add_data_csv -- confirm there.
base_data, new_data = "./data/raw_0_states.csv", "./data/raw_1_covid_daily.csv"
base_state_col, new_state_col = 'state_code', 'state_code'
use_state, how_join = swing_states_code, 'inner'
df_covid_daily_swing = add_data_csv(
    base_data, new_data,
    base_state_col, new_state_col,
    use_state, how_join,
)
# Notebook preview of the first rows.
df_covid_daily_swing.head(3)

Unnamed: 0,state,state_code,region,division,date,tot_cases,new_case,tot_death,new_death
0,Arizona,AZ,West,Mountain,01/22/2020,0,0,0,0
1,Arizona,AZ,West,Mountain,01/23/2020,0,0,0,0
2,Arizona,AZ,West,Mountain,01/24/2020,0,0,0,0


In [7]:
# --- election + latest COVID table -------------------------------------------
# Keep only the states whose name appears in the shapefile's NAME column.
# The explicit .copy() makes the filtered frame independent of the frame it
# was selected from, so the column assignments below do not trigger pandas'
# SettingWithCopyWarning.
df_covid_election = df_covid_election.loc[
    df_covid_election["state"].isin(contiguous_usa["NAME"])
].copy()

# swing_state_<year> carries the state's color_<year> value for swing states
# and NaN for every other state.
is_swing_state = df_covid_election["state"].isin(swing_states)
df_covid_election["swing_state_2020"] = np.where(
    is_swing_state, df_covid_election['color_2020'], np.nan)
df_covid_election["swing_state_2016"] = np.where(
    is_swing_state, df_covid_election['color_2016'], np.nan)

# --- daily COVID tables ------------------------------------------------------
# The 'date' column is parsed as month/day/year text into datetimes.
df_covid_daily['date'] = pd.to_datetime(df_covid_daily['date'],
                                        format='%m/%d/%Y')
df_covid_daily_swing['date'] = pd.to_datetime(df_covid_daily_swing['date'],
                                              format='%m/%d/%Y')

# Tag every daily swing-state row with the 2016 and 2020 winner of its state.
# Both frames use the same key name, so a single `on` is enough; the inner
# join (pandas' default) is spelled out to make the row filtering explicit.
df_covid_daily_swing = pd.merge(
    left=df_covid_daily_swing,
    right=df_election[["state", "win_2016", "win_2020"]],
    on='state',
    how='inner',
)

#### make plots

In [8]:
# All-states plot for the 2016 and 2020 elections.
plot_0 = make_plot(df_covid_election, contiguous_usa)


# The map, scatter and time-series figures come in 2020/2016 pairs that differ
# only in the election year, so each pair is produced by one small helper.
# Every call builds its own argument objects (hover lists, filtered frames).

def _swing_map(year):
    """Build the swing-state map for the election in `year` ('2020' or '2016').

    Passes the swing_state_<year> and color_<year> column names to
    make_plot_map, with a single hover entry for the state name.
    """
    hover_list = [('State', '@NAME')]
    return make_plot_map(df_covid_election, contiguous_usa,
                         'swing_state_' + year, 'color_' + year, hover_list,
                         year + ' Election Result of Swing States')


def _swing_scatter(year):
    """Build the total-cases vs. total-deaths scatter for swing states.

    Points are coloured by the win_<year> column, using one palette entry per
    category in category_list.
    """
    category_list = ['Democratic', 'Republican']
    color_palette = ["#5DADE2", "#EC7063"]
    source_df = df_covid_election[df_covid_election['state'].isin(swing_states)]
    hover_list = [('State', '@state'),
                  ('Total Cases', '@{Total Cases}'),
                  ('Total Deaths', '@{Total Deaths}')]
    return make_plot_scatter(source_df, category_list,
                             'win_' + year, color_palette,
                             'Total Cases', 'Total Deaths', hover_list,
                             'the number of total cases',
                             'the number of total deaths',
                             'Total cases and total deaths in swing states',
                             'colors from ' + year + ' election results')


def _swing_time_series(year):
    """Build the cumulative-cases time series grouped by the win_<year> column."""
    hover_list = [('Date', '@date{%F}'),
                  ('Total Cases (thousands)', '@{tot_cases}{int}')]
    return make_plot_time_series(
        df_covid_daily_swing, 'win_' + year, 'tot_cases',
        'total cases (thousands)',
        'total cumulative cases for states where each party won in ' + year,
        hover_list)


# map 1 / map 2
plot_1 = _swing_map('2020')
plot_2 = _swing_map('2016')

# scatter plot 1 / scatter plot 2
plot_3 = _swing_scatter('2020')
plot_4 = _swing_scatter('2016')

# bar chart 1: share of turnout by mail in 2016 vs. 2020, swing states only
# (rows where swing_state_2020 is not null).
source_df = df_covid_election.loc[
    df_covid_election["swing_state_2020"].notnull(),
    ["state", "percent_turnout_mail_2016",
     "percent_turnout_mail_2020", "Total Cases",
     "Total Deaths", 'win_2020',
     'win_2016']]
x_axis_list = swing_states
title = "the percentage of turnout by mail in 2016 and 2020 election"
y1 = "percent_turnout_mail_2016"
y2 = "percent_turnout_mail_2020"
y1_label = "2016"
y2_label = "2020"
hover_list = [("win", "@$name"),
              ('Total Cases', '@{Total Cases}'),
              ('Total Deaths', '@{Total Deaths}')]
plot_5 = make_plot_bar(source_df, x_axis_list, title,
                       y1, y2, y1_label, y2_label, hover_list)

# daily 1 / daily 2
plot_6 = _swing_time_series('2020')
plot_7 = _swing_time_series('2016')



#### show plots in an html file

In [9]:
# Jinja2 page template handed to bokeh's file_html; it provides the
# placeholders for the Bokeh CSS/JS resources and the plot div/script.
template = Template(
    """
    <!DOCTYPE html>
    <html lang="en">
        <head>
            <meta charset="utf-8">
            <title>{{ title if title else "Bokeh Plot" }}</title>
            {{ bokeh_css | safe }}
            {{ bokeh_js | safe }}
        </head>
        <body>
            {{ plot_div | safe }}
            {{ plot_script | safe }}
        </body>
    </html>
    """)

# Swing-state page: the seven swing-state figures stacked in four rows.
html = file_html(Column(Row(plot_1, plot_2),
                        Row(plot_3, plot_4),
                        Row(plot_5),
                        Row(plot_6, plot_7)),
                 template=template, resources=CDN)
output_file = './example/plot_swingstate.html'
# The page declares charset utf-8, so write it as UTF-8 explicitly instead of
# relying on the platform's default text encoding.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(html)
# Open the generated page in the browser.
view(output_file)

# All-states page, rendered with the given page title.
html2 = file_html(plot_0, CDN, "plot all states")
output_file2 = './example/plot_all_states.html'
with open(output_file2, 'w', encoding='utf-8') as f:
    f.write(html2)
view(output_file2)