In [1]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.resources import CDN
from bokeh.embed import file_html
from bokeh.embed import components
from jinja2 import Template
import datetime
import os

In [2]:
#etl
def covid_csv_to_df(csv_path):
    """Load one daily report CSV into a DataFrame.

    The two header spellings that contain a slash ('Country/Region',
    'Province/State') are renamed to their underscore forms, and every
    missing value is replaced by the string 'None'.

    Parameters
    ----------
    csv_path : path or file-like object accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
    """
    header_aliases = {
        'Country/Region': 'Country_Region',
        'Province/State': 'Province_State',
    }
    report = pd.read_csv(csv_path)
    report = report.rename(columns=header_aliases)
    # Missing cells become the literal string 'None' (not NaN).
    return report.fillna('None')

def _covid_location_summary(covid_df, column, value, label):
    """Aggregate one daily report for the rows where ``covid_df[column] == value``.

    Shared implementation behind ``covid_province_state`` and
    ``covid_country_region``. The input frame is never modified.

    Parameters
    ----------
    covid_df : pandas.DataFrame
        A daily report containing ``column`` plus 'Confirmed', 'Deaths' and
        'Recovered'; optionally 'Last_Update' or 'Last Update'.
    column : str
        Name of the column to filter on.
    value : str
        Value to match in ``column``.
    label : str
        Key under which ``value`` is echoed back in the result.

    Returns
    -------
    dict
        ``{label, 'confirmed', 'deaths', 'active', 'date'}`` where
        ``active = confirmed - (deaths + recovered)`` and ``date`` is 'NA'
        when no usable timestamp is available.
    """
    rows = covid_df.loc[covid_df[column] == value]

    def _total(count_col):
        # Counts may hold the 'None' placeholder written by covid_csv_to_df;
        # coerce anything non-numeric to 0 instead of editing the caller's frame.
        return pd.to_numeric(rows[count_col], errors='coerce').fillna(0).sum()

    con = _total('Confirmed')
    dea = _total('Deaths')
    rec = _total('Recovered')
    act = con - (dea + rec)

    # 'Last_Update' is split on a space, 'Last Update' on 'T'; the first
    # column that exists wins, exactly as in the original if/elif.
    dat = 'NA'
    for date_col, separator in (('Last_Update', ' '), ('Last Update', 'T')):
        if date_col in covid_df.columns:
            stamps = rows[date_col]
            if len(stamps) > 0 and isinstance(stamps.iloc[0], str):
                dat = stamps.iloc[0].split(separator)[0]
            break

    return {label: value,
            'confirmed': con,
            'deaths': dea,
            'active': act,
            'date': dat}


def covid_province_state(covid_df,province_state):
    """Summarise one daily report for a single province/state.

    Returns a dict with keys 'province_state', 'confirmed', 'deaths',
    'active' and 'date'. Totals are 0 and 'date' is 'NA' when no row matches.
    ``covid_df`` is left untouched.
    """
    return _covid_location_summary(covid_df, 'Province_State',
                                   province_state, 'province_state')


def covid_country_region(covid_df,country_region):
    """Summarise one daily report for a single country/region.

    Returns a dict with keys 'country_region', 'confirmed', 'deaths',
    'active' and 'date'. Totals are 0 and 'date' is 'NA' when no row matches.
    ``covid_df`` is left untouched.
    """
    return _covid_location_summary(covid_df, 'Country_Region',
                                   country_region, 'country_region')

def dates_to_filenames(start_date,end_date):
    """Return the daily report file names for an inclusive date range.

    One name per calendar day between ``start_date`` and ``end_date``
    (anything ``pd.date_range`` accepts), formatted as 'MM-DD-YYYY.csv'.
    """
    days = pd.date_range(start=start_date, end=end_date)
    return [day.strftime('%m-%d-%Y') + '.csv' for day in days]

#analytics
def covid_dict_to_timeseries_df(ps_dict,start_date,end_date):
    """Turn per-day summary records into a daily time series with deltas.

    Parameters
    ----------
    ps_dict : list of dict
        One record per day, each with at least a 'confirmed' entry. The
        number of records must equal the number of days in the range.
    start_date, end_date :
        Bounds passed to ``pd.date_range`` to build the index.

    Returns
    -------
    pandas.DataFrame
        The records indexed by date, plus:
        * 'total_change'   - day-over-day difference in 'confirmed' (0 on day one)
        * 'percent_change' - that difference as a percentage of the previous
          day's count; 0 on day one and wherever the previous count is 0.
        Remaining NaN values are replaced by 0.
    """
    df = pd.DataFrame(ps_dict, index=pd.date_range(start_date, end_date))
    confirmed = df['confirmed'].to_numpy()

    # Day-over-day difference; the first day has no predecessor and stays 0.
    total_change = np.zeros_like(confirmed)
    total_change[1:] = confirmed[1:] - confirmed[:-1]

    # Work on float copies so NaN can mark "previous day was 0" without
    # failing on integer data or writing back into the frame's own storage.
    previous = confirmed[:-1].astype(float)
    current = confirmed[1:].astype(float)
    previous[previous == 0] = np.nan

    percent_change = np.zeros(len(confirmed))
    percent_change[1:] = (current - previous) / previous * 100

    df['total_change'] = total_change
    df['percent_change'] = percent_change
    # NaN (undefined percentage) is reported as 0.
    return df.fillna(0)

#plots
def plot_total_cases(ts_df,title='World',show_plot=True):
    """Build the 'COVID-19 Cases' figure (confirmed, active and deaths over time).

    Parameters
    ----------
    ts_df : pandas.DataFrame
        Time series whose 'confirmed', 'active' and 'deaths' columns are
        plotted against the source's 'index' column.
    title : str
        Accepted for signature compatibility; currently NOT used - the
        figure title is the fixed string 'COVID-19 Cases'.
    show_plot : bool
        When truthy, enable notebook output and display the figure.

    Returns
    -------
    The Bokeh figure.
    """
    src = ColumnDataSource(ts_df)

    # NOTE(review): plot_width/plot_height are kept as in the original; newer
    # Bokeh releases expect width/height instead - confirm before upgrading.
    p = figure(plot_width=425,plot_height=425,
               title='COVID-19 Cases',
               x_axis_label='Date',
               y_axis_label='Number',
               x_axis_type='datetime')

    # Shaded band between the confirmed and active curves.
    p.varea(source=src,
            x='index',
            y1='confirmed',
            y2='active',
            fill_color="aliceblue")

    # Lines (these also create the legend entries, in this order).
    line_specs = (('confirmed', 'blue', 'Confirmed Cases'),
                  ('active', 'cornflowerblue', 'Active Cases'),
                  ('deaths', 'darkgray', 'Deaths'))
    for column, colour, label in line_specs:
        p.line(source=src,
               x='index',
               y=column,
               color=colour,
               legend_label=label)

    # Point markers, drawn in the same order as before.
    marker_specs = (('deaths', 'darkgray'),
                    ('confirmed', 'blue'),
                    ('active', 'cornflowerblue'))
    for column, colour in marker_specs:
        p.circle(source=src,
                 x='index',
                 y=column,
                 size=1,
                 color=colour)

    p.legend.location = "top_left"

    hover = HoverTool(tooltips = [('Confirmed', '@confirmed'),
                                  ('Active', '@active'),
                                  ('Deaths', '@deaths')])
    p.add_tools(hover)

    if show_plot:
        # Route output to the notebook, then display.
        output_notebook()
        show(p)

    return p
    
def plot_percent_change(ts_df,title='World',show_plot=True):
    """Build the 'Percent Change in Confirmed Cases' figure.

    Parameters
    ----------
    ts_df : pandas.DataFrame
        Time series whose 'percent_change' column is plotted against the
        source's 'index' column.
    title : str
        Accepted for signature compatibility; currently NOT used - the
        figure title is the fixed string 'Percent Change in Confirmed Cases'.
    show_plot : bool
        When truthy, enable notebook output and display the figure.

    Returns
    -------
    The Bokeh figure.
    """
    src = ColumnDataSource(ts_df)

    # NOTE(review): plot_width/plot_height are kept as in the original; newer
    # Bokeh releases expect width/height instead - confirm before upgrading.
    p = figure(plot_width=425,plot_height=425,
               title='Percent Change in Confirmed Cases',
               x_axis_label='Date',
               y_axis_label='Percent Change',
               x_axis_type='datetime')

    # Markers first, then the connecting line (same order as before).
    p.circle(source=src,
             x='index',
             y='percent_change',
             size=2,
             color='blue')
    p.line(source=src,
           x='index',
           y='percent_change',
           color='blue')

    hover = HoverTool(tooltips = [('Percent Change: ', '@percent_change')])
    p.add_tools(hover)

    if show_plot:
        # Route output to the notebook, then display.
        output_notebook()
        show(p)

    return p

def plot_total_change(ts_df,title='World',show_plot=True):
    """Build the 'Change in Confirmed Cases' bar chart.

    Parameters
    ----------
    ts_df : pandas.DataFrame
        Time series whose 'total_change' column gives the bar heights,
        positioned along the source's 'index' column.
    title : str
        Accepted for signature compatibility; currently NOT used - the
        figure title is the fixed string 'Change in Confirmed Cases'.
    show_plot : bool
        When truthy, enable notebook output and display the figure.

    Returns
    -------
    The Bokeh figure.
    """
    src = ColumnDataSource(ts_df)

    # NOTE(review): plot_width/plot_height are kept as in the original; newer
    # Bokeh releases expect width/height instead - confirm before upgrading.
    p = figure(plot_width=425,plot_height=425,
               title='Change in Confirmed Cases',
               x_axis_label='Date',
               y_axis_label='Number of Cases',
               x_axis_type='datetime')

    # On a datetime axis bar widths are expressed in milliseconds, so the
    # previous width of 0.5 produced hairline bars. Half a day leaves a
    # visible gap between consecutive daily bars.
    half_day_ms = 12 * 60 * 60 * 1000
    p.vbar(source=src,
           x='index',
           top='total_change',
           width=half_day_ms,
           color='blue')
    p.xgrid.grid_line_color = None
    # Anchor the y axis at zero.
    p.y_range.start = 0

    if show_plot:
        # Route output to the notebook, then display.
        output_notebook()
        show(p)

    return p

#html
# Jinja2 page template for a single location.
# Placeholders filled in by gen_covid_html:
#   title                      - text of the <title> element
#   resources_cc/_pc/_tc       - markup emitted in <head> before each plot script
#   script_cc/_pc/_tc          - plot scripts (cases, percent change, total change)
#   div_cc/_pc/_tc             - plot containers, one per "embed-wrapper" div
#   png, iheight, iwidth       - image file name (under ../images/) and its size
#   hdr, cases, deaths         - the three text lines shown next to the image
#   update                     - "Last updated" text in the footer
# The stylesheet (../css/stylesheet.css) and the image are referenced relative
# to the rendered page.
template = Template(
    '''<!DOCTYPE html>
        <html lang="en">
            <head>
                <meta charset="utf-8">
                <link rel="stylesheet" type="text/css" href="../css/stylesheet.css" media="screen"/>
                <title> {{ title }} </title>
                <style>
                </style>                
                {{ resources_cc }}
                {{ script_cc }}
                {{ resources_pc }}
                {{ script_pc }}
                {{ resources_tc }}
                {{ script_tc }}
            </head>
            <body class="body">
                <div class="grid-container">
                    <div class="grid-infog">
                        <img src="../images/{{ png }}" height={{ iheight }} width={{ iwidth }} class="gpx" />
                        <p>{{ hdr }}</p>
                        <p>{{ cases }}</p>
                        <p>{{ deaths }}</p>
                    </div>           
                    <div class="embed-wrapper">
                        {{ div_cc }}
                    </div>            
                    <div class="embed-wrapper">
                        {{ div_pc }}
                    </div>
                    <div class="embed-wrapper">
                        {{ div_tc }}
                    </div>
                </div>
                <div class="footer">
                    <p>Data sourced from <a href="https://github.com/CSSEGISandData">Johns Hopkins</a>.</p>
                    <p>Last updated {{ update }}.</p>
                </div>
 
             </body>
        </html>
        ''')

def gen_covid_html(param_dict):
    """Render one location's COVID-19 page and write it to disk.

    Parameters
    ----------
    param_dict : dict
        * 'reports_path' - directory holding the daily report CSVs
        * 'start_date', 'end_date' - inclusive range of reports to load
        * 'country' or 'state' - location to aggregate; 'country' wins when
          both are present
        * 'title' - page title and header text
        * 'html' - dict with 'dir' (output file path), 'img' (image file
          name) and 'img_shape' ('tall' or 'wide')

    Raises
    ------
    KeyError
        If neither 'country' nor 'state' is supplied.
    ValueError
        If 'img_shape' is not 'tall' or 'wide'.

    Reports that cannot be loaded or summarised are skipped and reported on
    stdout together with the reason.
    """
    # Validate the configuration up front so mistakes surface before any
    # report is read (previously they showed up as unbound-variable errors).
    if 'country' in param_dict:
        summarise, location = covid_country_region, param_dict['country']
    elif 'state' in param_dict:
        summarise, location = covid_province_state, param_dict['state']
    else:
        raise KeyError("param_dict needs a 'country' or 'state' entry")

    # (height, width) of the location image for each supported shape.
    image_sizes = {'tall': (240, 140), 'wide': (200, 275)}
    img_shape = param_dict['html']['img_shape']
    if img_shape not in image_sizes:
        raise ValueError("img_shape must be 'tall' or 'wide', got {!r}".format(img_shape))
    iheight, iwidth = image_sizes[img_shape]

    recs = []
    for fname in dates_to_filenames(param_dict['start_date'], param_dict['end_date']):
        f_path = os.path.join(param_dict['reports_path'], fname)
        try:
            recs.append(summarise(covid_csv_to_df(f_path), location))
        except Exception as exc:
            # Best effort: keep going, but say which report failed and why.
            # NOTE(review): a skipped report leaves fewer records than days in
            # the range - confirm covid_dict_to_timeseries_df tolerates that.
            print('{}: {!r}'.format(fname, exc))

    covid_ts = covid_dict_to_timeseries_df(recs, param_dict['start_date'], param_dict['end_date'])
    cc = plot_total_cases(covid_ts, param_dict['title'], show_plot=False)
    pc = plot_percent_change(covid_ts, param_dict['title'], show_plot=False)
    tc = plot_total_change(covid_ts, param_dict['title'], show_plot=False)

    # Bokeh embed components: one (script, div) pair per figure.
    script_cc, div_cc = components(cc)
    script_pc, div_pc = components(pc)
    script_tc, div_tc = components(tc)
    # Rendered once and reused for all three template slots.
    resources = CDN.render()

    cases = "Cases: " + str(int(covid_ts['confirmed'].max()))
    deaths = "Deaths: " + str(int(covid_ts['deaths'].max()))
    update = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    html = template.render(title=param_dict['title'],
                           resources_cc=resources,
                           script_cc=script_cc,
                           div_cc=div_cc,
                           resources_pc=resources,
                           script_pc=script_pc,
                           div_pc=div_pc,
                           resources_tc=resources,
                           script_tc=script_tc,
                           div_tc=div_tc,
                           cases=cases,
                           deaths=deaths,
                           hdr=param_dict['title'],
                           png=param_dict['html']['img'],
                           iheight=iheight,
                           iwidth=iwidth,
                           update=update
                          )

    # The template declares charset utf-8, so write the file as utf-8.
    with open(param_dict['html']['dir'], 'w', encoding='utf-8') as out:
        out.write(html)
    

In [14]:
# Input and output locations, resolved against the current user's home
# directory. expanduser('~') also works where the HOME variable is not set.
home_dir = os.path.expanduser('~')
# Directory of daily report CSVs (trailing slash kept for callers that concatenate).
reports_path = os.path.join(home_dir, 'Documents/code/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports/')
# Directory the generated HTML pages are written to.
html_path = os.path.join(home_dir, 'Documents/code/covid-static/pages/')

In [16]:
# Page settings for the United States (aggregated by country), as read by gen_covid_html.
usa = dict(
    reports_path=reports_path,
    html=dict(
        dir=html_path + 'usa.html',
        img='usa.png',
        img_shape='wide',
    ),
    country='US',
    start_date='01/22/2020',
    end_date='03/29/2020',
    title='USA - COVID-19',
)

In [4]:
# Page settings for California (aggregated by state), as read by gen_covid_html.
ca = dict(
    reports_path=reports_path,
    html=dict(
        dir=html_path + 'california.html',
        img='california.png',
        img_shape='tall',
    ),
    state='California',
    start_date='01/22/2020',
    end_date='03/29/2020',
    title='California - COVID-19',
)

In [5]:
# Page settings for Maryland (aggregated by state), as read by gen_covid_html.
md = dict(
    reports_path=reports_path,
    html=dict(
        dir=html_path + 'maryland.html',
        img='maryland.png',
        img_shape='wide',
    ),
    state='Maryland',
    start_date='01/22/2020',
    end_date='03/29/2020',
    title='Maryland - COVID-19',
)

In [6]:
# Page settings for New Jersey (aggregated by state), as read by gen_covid_html.
nj = dict(
    reports_path=reports_path,
    html=dict(
        dir=html_path + 'new_jersey.html',
        img='new_jersey.png',
        img_shape='tall',
    ),
    state='New Jersey',
    start_date='01/22/2020',
    end_date='03/29/2020',
    title='New Jersey - COVID-19',
)

In [7]:
# Page settings for Washington (aggregated by state), as read by gen_covid_html.
wa = dict(
    reports_path=reports_path,
    html=dict(
        dir=html_path + 'washington.html',
        img='washington.png',
        img_shape='wide',
    ),
    state='Washington',
    start_date='01/22/2020',
    end_date='03/29/2020',
    title='Washington - COVID-19',
)

In [8]:
# Page settings for Oregon (aggregated by state), as read by gen_covid_html.
ore = dict(
    reports_path=reports_path,
    html=dict(
        dir=html_path + 'oregon.html',
        img='oregon.png',
        img_shape='wide',
    ),
    state='Oregon',
    start_date='01/22/2020',
    end_date='03/29/2020',
    title='Oregon - COVID-19',
)

In [9]:
# Page settings for Maine (aggregated by state), as read by gen_covid_html.
me = dict(
    reports_path=reports_path,
    html=dict(
        dir=html_path + 'maine.html',
        img='maine.png',
        img_shape='tall',
    ),
    state='Maine',
    start_date='01/22/2020',
    end_date='03/29/2020',
    title='Maine - COVID-19',
)

In [10]:
# Page settings for Virginia (aggregated by state), as read by gen_covid_html.
va = dict(
    reports_path=reports_path,
    html=dict(
        dir=html_path + 'virginia.html',
        img='virginia.png',
        img_shape='wide',
    ),
    state='Virginia',
    start_date='01/22/2020',
    end_date='03/29/2020',
    title='Virginia - COVID-19',
)

In [11]:
# Page settings for Michigan (aggregated by state), as read by gen_covid_html.
mi = dict(
    reports_path=reports_path,
    html=dict(
        dir=html_path + 'michigan.html',
        img='michigan.png',
        img_shape='tall',
    ),
    state='Michigan',
    start_date='01/22/2020',
    end_date='03/29/2020',
    title='Michigan - COVID-19',
)

In [12]:
# Every location to render, in the order the pages are generated.
locs = [
    usa,
    ca,
    md,
    nj,
    wa,
    ore,
    me,
    va,
    mi,
]

In [13]:
# Generate one HTML page per configured location.
for l in iter(locs):
    page_settings = l
    gen_covid_html(page_settings)