# Visualization of Covid-19 cases by Zipcodes with Python/Bokeh

### Import the required modules

In [1]:
import tabula
import requests
import datetime
import geopandas as gpd
import pandas as pd
import json
from dateutil import parser
from datetime import datetime
from bokeh.io import reset_output, output_notebook, show

from bokeh.plotting import figure
from bokeh.models import Div, Column, Row


#make bokeh output to notebook
reset_output()
output_notebook()

### Setup the bokeh themes

In [2]:
from bokeh.io import curdoc
from bokeh.themes import Theme

curdoc().theme = Theme(json={'attrs': {

    # apply defaults to Figure properties
    'Figure': {
        'toolbar_location': None,
        'outline_line_color': None,
        'min_border_right': 10,
    },

    # apply defaults to Axis properties
    'Axis': {
        'major_tick_in': None,
        'minor_tick_out': None,
        'minor_tick_in': None,
        'axis_line_color': '#CAC6B6',
        'major_tick_line_color': '#CAC6B6',
        'axis_label_text_font_size' : '16pt',
        'major_label_text_font_size': '16pt'
    },
    
    'Title':{
        'text_font_size':'20pt'
    },
    

     # apply defaults to Legend properties
    'Legend': {
        'background_fill_alpha': 0.8,
    }
}})

### Download the Covid-19 case data from San Diego County

In [3]:
url = 'https://www.sandiegocounty.gov/content/dam/sdc/hhsa/programs/phs/Epidemiology/COVID-19%20Summary%20of%20Cases%20by%20Zip%20Code.pdf'
pdf= requests.get(url)
with open(f'covid19_in_sd_{datetime.now().strftime("%d-%b-%Y-%H_%M_%S")}.pdf','wb') as f:
    f.write(pdf.content)


### Check if the pdf is downloaded

In [4]:
!ls *.pdf

covid19_in_sd_01-May-2020-20_25_54.pdf	covid19_in_sd_22-Apr-2020-10_32_40.pdf
covid19_in_sd_02-May-2020-21_12_52.pdf	covid19_in_sd_23-Apr-2020-13_02_49.pdf
covid19_in_sd_04-May-2020-09_38_55.pdf	covid19_in_sd_23-Apr-2020-17_31_24.pdf
covid19_in_sd_05-May-2020-13_11_42.pdf	covid19_in_sd_24-Apr-2020-21_47_22.pdf
covid19_in_sd_06-May-2020-17_01_59.pdf	covid19_in_sd_25-Apr-2020-21_28_58.pdf
covid19_in_sd_07-May-2020-09_58_17.pdf	covid19_in_sd_27-Apr-2020-14_00_01.pdf
covid19_in_sd_08-May-2020-11_52_52.pdf	covid19_in_sd_28-Apr-2020-22_02_35.pdf
covid19_in_sd_09-May-2020-11_45_47.pdf	covid19_in_sd_29-Apr-2020-20_17_55.pdf
covid19_in_sd_2020-04-19.pdf		covid19_in_sd_30-Apr-2020-22_16_17.pdf
covid19_in_sd_2020-04-21.pdf


In [5]:
from filecmp import cmp
from pathlib import Path

def get_only_unique_pdf(pdfs):
    # get only unique pdfs
    unique_pdfs = [pdfs[0]]
    for file in pdfs[1:]:
        duplicate = False
        for uf in unique_pdfs:
            duplicate |= cmp(file, uf, shallow=True)
        if not duplicate:
            unique_pdfs.append(file)
    return unique_pdfs
    

curr_dir = Path('.')
pdfs = list(curr_dir.glob('**/covid19_in_sd_*.pdf'))
unique_pdfs = get_only_unique_pdf(pdfs)

print(unique_pdfs)
    

[PosixPath('covid19_in_sd_22-Apr-2020-10_32_40.pdf'), PosixPath('covid19_in_sd_05-May-2020-13_11_42.pdf'), PosixPath('covid19_in_sd_02-May-2020-21_12_52.pdf'), PosixPath('covid19_in_sd_04-May-2020-09_38_55.pdf'), PosixPath('covid19_in_sd_07-May-2020-09_58_17.pdf'), PosixPath('covid19_in_sd_01-May-2020-20_25_54.pdf'), PosixPath('covid19_in_sd_2020-04-21.pdf'), PosixPath('covid19_in_sd_25-Apr-2020-21_28_58.pdf'), PosixPath('covid19_in_sd_23-Apr-2020-13_02_49.pdf'), PosixPath('covid19_in_sd_08-May-2020-11_52_52.pdf'), PosixPath('covid19_in_sd_28-Apr-2020-22_02_35.pdf'), PosixPath('covid19_in_sd_29-Apr-2020-20_17_55.pdf'), PosixPath('covid19_in_sd_24-Apr-2020-21_47_22.pdf'), PosixPath('covid19_in_sd_06-May-2020-17_01_59.pdf'), PosixPath('covid19_in_sd_09-May-2020-11_45_47.pdf'), PosixPath('covid19_in_sd_27-Apr-2020-14_00_01.pdf'), PosixPath('covid19_in_sd_23-Apr-2020-17_31_24.pdf'), PosixPath('covid19_in_sd_30-Apr-2020-22_16_17.pdf'), PosixPath('covid19_in_sd_2020-04-19.pdf')]


### Remove unncessary pdf files if there are  any duplicates

In [6]:
import os
for pdf in pdfs:
    if pdf not in unique_pdfs:
        os.remove(pdf)
        
! ls *.pdf 

covid19_in_sd_01-May-2020-20_25_54.pdf	covid19_in_sd_22-Apr-2020-10_32_40.pdf
covid19_in_sd_02-May-2020-21_12_52.pdf	covid19_in_sd_23-Apr-2020-13_02_49.pdf
covid19_in_sd_04-May-2020-09_38_55.pdf	covid19_in_sd_23-Apr-2020-17_31_24.pdf
covid19_in_sd_05-May-2020-13_11_42.pdf	covid19_in_sd_24-Apr-2020-21_47_22.pdf
covid19_in_sd_06-May-2020-17_01_59.pdf	covid19_in_sd_25-Apr-2020-21_28_58.pdf
covid19_in_sd_07-May-2020-09_58_17.pdf	covid19_in_sd_27-Apr-2020-14_00_01.pdf
covid19_in_sd_08-May-2020-11_52_52.pdf	covid19_in_sd_28-Apr-2020-22_02_35.pdf
covid19_in_sd_09-May-2020-11_45_47.pdf	covid19_in_sd_29-Apr-2020-20_17_55.pdf
covid19_in_sd_2020-04-19.pdf		covid19_in_sd_30-Apr-2020-22_16_17.pdf
covid19_in_sd_2020-04-21.pdf


### Read the pdf and clean up the data

In [7]:
def tabula_convert_pdf_to_df(pdf):
    
    #Because pdfs are saved with different dateformat in the name, use different method.
    try:
        pdf_download_date = datetime.strptime("".join(str(pdf).split('_')[3:]).split('.')[0],"%d-%b-%Y-%H%M%S") 
    except ValueError as e:
        pdf_download_date = datetime.strptime("".join(str(pdf).split('_')[3:]).split('.')[0],"%Y-%M-%d") 
    
    print(f'Converting {pdf} to df')
    #Since they keep changing the format of the pdf we need to read pdf case by case.
    if pdf_download_date < datetime.strptime('23-04-2020', "%d-%m-%Y"):
        raw_data = tabula.read_pdf(pdf,stream=False,pages=1)[0]
        #data extraction and munging
        df = raw_data.dropna(how='all').reset_index().drop(columns=['index'])
        text_data = df.columns[0]
        df.columns = df.iloc[0].values
        df = df.drop(axis=0, labels=0).rename(columns={'Count':'CaseCount'})
        title = text_data.split('\r')[-1]    
        
        count_data= pd.DataFrame({
                  'ZipCode' : pd.concat([df.iloc[:,0].astype(str),
                                         df.iloc[:,2].astype(str)]), 

                   'CaseCount': pd.concat([df.iloc[:,1],
                                           df.iloc[:,3]])
                 })
        count_data['TotalCount'] = count_data[count_data['ZipCode']=='Total']['CaseCount'].values[0]
        count_data = count_data[count_data['ZipCode'] != 'Total']
        count_data['RatePer100000'] =  "Unknown"
        
        
    else:
        raw_data = tabula.read_pdf(pdf,stream=True,pages=1, lattice=True)
        text_data = raw_data[0].columns[0]
        title = text_data.split('\r')[-1] 
      
        count_data = pd.concat([raw_data[1], raw_data[2]],axis = 0)
        count_data = count_data.rename(columns={'Zip Code':'ZipCode', 'Count':'CaseCount', 'Rate per 100,000*': "RatePer100000"})
        count_data['TotalCount'] = count_data[count_data['ZipCode']=='San Diego County Total']['CaseCount'].values[0]
        count_data = count_data[count_data['ZipCode'] != 'San Diego County Total']
        count_data['RatePer100000'] =  count_data['RatePer100000'].replace('**',"Unknown")

    #parse dates
    dates = []
    for _ in title.split():
        try:
            dates.append(parser.parse(_, fuzzy=True))
        except Exception as e:
            pass
    date = dates[0]

    updated_time = datetime.combine(dates[1],datetime.time(dates[2]))


    count_data.dropna(how='any', inplace=True)
    count_data['ReportedDate'] = date 
    count_data['UpdatedDatetime'] = updated_time
    count_data['CaseCount'] = count_data['CaseCount'].astype('int16')
    count_data['TotalCount'] = count_data['TotalCount'].str.replace(',','').astype('int16')
    count_data['ZipCode'] = count_data['ZipCode'].astype('str')
    count_data['ZipCode'] = count_data['ZipCode'].str.replace('*','')

    
    return count_data
 

In [8]:
all_dates_count_data = []   
for pdf in unique_pdfs:
    count_data = tabula_convert_pdf_to_df(pdf)
    total_count = count_data['TotalCount'].unique()[0]
    updated_date = count_data['UpdatedDatetime'].iloc[0].to_pydatetime()
    reported_date = count_data['ReportedDate'].iloc[0].to_pydatetime()
    title = f'Date through {reported_date:%Y-%m-%d}, Updated on {updated_date:%Y-%m-%d %I:%M %p}'
    print(f'{title}, TotalCount: {total_count}')
    all_dates_count_data.append(count_data)
all_dates_count_df = pd.concat(all_dates_count_data)

all_dates_count_df = all_dates_count_df.sort_values(by='ReportedDate', ascending=False).reset_index(drop=True)
all_dates_count_df

Converting covid19_in_sd_22-Apr-2020-10_32_40.pdf to df
Date through 2020-04-20, Updated on 2020-04-21 08:00 AM, TotalCount: 2434
Converting covid19_in_sd_05-May-2020-13_11_42.pdf to df
Date through 2020-05-03, Updated on 2020-05-04 08:00 AM, TotalCount: 4020
Converting covid19_in_sd_02-May-2020-21_12_52.pdf to df
Date through 2020-05-01, Updated on 2020-05-02 08:00 AM, TotalCount: 3842
Converting covid19_in_sd_04-May-2020-09_38_55.pdf to df
Date through 2020-05-02, Updated on 2020-05-03 08:00 AM, TotalCount: 3927
Converting covid19_in_sd_07-May-2020-09_58_17.pdf to df
Date through 2020-05-05, Updated on 2020-05-06 08:00 AM, TotalCount: 4319
Converting covid19_in_sd_01-May-2020-20_25_54.pdf to df
Date through 2020-04-30, Updated on 2020-05-01 08:00 AM, TotalCount: 3711
Converting covid19_in_sd_2020-04-21.pdf to df
Date through 2020-04-19, Updated on 2020-04-20 08:00 AM, TotalCount: 2325
Converting covid19_in_sd_25-Apr-2020-21_28_58.pdf to df
Date through 2020-04-24, Updated on 2020-04-

Unnamed: 0,ZipCode,CaseCount,TotalCount,RatePer100000,ReportedDate,UpdatedDatetime
0,91932,38,4662,134.9,2020-05-07,2020-05-08 08:00:00
1,92058,23,4662,93.8,2020-05-07,2020-05-08 08:00:00
2,92029,14,4662,68.4,2020-05-07,2020-05-08 08:00:00
3,92036,2,4662,Unknown,2020-05-07,2020-05-08 08:00:00
4,92037,33,4662,77.5,2020-05-07,2020-05-08 08:00:00
...,...,...,...,...,...,...
1830,92021,70,2213,Unknown,2020-04-17,2020-04-18 08:00:00
1831,92020,71,2213,Unknown,2020-04-17,2020-04-18 08:00:00
1832,92019,53,2213,Unknown,2020-04-17,2020-04-18 08:00:00
1833,92014,16,2213,Unknown,2020-04-17,2020-04-18 08:00:00


### Store the data

In [9]:
all_dates_count_df.to_json('all_dates_count_df.json')

## Plot of cases with date

In [10]:
total_count_with_date = all_dates_count_df.groupby(['UpdatedDatetime']).first().reset_index()

total_count_with_date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   UpdatedDatetime  19 non-null     datetime64[ns]
 1   ZipCode          19 non-null     object        
 2   CaseCount        19 non-null     int16         
 3   TotalCount       19 non-null     int16         
 4   RatePer100000    19 non-null     object        
 5   ReportedDate     19 non-null     datetime64[ns]
dtypes: datetime64[ns](2), int16(2), object(2)
memory usage: 812.0+ bytes


In [11]:
p1 = figure(x_axis_type="datetime", title = 'COVID19 cases in San Diego with time',
            plot_height = 500,plot_width = 500, 
            tools = "",
           )
p1.grid.grid_line_alpha=0.3
p1.xaxis.axis_label = 'Time'
p1.yaxis.axis_label = 'Count'
p1.step(total_count_with_date['UpdatedDatetime'],total_count_with_date['TotalCount'], color='red', line_width=4,mode='center' )
#show(p1)

### Get the geojson file of communities

Later in this notebook, I want to plot the data on a map base on zip code geometry. I used the export geojson from https://data.sandiegocounty.gov/Maps-and-Geographical-Resources/Zip-Codes/vsuf-uefy to get the geojson file. In addition to the zip code geometry, it also has the name of the community it belongs to.

In [12]:
county_gpd = gpd.read_file(f'Sandiego_Zip_codes.geojson')
county_gpd["x"] = county_gpd.centroid.x
county_gpd["y"] = county_gpd.centroid.y

county_gpd.head()

Unnamed: 0,community,shape_star,shape_stle,zip,geometry,x,y
0,Alpine,4149939944.16,326045.262676,91901,"MULTIPOLYGON (((-116.74539 32.96063, -116.7408...",-116.695575,32.80574
1,Bonita,273909416.836,113257.374615,91902,"MULTIPOLYGON (((-116.97172 32.70838, -116.9712...",-117.015051,32.671579
2,Boulevard,2735681408.51,241725.552214,91905,"MULTIPOLYGON (((-116.23165 32.75083, -116.2280...",-116.305467,32.718397
3,Campo,3066759065.62,287410.325075,91906,"MULTIPOLYGON (((-116.35677 32.70460, -116.3572...",-116.469687,32.660421
4,Chula Vista,403437442.009,112587.791814,91910,"MULTIPOLYGON (((-117.06354 32.65011, -117.0634...",-117.06564,32.636404



I then merge geojson zip code geometry data with case count data. I do a 'how =right' merge with the zip code as the common key. All the right rows (rows in case count per zip code) will be preserved.

In [13]:
merged =county_gpd.merge(all_dates_count_df, right_on = 'ZipCode', left_on = 'zip',
                         how = 'right').drop(columns=['zip']).rename(columns={'community':'CommunityName'})
merged

Unnamed: 0,CommunityName,shape_star,shape_stle,geometry,x,y,ZipCode,CaseCount,TotalCount,RatePer100000,ReportedDate,UpdatedDatetime
0,Alpine,4149939944.16,326045.262676,"MULTIPOLYGON (((-116.74539 32.96063, -116.7408...",-116.695575,32.80574,91901,5,4662,28.0,2020-05-07,2020-05-08 08:00:00
1,Alpine,4149939944.16,326045.262676,"MULTIPOLYGON (((-116.74539 32.96063, -116.7408...",-116.695575,32.80574,91901,5,4429,28.0,2020-05-06,2020-05-07 08:00:00
2,Alpine,4149939944.16,326045.262676,"MULTIPOLYGON (((-116.74539 32.96063, -116.7408...",-116.695575,32.80574,91901,5,4319,28.0,2020-05-05,2020-05-06 08:00:00
3,Alpine,4149939944.16,326045.262676,"MULTIPOLYGON (((-116.74539 32.96063, -116.7408...",-116.695575,32.80574,91901,4,4160,Unknown,2020-05-04,2020-05-05 08:00:00
4,Alpine,4149939944.16,326045.262676,"MULTIPOLYGON (((-116.74539 32.96063, -116.7408...",-116.695575,32.80574,91901,4,4020,Unknown,2020-05-03,2020-05-04 08:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...
2001,,,,,,,Unknown,34,2643,Unknown,2020-04-22,2020-04-23 08:00:00
2002,,,,,,,Unknown,39,2491,Unknown,2020-04-21,2020-04-22 08:00:00
2003,,,,,,,Unknown,31,2434,Unknown,2020-04-20,2020-04-21 08:00:00
2004,,,,,,,Unknown,31,2325,Unknown,2020-04-19,2020-04-20 08:00:00


In [14]:
latest = merged[merged['ReportedDate'] == merged['ReportedDate'].max()]


## Plot the case counts as function of zip code and community

In [15]:
# Choose subset of the nongeo data.
# If there is no Community found (for ZipCode = Unknown) filll the Community with 'Unknown'

nogeo_data = latest[['ZipCode', 'CaseCount', 'CommunityName','ReportedDate']].fillna('Unknown')


Split the communities into three parts to plot them seperately.

In [16]:
import numpy as np
split0 = nogeo_data.loc[nogeo_data['CommunityName'] == 'San Diego']

## Split the all other remaining 
split1, split2 = np.array_split(nogeo_data.loc[nogeo_data['CommunityName'] != 'San Diego'], 2)

# make groupby to create bokeh nested x range plots
sandiego = split0.groupby(by=['CommunityName', 'ZipCode'])
part1 = split1.groupby(by=['CommunityName', 'ZipCode'])
part2 = split2.groupby(by=['CommunityName', 'ZipCode'])

In [17]:
def create_plot(df):
    source = df.groups
    
    p = figure(plot_width=800, plot_height=300, x_range=df, toolbar_location=None,tools='',sizing_mode='scale_width',
               tooltips=[("Case Count", "@CaseCount_mean"),
                         ("Community Name, ZipCode", "@CommunityName_ZipCode")
                        ]
              )

    p.vbar(x='CommunityName_ZipCode', top='CaseCount_mean', width=1, source=df,
           line_color="white" )

    p.y_range.start = 0
    p.x_range.range_padding = 0.05
    p.xgrid.grid_line_color = None
    p.yaxis.axis_label = "Case Count"
    p.xaxis.major_label_orientation = 22/28
    p.xaxis.group_label_orientation = 22/28
    p.xaxis.major_label_text_font_size = "7pt"
    p.xaxis.group_text_font_size = "12pt"
    p.title.text_font_size = "16pt"
    p.outline_line_color = None
    p.x_range.group_padding = 1.0
    return p

In [18]:
bar_case_count= Column( create_plot(sandiego), create_plot(part1), create_plot(part2))
# groupby are not sorted by bokeh. seems like a bug
#show(bar_case_count)


# Map of Covid 19 cases according to Zip Code in San Diego

In [19]:

# Convert merged data to json. Because there are some case counts from 
# "unknown" they dont have any geo information. Also have to remove datetime fields
# because they cant be serialized with json
latest.loc[:, 'CaseCount_scaled'] = latest['CaseCount']/8
latest_json = json.loads(latest.drop(columns = ['ReportedDate', 'UpdatedDatetime']).dropna().to_json())

# Convert to json to str like object because bokeh needs it in this form.
latest_json_str = json.dumps(latest_json)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [20]:
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, HoverTool
from bokeh.palettes import brewer

def cases_map(latest_json_str):
    
    geosource = GeoJSONDataSource(geojson = latest_json_str)
    
    #Add hover tool
    hover = HoverTool(tooltips = [ ('Case Count','@CaseCount'),
                                  ('Zip Code', '@ZipCode'),
                                  ('Community Name', '@CommunityName')],
                     names=['patches'],
                      point_policy='follow_mouse',
                     )
    #Create figure object.
    map_figure = figure(
        x_axis_location=None, y_axis_location=None,
               plot_height = 500, plot_width = 500, 
               toolbar_location = None,
              tools = [hover])

    map_figure.xgrid.grid_line_color = None
    map_figure.ygrid.grid_line_color = None

    #Add patch renderer to figure. 
    map_figure.patches('xs','ys', source = geosource,
                       fill_color='white',
#              fill_color = {'field' :'CaseCount', 'transform' : color_mapper},
               line_color = 'black', line_width = 0.25, fill_alpha = 1, name = 'patches')
    
    map_figure.circle(x="x", y="y",size='CaseCount_scaled', fill_color="red",line_color='red', fill_alpha=0.5, source=geosource, name = 'markers')

    map_figure.title.text_font_size = '16pt'
    #Specify figure layout.
    #map_figure.add_layout(color_bar, 'above')

    total_count = latest['TotalCount'][0]
    reported_date = latest['ReportedDate'][0].to_pydatetime()
    updated_date = latest['UpdatedDatetime'][0].to_pydatetime()
    title = f'Date through {reported_date:%Y-%m-%d}, Updated on {updated_date:%Y-%m-%d %I:%M %p}'
    unknown = int(latest[latest['ZipCode']=='Unknown']['CaseCount'].iloc[0])
    
    
    cases = Column(Div(text = f'Number of Cases*',style={'font-size': '125%', 'color': 'blue'}), 
                   map_figure,
                   Div(text = f'*Unknown: {unknown}',style={'font-size': '100%', 'color': 'blue'}))
    
    # Here we plot the chlorpleth of the cases per 10000 cases
    #Define a sequential multi-hue color palette.
    palette = brewer['YlOrRd'][8]

    #Reverse color order 
    palette = palette[::-1]
    #max(merged['CaseCount'])
    #Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
    color_mapper = LinearColorMapper(palette = palette, low = 0, 
                                     high = 400)

    tick_labels = {0:"0", 
                   50:"50",
                   100:"100",
                   150:"150",
                   200:"200", 
                   250:"250",
                   300: "300",
                   350:"350",
                   400:">400"
                  }
    #Create color bar. 
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8, 
                         width = 250, height = 20,border_line_color=None,
                         location = (0,0), 
                         orientation = 'horizontal', 
                         major_label_overrides = tick_labels
                        )

    #Add hover tool
    hover = HoverTool(tooltips = [ ('Rate Per 100000','@RatePer100000'),
                                  ('Zip Code', '@ZipCode'),
                                  ('Community Name', '@CommunityName')]
                     )

    #Create figure object.
    map_cases_per_population = figure(
        x_axis_location=None, y_axis_location=None,
               plot_height = 500,plot_width = 500, 
               toolbar_location = None,
              tools = [hover])

    map_cases_per_population.xgrid.grid_line_color = None
    map_cases_per_population.ygrid.grid_line_color = None

    #Add patch renderer to figure. 
    map_cases_per_population.patches('xs','ys', source = geosource,
              fill_color = {'field' :'RatePer100000', 'transform' : color_mapper},
              line_color = 'black', line_width = 0.25, fill_alpha = 1)

    map_cases_per_population.title.text_font_size = '16pt'
    #Specify figure layout.
    map_cases_per_population.add_layout(color_bar, 'below')

    chloropleth_cases_per_pop = Column( Div(text = 'Cases per 100,000 people',  style={'font-size': '125%', 'color': 'blue'}),
                                  map_cases_per_population)
    maps = Column(Div(text = title,  style={'font-size': '200%', 'color': 'blue'}),
            Div(text = f'Total number of cases: {total_count}',style={'font-size': '200%', 'color': 'red'}), 
                  Row(cases, chloropleth_cases_per_pop)
                 )
    return maps


                    
                    
#show(cases_map(latest_json_str))



In [21]:
collage = Column(cases_map(latest_json_str),
               bar_case_count, 
                p1
                )
#show(collage)

In [22]:
# Generate standlone html documents with the collage of both plots

from bokeh.resources import CDN
from bokeh.embed import file_html

updated_date = latest['UpdatedDatetime'][0].to_pydatetime()
try:
    html = file_html(collage, CDN, 'Covid19 Cases in San Diego with Bokeh')
    with open(f'Covid19_{updated_date:%Y-%m-%d_%I_%M_%p}.html','w') as f:
        f.write(html)
except Exception as e:
    print(e)
    

