# COVID-19 cases among healthcare workers

#### Updated 4/10/2020

### Load Python tools

In [1]:
import pandas as pd
from shapely.geometry import shape
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import openpyxl
import altair as alt
from altair_saver import save
import lxml
import requests
from shapely.geometry import Point, LineString
import geojson
import json
import glob
import io
import os
import pyarrow
from shapely.geometry import Point, LineString, MultiLineString
import altair_latimes as lat
# Register the 'latimes' Altair theme from altair_latimes, then make it the active theme.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
# Display-only setting: floats render with thousands separators and zero decimals,
# so values rounded to two places later in the notebook still show as whole numbers.
pd.options.display.float_format = '{:,.0f}'.format

### Read healthcare worker infections data from CHHS

In [2]:
# Workbook of healthcare-worker case counts (path is relative to the notebook's folder).
hcw_workbook_path = 'input/HCW tables for LAT_20200408.xlsx'
hcworkers = pd.read_excel(hcw_workbook_path)

In [3]:
# Title-case county names (e.g. 'ALAMEDA' -> 'Alameda') so they match the spelling
# used by the other county tables this frame is merged with later.
county_titlecased = hcworkers['County'].str.title()
hcworkers['County'] = county_titlecased

In [4]:
# Lower-case every column label. From here on the county column is `county`, so the
# earlier title-casing cell (which refers to `County`) cannot be re-run after this one.
lowercase_labels = hcworkers.columns.str.lower()
hcworkers.columns = lowercase_labels

In [5]:
def _strip_less_than(text):
    """Return `text` with every '<' character removed."""
    return text.replace('<', '')


# Counts may arrive as text such as '<N'; drop the '<' so the column can be parsed
# as a number in the next cell.
# NOTE(review): '<N' presumably marks a suppressed small count; after stripping it is
# treated as exactly N - confirm that is acceptable for the analysis.
# astype(str) also turns missing values into the text 'nan'.
hcw_as_text = hcworkers['healthcareworker'].astype(str)
hcworkers['healthcareworker'] = hcw_as_text.apply(_strip_less_than)

In [6]:
# Convert the cleaned count column to floating point.
hcw_counts = hcworkers['healthcareworker']
hcworkers['healthcareworker'] = hcw_counts.astype('float64')

### Read data from hospital utilization report

In [7]:
# County-level hospital beds, ICU beds and population.
# The default location is under the original author's home directory; set the
# DATA_ROOT environment variable to run the notebook from another machine.
data_root = os.environ.get('DATA_ROOT', '/Users/mhustiles/data')
pop = pd.read_csv(
    os.path.join(data_root, 'github/notebooks/coronavirus/output/icus_county_pop_slim.csv')
)

In [8]:
# Drop the `id` column, which is not referenced by the visible cells of this notebook.
# errors='ignore' keeps the cell safe to re-run: without it a second execution raises
# KeyError because `id` has already been removed from `pop`.
pop = pop.drop(columns=['id'], errors='ignore')

In [9]:
# Preview the first five rows of the hospital/population table.
pop.head(n=5)

Unnamed: 0,county,total_beds,icu_beds,population,icus_per1000
0,Alameda,3746,264,1643700,16
1,Amador,52,6,37829,16
2,Butte,653,59,227075,26
3,Calaveras,25,8,45235,18
4,Colusa,48,6,21464,28


---

### Read data from *LA Times* coronavirus tracker

In [10]:
# Per-county confirmed cases and deaths from the coronavirus tracker repository.
# The default location is under the original author's home directory; set the
# DATA_ROOT environment variable to run the notebook from another machine.
data_root = os.environ.get('DATA_ROOT', '/Users/mhustiles/data')
counties = pd.read_json(
    os.path.join(data_root, 'github/coronavirus-tracker/_data/counties/totals/all.json')
)

In [11]:
# Testing time series from the coronavirus tracker repository.
# NOTE(review): `testing` is not referenced by any other visible cell - confirm it is still needed.
# The default location is under the original author's home directory; set the
# DATA_ROOT environment variable to run the notebook from another machine.
data_root = os.environ.get('DATA_ROOT', '/Users/mhustiles/data')
testing = pd.read_json(
    os.path.join(data_root, 'github/coronavirus-tracker/_data/testing/timeseries.json')
)

In [12]:
# Store the county FIPS code as text, left-padded with zeros to three characters
# (e.g. 1 -> '001').
# NOTE(review): the saved preview of `counties` shows unpadded values (1, 3, 5) -
# confirm whether that output is stale or an export artifact.
fips_as_text = counties['fips'].astype(str)
counties['fips'] = fips_as_text.str.zfill(3)

In [13]:
# Preview the first five rows of the tracker's county totals.
counties.head(n=5)

Unnamed: 0,fips,county,confirmed_cases,deaths
0,1,Alameda,719,17
1,3,Alpine,1,0
2,5,Amador,7,0
3,7,Butte,13,0
4,9,Calaveras,8,0


---

### CA county geography

In [None]:
# California county boundaries.
# The default location is under the original author's home directory; set the
# DATA_ROOT environment variable to run the notebook from another machine.
data_root = os.environ.get('DATA_ROOT', '/Users/mhustiles/data')
county_geo = gpd.read_file(os.path.join(data_root, 'data/gis/ca-counties.geojson'))

In [None]:
# Lower-case the geography's column labels so they follow the same convention as
# the other tables (the merges below rely on the lower-case `name` column).
lowercase_geo_labels = county_geo.columns.str.lower()
county_geo.columns = lowercase_geo_labels

In [None]:
# Number of rows (county features) loaded from the GeoJSON file.
county_geo.shape[0]

---

### Merge with other dataframes

In [None]:
# Attach bed counts and population to each geography row. The geography's `name` is
# matched against `county` in `pop`; the left join keeps every geography row, leaving
# the `pop` columns (including `county`) empty where `pop` has no matching county.
county_geo_pop = county_geo.merge(
    pop,
    how='left',
    left_on='name',
    right_on='county',
)

In [None]:
# Join tracker totals on the geography's `name` rather than on `county`.
# `county` in county_geo_pop comes from the left join with `pop`, so it is NaN for any
# county that has no row in `pop` (the `pop` preview goes from Alameda straight to
# Amador, while the tracker also lists Alpine). Joining on it silently dropped the
# tracker figures for those counties, and pandas also matches NaN keys to each other.
# Renaming the tracker's key to `name` for the join leaves the resulting columns
# (including the existing `county` column) unchanged.
county_geo_pop_covid = county_geo_pop.merge(
    counties.rename(columns={'county': 'name'}),
    on='name',
    how='left',
)

In [None]:
# Join healthcare-worker counts on the geography's `name` rather than on `county`.
# `county` in the left frame originates from the earlier left join with `pop`, so it is
# NaN for counties absent from `pop`; joining on it would drop healthcare-worker data
# for those counties and would pair NaN keys with any hcworkers row lacking a county.
# Renaming the key to `name` for the join leaves the resulting columns unchanged.
# NOTE(review): assumes hcworkers has one row per county and no `name` column of its
# own - confirm against the workbook; repeated counties would duplicate geography rows.
county_geo_pop_covid_all = county_geo_pop_covid.merge(
    hcworkers.rename(columns={'county': 'name'}),
    on='name',
    how='left',
)

---

In [None]:
# Keep only the columns needed for the rate calculations, as a plain DataFrame
# (the geometry column is left behind).
worker_columns = [
    'fips_x',
    'name',
    'total_beds',
    'population',
    'confirmed_cases',
    'healthcareworker',
    'deaths',
]
workers = pd.DataFrame(county_geo_pop_covid_all[worker_columns])

In [None]:
# `fips_x` is the merge-suffixed FIPS column; give it back its plain name.
workers = workers.rename(columns={"fips_x": "fips"})

In [None]:
# Derived rates, each rounded to two decimals:
#   hc_cases_share       - healthcare-worker cases as a percentage of confirmed cases
#   hc_per_100k_pop      - healthcare-worker cases per 100,000 residents
#   hc_per_100_hosp_beds - healthcare-worker cases per 100 hospital beds
hc_cases = workers['healthcareworker']

share_of_confirmed = (hc_cases / workers['confirmed_cases']) * 100
workers['hc_cases_share'] = share_of_confirmed.round(2)

per_100k_residents = (hc_cases * 100000) / workers['population']
workers['hc_per_100k_pop'] = per_100k_residents.round(2)

per_100_beds = (hc_cases * 100) / workers['total_beds']
workers['hc_per_100_hosp_beds'] = per_100_beds.round(2)

In [None]:
# Keep counties with more than 5 healthcare-worker cases (rows with a missing count
# are excluded too), ordered from the highest to the lowest rate per 100 hospital beds.
# NOTE(review): the threshold of 5 presumably screens out suppressed '<N' counts whose
# '<' was stripped earlier - confirm against the source workbook.
above_threshold = workers['healthcareworker'] > 5
workersout = workers.loc[above_threshold].sort_values(
    by='hc_per_100_hosp_beds',
    ascending=False,
)

In [None]:
# Preview the five counties with the highest rate per 100 hospital beds.
workersout.head(n=5)

### Statewide share of confirmed COVID cases that are healthcare workers (counties with more than 5 healthcare worker cases only)

In [None]:
# Both totals are taken from `workersout`, i.e. only counties that passed the
# "more than 5 healthcare-worker cases" filter, not every county in the state.
hc_cases_total = workersout['healthcareworker'].sum()
confirmed_cases_total = workersout['confirmed_cases'].sum()
'{:,.2f}%'.format(hc_cases_total / confirmed_cases_total * 100)

In [None]:
# Create the output folder first so the export also works from a fresh checkout;
# DataFrame.to_csv does not create missing directories.
os.makedirs('output', exist_ok=True)
# The DataFrame index is written as the first, unnamed CSV column.
workersout.to_csv('output/healthcareworkers.csv')