# Analyzing deaths in California during the COVID-19 pandemic

In [542]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt
%matplotlib inline
import json
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
# Register the theme object from altair_latimes under the name 'latimes',
# then make it the active Altair theme for every chart below.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
# Let pandas display up to 50 columns and 1000 rows before truncating.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
# Turn off Altair's cap on the number of rows a chart's data may contain.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [543]:
# Weekly death counts by state and selected causes; the file name indicates 2014-2018.
deathsold = pd.read_csv('input/Weekly_Counts_of_Deaths_by_State_and_Select_Causes__2014-2018.csv')

In [544]:
# Same kind of weekly table for the more recent period; the file name indicates 2019-2020.
deathsnu = pd.read_csv('input/Weekly_Counts_of_Deaths_by_State_and_Select_Causes__2019-2020.csv')

In [545]:
def _normalize_columns(columns):
    """Return ``columns`` rewritten as lowercase, underscore-separated labels.

    Steps, in order: strip surrounding whitespace, lowercase, turn spaces into
    '_', replace '__' with '_' (a single pass), drop '(' and ')', and turn '-'
    into '_'.

    Every replacement is literal (``regex=False``) so that '(' and ')' are
    never interpreted as regular-expression metacharacters, regardless of the
    default the installed pandas version uses for ``regex``.
    """
    return (
        columns.str.strip()
        .str.lower()
        .str.replace(' ', '_', regex=False)
        .str.replace('__', '_', regex=False)
        .str.replace('(', '', regex=False)
        .str.replace(')', '', regex=False)
        .str.replace('-', '_', regex=False)
    )


# Both files get exactly the same cleanup so their column labels follow one convention.
deathsnu.columns = _normalize_columns(deathsnu.columns)
deathsold.columns = _normalize_columns(deathsold.columns)

In [546]:
# Stack the older and newer tables into one frame with a fresh 0..n-1 index.
deaths = pd.concat([deathsold, deathsnu], ignore_index=True)

In [547]:
# Columns that identify a row and are carried through unchanged.
id_columns = ['jurisdiction_of_occurrence', 'mmwr_year', 'mmwr_week', 'week_ending_date']

# Count columns that are unpivoted into (cause, count) pairs.
cause_columns = [
    'all_cause',
    'natural_cause',
    'septicemia_a40_a41',
    'malignant_neoplasms_c00_c97',
    'diabetes_mellitus_e10_e14',
    'alzheimer_disease_g30',
    'influenza_and_pneumonia_j10_j18',
    'chronic_lower_respiratory_diseases_j40_j47',
    'other_diseases_of_respiratory_system_j00_j06,j30_j39,j67,j70_j98',
    'nephritis,_nephrotic_syndrome_and_nephrosis_n00_n07,n17_n19,n25_n27',
    'symptoms,_signs_and_abnormal_clinical_and_laboratory_findings,_not_elsewhere_classified_r00_r99',
    'diseases_of_heart_i00_i09,i11,i13,i20_i51',
    'cerebrovascular_diseases_i60_i69',
]

# Wide -> long: one row per (jurisdiction, week, cause).
deaths_melt = deaths.melt(
    id_vars=id_columns,
    value_vars=cause_columns,
    var_name='cause',
    value_name='count',
)

In [548]:
# Preview the first rows of the long-format table.
deaths_melt.head()

Unnamed: 0,jurisdiction_of_occurrence,mmwr_year,mmwr_week,week_ending_date,cause,count
0,Alabama,2014,1,01/04/2014,all_cause,355.0
1,Alabama,2014,2,01/11/2014,all_cause,872.0
2,Alabama,2014,3,01/18/2014,all_cause,1044.0
3,Alabama,2014,4,01/25/2014,all_cause,1022.0
4,Alabama,2014,5,02/01/2014,all_cause,1040.0


In [549]:
# Year is kept as a string label (later cells compare it with '2020');
# week number is kept as a plain integer.
deaths_melt = deaths_melt.astype({'mmwr_year': str, 'mmwr_week': int})

In [550]:
# Restrict the long table to rows whose jurisdiction is California.
is_california = deaths_melt['jurisdiction_of_occurrence'].eq('California')
ca_deaths = deaths_melt.loc[is_california]

### What's the mean number of deaths per week, 2014-2019?

In [551]:
# Preview the first ten California rows.
ca_deaths.head(10)

Unnamed: 0,jurisdiction_of_occurrence,mmwr_year,mmwr_week,week_ending_date,cause,count
1044,California,2014,1,01/04/2014,all_cause,3142.0
1045,California,2014,2,01/11/2014,all_cause,5371.0
1046,California,2014,3,01/18/2014,all_cause,5522.0
1047,California,2014,4,01/25/2014,all_cause,5337.0
1048,California,2014,5,02/01/2014,all_cause,5197.0
1049,California,2014,6,02/08/2014,all_cause,5186.0
1050,California,2014,7,02/15/2014,all_cause,5089.0
1051,California,2014,8,02/22/2014,all_cause,4939.0
1052,California,2014,9,03/01/2014,all_cause,4759.0
1053,California,2014,10,03/08/2014,all_cause,4756.0


In [608]:
# Baseline rows: every year other than '2020', restricted to the 'natural_cause' series.
# NOTE(review): the variable name says "all" but the filter is 'natural_cause',
# not 'all_cause' -- confirm this is the intended series.
is_baseline_year = ca_deaths['mmwr_year'].ne('2020')
is_natural_cause = ca_deaths['cause'].eq('natural_cause')
all_ca_deaths_then = ca_deaths.loc[is_baseline_year & is_natural_cause]

In [609]:
# Comparison rows: year '2020' only, restricted to the 'natural_cause' series.
# NOTE(review): the variable name says "all" but the filter is 'natural_cause',
# not 'all_cause' -- confirm this is the intended series.
is_2020 = ca_deaths['mmwr_year'].eq('2020')
is_natural_cause = ca_deaths['cause'].eq('natural_cause')
all_ca_deaths_now = ca_deaths.loc[is_2020 & is_natural_cause]

In [610]:
# Preview the baseline (non-2020) rows.
all_ca_deaths_then.head()

Unnamed: 0,jurisdiction_of_occurrence,mmwr_year,mmwr_week,week_ending_date,cause,count
18531,California,2014,1,01/04/2014,natural_cause,2932.0
18532,California,2014,2,01/11/2014,natural_cause,5017.0
18533,California,2014,3,01/18/2014,natural_cause,5181.0
18534,California,2014,4,01/25/2014,natural_cause,4986.0
18535,California,2014,5,02/01/2014,natural_cause,4842.0


In [611]:
# Mean count per week number, averaged over all baseline years.
ca_deaths_then_grouped = all_ca_deaths_then.groupby('mmwr_week', as_index=False)['count'].mean()

In [612]:
# Same aggregation for 2020; week_ending_date is part of the key so it is
# carried into the result alongside the week number.
week_keys = ['mmwr_week', 'week_ending_date']
ca_deaths_now_grouped = all_ca_deaths_now.groupby(week_keys, as_index=False)['count'].mean()

In [613]:
# Preview the per-week baseline means.
ca_deaths_then_grouped.head()

Unnamed: 0,mmwr_week,count
0,1,5192.0
1,2,5548.666667
2,3,5372.666667
3,4,5188.333333
4,5,5190.833333


In [614]:
# Preview the per-week 2020 values.
ca_deaths_now_grouped.head()

Unnamed: 0,mmwr_week,week_ending_date,count
0,1,01/04/2020,5470.0
1,2,01/11/2020,5464.0
2,3,01/18/2020,5420.0
3,4,01/25/2020,5425.0
4,5,02/01/2020,5314.0


In [615]:
# Line up baseline and 2020 figures by week number; only weeks present in
# both tables survive the inner join. The two 'count' columns come out as
# 'count_x' (baseline) and 'count_y' (2020) via pandas' default suffixes.
then_now = pd.merge(
    ca_deaths_then_grouped,
    ca_deaths_now_grouped,
    how='inner',
    on='mmwr_week',
)

In [616]:
# Give the merged columns readable names: baseline mean -> 'before', 2020 value -> 'after'.
readable_labels = {"mmwr_week": "week", "count_x": "before", "count_y": "after"}
then_now = then_now.rename(columns=readable_labels)

In [663]:
# Keep weeks 10 through 15 (strictly greater than 9, strictly less than 16).
# `.copy()` makes this an explicitly independent frame: a later cell adds a
# column to it, and wrapping the slice in pd.DataFrame(...) does not promise
# a copy for DataFrame input.
then_now_15 = then_now[(then_now['week'] < 16) & (then_now['week'] > 9)].copy()

In [664]:
# Display the filtered before/after comparison table.
then_now_15

Unnamed: 0,week,before,week_ending_date,after
9,10,5008.833333,03/07/2020,5340.0
10,11,4993.166667,03/14/2020,5107.0
11,12,4811.833333,03/21/2020,5302.0
12,13,4747.5,03/28/2020,5313.0
13,14,4768.333333,04/04/2020,5515.0
14,15,4676.5,04/11/2020,5249.0


In [665]:
# Long format for charting: one row per (week, series), where the 'year'
# column holds the series label 'before' or 'after'.
then_now_15_melt = then_now_15.melt(
    id_vars=['week', 'week_ending_date'],
    value_vars=['before', 'after'],
    var_name='year',
    value_name='count',
)

In [666]:
# Parse the MM/DD/YYYY strings with an explicit format instead of relying on
# inferred parsing: month/day order is then unambiguous, and a value that does
# not match the format raises instead of being silently misread.
then_now_15_melt['week_ending_date'] = pd.to_datetime(
    then_now_15_melt['week_ending_date'], format='%m/%d/%Y'
)

In [667]:
# Check the column dtypes of the long-format chart table.
then_now_15_melt.dtypes

week                         int64
week_ending_date    datetime64[ns]
year                        object
count                      float64
dtype: object

In [668]:
# Axis styling is built first so the encoding below stays readable.
date_axis = alt.Axis(format='%b. %-d', tickCount=4, grid=False)
count_axis = alt.Axis(
    tickSize=0,
    domainOpacity=0,
    tickCount=5,
    offset=4,
    gridWidth=.6,
    gridColor='#dddddd',
    format='',
)

# One line per series ('before' / 'after'), y-axis fixed to 0-6000, no legend
# and no view border. The parenthesised expression is the cell's last value,
# so the chart is rendered as the cell output.
(
    alt.Chart(then_now_15_melt)
    .mark_line(size=4)
    .encode(
        x=alt.X('week_ending_date:T', title='', axis=date_axis),
        y=alt.Y('count', title='', axis=count_axis, scale=alt.Scale(domain=(0, 6000))),
        color=alt.Color('year', legend=None),
    )
    .properties(width=600, height=320, title='')
    .configure_view(strokeOpacity=0)
)

In [669]:
# Total of the 2020 ('after') column over the selected weeks, rounded to a whole number.
then_now_15['after'].sum().round(0)

31826.0

In [670]:
# Difference between the 2020 total and the baseline total over the selected weeks.
then_now_15['after'].sum() - then_now_15['before'].sum()

2819.8333333333358

In [671]:
# Same difference expressed as a percentage of the baseline total, to two decimals.
after_total = then_now_15['after'].sum()
before_total = then_now_15['before'].sum()
(((after_total - before_total) / before_total) * 100).round(2)

9.72

In [672]:
# Per-week percentage change of 'after' relative to 'before', to two decimals.
weekly_diff = then_now_15['after'].sub(then_now_15['before'])
then_now_15['change'] = weekly_diff.div(then_now_15['before']).mul(100).round(2)

In [673]:
# Show the comparison table ordered by week number (ascending is the default).
then_now_15.sort_values('week')

Unnamed: 0,week,before,week_ending_date,after,change
9,10,5008.833333,03/07/2020,5340.0,6.61
10,11,4993.166667,03/14/2020,5107.0,2.28
11,12,4811.833333,03/21/2020,5302.0,10.19
12,13,4747.5,03/28/2020,5313.0,11.91
13,14,4768.333333,04/04/2020,5515.0,15.66
14,15,4676.5,04/11/2020,5249.0,12.24


---

In [628]:
# County-level table; the file name indicates provisional COVID-19 death counts by U.S. county.
covid_deaths_counties = pd.read_csv('input/Provisional_COVID-19_Death_Counts_in_the_United_States_by_County.csv')

In [629]:
# Rewrite column labels as lowercase with underscores: strip whitespace,
# lowercase, spaces -> '_', drop parentheses, '-' -> '_'.
# regex=False makes every replacement literal, so '(' and ')' are never
# treated as regular-expression metacharacters whatever the pandas default is.
covid_deaths_counties.columns = (
    covid_deaths_counties.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
)

In [630]:
# Preview the first rows of the county-level table.
covid_deaths_counties.head()

Unnamed: 0,date_as_of,first_week,last_week,state,county_name,fips_county_code,deaths_involving_covid_19,deaths_from_all_causes
0,04/28/2020,02/01/2020,04/25/2020,AL,Etowah County,1055,10,346
1,04/28/2020,02/01/2020,04/25/2020,AL,Houston County,1069,13,432
2,04/28/2020,02/01/2020,04/25/2020,AL,Jefferson County,1073,29,2052
3,04/28/2020,02/01/2020,04/25/2020,AL,Lee County,1081,24,262
4,04/28/2020,02/01/2020,04/25/2020,AL,Mobile County,1097,27,1103


In [631]:
# Rows whose state code is 'CA'. `.copy()` makes this an explicitly
# independent frame: a later cell adds a column to it, and wrapping the slice
# in pd.DataFrame(...) does not promise a copy for DataFrame input.
ca_covid_deaths = covid_deaths_counties[covid_deaths_counties['state'] == 'CA'].copy()

In [632]:
# COVID-involved deaths as a percentage of all-cause deaths per county, to two decimals.
covid_share = ca_covid_deaths['deaths_involving_covid_19'].div(ca_covid_deaths['deaths_from_all_causes'])
ca_covid_deaths['covid_pct'] = covid_share.mul(100).round(2)

In [633]:
# Counties ranked from the highest to the lowest COVID share.
ca_covid_deaths.sort_values('covid_pct', ascending=False)

Unnamed: 0,date_as_of,first_week,last_week,state,county_name,fips_county_code,deaths_involving_covid_19,deaths_from_all_causes,covid_pct
20,04/28/2020,02/01/2020,04/25/2020,CA,San Mateo County,6081,34,1188,2.86
11,04/28/2020,02/01/2020,04/25/2020,CA,Los Angeles County,6037,459,16747,2.74
21,04/28/2020,02/01/2020,04/25/2020,CA,Santa Clara County,6085,62,2799,2.22
22,04/28/2020,02/01/2020,04/25/2020,CA,Tulare County,6107,16,766,2.09
9,04/28/2020,02/01/2020,04/25/2020,CA,Alameda County,6001,43,2615,1.64
14,04/28/2020,02/01/2020,04/25/2020,CA,Riverside County,6065,65,4313,1.51
16,04/28/2020,02/01/2020,04/25/2020,CA,San Bernardino County,6071,52,3492,1.49
17,04/28/2020,02/01/2020,04/25/2020,CA,San Diego County,6073,81,5637,1.44
18,04/28/2020,02/01/2020,04/25/2020,CA,San Francisco County,6075,20,1499,1.33
13,04/28/2020,02/01/2020,04/25/2020,CA,Placer County,6061,12,968,1.24


In [634]:
# Total COVID-involved deaths across the California counties listed in the table.
ca_covid_deaths['deaths_involving_covid_19'].sum()

950