In [1]:
import dask.dataframe as dd
import pandas as pd
import numpy as np

In [2]:
# Load dataset into a Dask dataframe
df = dd.read_csv("timeseries.csv", dtype={'population': 'float64', 'cases': 'float64', 'deaths': 'float64'}, parse_dates=["date"])

# Filter for US states
df_us = df[(df['country'] == 'United States') & (df['level'] == 'state')]

# Filter time period
start_date = "2020-01-01"
end_date = "2021-02-28"
df_us = df_us[(df_us['date'] >= start_date) & (df_us['date'] <= end_date)]

# Compute per-capita mortality
total_deaths = df_us.groupby('state')['deaths'].sum().compute()
avg_population = df_us.groupby('state')['population'].mean().compute()
per_capita_mortality = (total_deaths / avg_population).sort_values(ascending=False)
print(per_capita_mortality)

state
New Jersey                      0.096971
Connecticut                     0.077912
Massachusetts                   0.070073
New York                        0.069850
Louisiana                       0.045904
Washington, D.C.                0.045661
Rhode Island                    0.043833
Michigan                        0.042039
Illinois                        0.030218
Pennsylvania                    0.029602
Maryland                        0.029020
Delaware                        0.026575
Mississippi                     0.017606
Colorado                        0.016981
Indiana                         0.016166
Georgia                         0.014636
Ohio                            0.013175
New Hampshire                   0.012777
Minnesota                       0.012669
Washington                      0.012169
New Mexico                      0.011746
Iowa                            0.011377
Virginia                        0.010966
Nevada                          0.010136
Alabama   

In [3]:
# Compute Case Fatality Rate (CFR) per month
df_us['month'] = df_us['date'].dt.to_period('M')
total_cases_monthly = df_us.groupby(['state', 'month'])['cases'].sum().compute()
total_deaths_monthly = df_us.groupby(['state', 'month'])['deaths'].sum().compute()
CFR = (total_deaths_monthly / total_cases_monthly).unstack().fillna(0)
print(CFR)

month                          2020-03   2020-04   2020-05   2020-06  \
state                                                                  
Alabama                       0.005324  0.029829  0.038967  0.029617   
Alaska                        0.000000  0.026795  0.024091  0.013322   
American Samoa                0.000000  0.000000  0.000000  0.000000   
Arizona                       0.000000  0.029733  0.039077  0.004230   
Arkansas                      0.009231  0.018736  0.021011  0.015134   
California                    0.020168  0.034800  0.039834  0.031787   
Colorado                      0.018294  0.043694  0.053447  0.054194   
Connecticut                   0.018184  0.064257  0.089880  0.093442   
Delaware                      0.013341  0.027265  0.036570  0.041941   
Florida                       0.008534  0.029055  0.042748  0.034057   
Georgia                       0.021317  0.038733  0.043180  0.042942   
Guam                          0.027027  0.036155  0.031863  0.02

In [4]:
# Compute ranking of states based on CFR changes
delta_CFR = CFR.diff(axis=1).fillna(0)
aggr_CFR_changes = delta_CFR.abs().sum(axis=1).sort_values(ascending=False)
print(aggr_CFR_changes)

state
Northern Mariana Islands        0.289362
New Jersey                      0.171437
Connecticut                     0.168700
United States Virgin Islands    0.167598
Michigan                        0.166088
Massachusetts                   0.141675
Pennsylvania                    0.140805
New York                        0.135330
New Hampshire                   0.125815
Puerto Rico                     0.116179
Rhode Island                    0.111610
Arizona                         0.108636
Ohio                            0.105202
Louisiana                       0.102905
Missouri                        0.101753
Minnesota                       0.101041
Maryland                        0.096822
Indiana                         0.093083
Washington, D.C.                0.092233
Colorado                        0.090094
Illinois                        0.083157
Oklahoma                        0.082268
West Virginia                   0.082170
Wisconsin                       0.081135
New Mexico