In [1]:
import dask.dataframe as dd
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Load the dataset into a Dask DataFrame
covid_data = dd.read_csv(
    'timeseries.csv',
    parse_dates=['date'],
    dtype={
        'aggregate': 'object',
        'cases': 'float64',
        'city': 'object',
        'population': 'float64'
    },
    low_memory=False
)

In [3]:
# Filter for US states data
us_states = covid_data[covid_data['country'] == 'United States']
us_states = us_states[us_states['level'] == 'state']

us_states['date'] = dd.to_datetime(us_states['date'])

mask = (us_states['date'] >= '2020-01-01') & (us_states['date'] <= '2021-02-28')
filtered_data = us_states[mask]

total_deaths = filtered_data.groupby('state')['deaths'].sum().compute()
avg_population = filtered_data.groupby('state')['population'].mean().compute()
per_capita_mortality = total_deaths / avg_population

# Sort states by per-capita mortality
mortality_ranking = per_capita_mortality.sort_values(ascending=False)
print(mortality_ranking)

state
New Jersey                      0.096971
Connecticut                     0.077912
Massachusetts                   0.070073
New York                        0.069850
Louisiana                       0.045904
Washington, D.C.                0.045661
Rhode Island                    0.043833
Michigan                        0.042039
Illinois                        0.030218
Pennsylvania                    0.029602
Maryland                        0.029020
Delaware                        0.026575
Mississippi                     0.017606
Colorado                        0.016981
Indiana                         0.016166
Georgia                         0.014636
Ohio                            0.013175
New Hampshire                   0.012777
Minnesota                       0.012669
Washington                      0.012169
New Mexico                      0.011746
Iowa                            0.011377
Virginia                        0.010966
Nevada                          0.010136
Alabama   

In [4]:
monthly_data = filtered_data.groupby(['state', filtered_data['date'].dt.to_period('M')])
monthly_deaths = monthly_data['deaths'].sum().compute()
monthly_cases = monthly_data['cases'].sum().compute()

cfr_matrix = pd.DataFrame()
for state in monthly_deaths.index.get_level_values('state').unique():
    state_deaths = monthly_deaths[state]
    state_cases = monthly_cases[state]
    cfr = (state_deaths / state_cases).fillna(0)
    cfr_matrix[state] = cfr
print(cfr_matrix)

          Alabama    Alaska  American Samoa   Arizona  Arkansas  California  \
date                                                                          
2020-03  0.005324  0.000000             NaN  0.000000  0.009231    0.020168   
2020-04  0.029829  0.026795             0.0  0.029733  0.018736    0.034800   
2020-05  0.038967  0.024091             0.0  0.039077  0.021011    0.039834   
2020-06  0.029617  0.013322             0.0  0.004230  0.015134    0.031787   
2020-07  0.023956  0.014205             0.0  0.019471  0.012545    0.025430   

         Colorado  Connecticut  Delaware   Florida  ...     Texas  \
date                                                ...             
2020-03  0.018294     0.018184  0.013341  0.008534  ...  0.011850   
2020-04  0.043694     0.064257  0.027265  0.029055  ...  0.024013   
2020-05  0.053447     0.089880  0.036570  0.042748  ...  0.027363   
2020-06  0.054194     0.093442  0.041941  0.034057  ...  0.020100   
2020-07  0.050624     0.092793  

In [5]:
# Calculate month-to-month changes in CFR
cfr_changes = cfr_matrix.diff()
total_cfr_change = cfr_changes.sum()
cfr_change_ranking = total_cfr_change.sort_values(ascending=False)
print(cfr_change_ranking)

New Jersey                      0.083710
Connecticut                     0.074609
Massachusetts                   0.067364
Pennsylvania                    0.064987
New Hampshire                   0.061563
Michigan                        0.061151
New York                        0.058222
Rhode Island                    0.055043
Maryland                        0.044210
Minnesota                       0.039993
Washington, D.C.                0.038932
Ohio                            0.037463
Illinois                        0.035138
New Mexico                      0.033150
Missouri                        0.033118
Colorado                        0.032330
West Virginia                   0.030102
Delaware                        0.029754
Indiana                         0.029178
Maine                           0.026348
Mississippi                     0.022253
North Dakota                    0.020217
Montana                         0.020157
Arizona                         0.019471
Alabama         