In [6]:
import dask.dataframe as dd
import pandas as pd

In [7]:
data_frame = dd.read_csv('timeseries.csv', dtype={'cases': 'float64', 'deaths': 'float64', 'population': 'float64'})
us_data = data_frame[(data_frame['country'] == 'United States') & (data_frame['level'] == 'state')]
us_data['date'] = dd.to_datetime(us_data['date'])

filtered_data = us_data[(us_data['date'] >= '2020-01-01') & (us_data['date'] <= '2021-02-28')]


deaths_per_state = filtered_data.groupby('state')['deaths'].sum()
avg_population_per_state = filtered_data.groupby('state')['population'].mean()

per_capita_mortality = (deaths_per_state / avg_population_per_state).compute()
per_capita_mortality = per_capita_mortality.sort_values(ascending=False)
print("Per-Capita Mortality Ranking:")
print(per_capita_mortality)

Per-Capita Mortality Ranking:
state
New Jersey                      0.096971
Connecticut                     0.077912
Massachusetts                   0.070073
New York                        0.069850
Louisiana                       0.045904
Washington, D.C.                0.045661
Rhode Island                    0.043833
Michigan                        0.042039
Illinois                        0.030218
Pennsylvania                    0.029602
Maryland                        0.029020
Delaware                        0.026575
Mississippi                     0.017606
Colorado                        0.016981
Indiana                         0.016166
Georgia                         0.014636
Ohio                            0.013175
New Hampshire                   0.012777
Minnesota                       0.012669
Washington                      0.012169
New Mexico                      0.011746
Iowa                            0.011377
Virginia                        0.010966
Nevada               

In [8]:
# Compute CFR per month
filtered_data['year_month'] = filtered_data['date'].dt.to_period('M')
cases_per_month = filtered_data.groupby(['state', 'year_month'])['cases'].sum()
deaths_per_month = filtered_data.groupby(['state', 'year_month'])['deaths'].sum()

cfr_per_month = (deaths_per_month / cases_per_month).compute()
cfr_per_month = cfr_per_month.fillna(0)
print("Case Fatality Rate per Month:")
print(cfr_per_month)

Case Fatality Rate per Month:
state    year_month
Alabama  2020-03       0.005324
         2020-04       0.029829
         2020-05       0.038967
         2020-06       0.029617
         2020-07       0.023956
                         ...   
Wyoming  2020-03       0.000000
         2020-04       0.008708
         2020-05       0.013089
         2020-06       0.016462
         2020-07       0.012681
Length: 290, dtype: float64


In [9]:
# Compute month-to-month changes in CFR
cfr_changes = cfr_per_month.groupby('state').diff().fillna(0)

# Aggregate month-to-month changes per state
cfr_trend = cfr_changes.groupby('state').sum().sort_values(ascending=False)
print("CFR Change Ranking:")
print(cfr_trend)

CFR Change Ranking:
state
New Jersey                      0.083710
Connecticut                     0.074609
Massachusetts                   0.074312
Pennsylvania                    0.064987
New Hampshire                   0.061563
Michigan                        0.061151
New York                        0.058222
Rhode Island                    0.055043
Illinois                        0.048020
Maryland                        0.044210
Minnesota                       0.039993
Washington                      0.039423
Washington, D.C.                0.038932
Ohio                            0.037463
New Mexico                      0.033150
Missouri                        0.033118
Colorado                        0.032330
West Virginia                   0.030102
Delaware                        0.029754
Indiana                         0.029178
Maine                           0.026348
California                      0.025430
Wisconsin                       0.023905
Mississippi                    