In [1]:
import dask.dataframe as dd
import pandas as pd

In [5]:
# Pin dtypes up front instead of leaving them to per-chunk inference:
# the numeric columns are read as float64 and the text columns as object.
dtype_dict = {
    'population': 'float64',
    'cases': 'float64',
    'deaths': 'float64',
    'aggregate': 'object',
    'city': 'object',
}

# Analysis window; both bounds are inclusive in the filter below.
PERIOD_START = "2020-01-01"
PERIOD_END = "2021-02-28"

df = dd.read_csv("timeseries.csv", dtype=dtype_dict, low_memory=False)

# Keep only rows for the United States that carry a state value.
is_us_state_row = (df['country'] == 'United States') & df['state'].notnull()
us_states_df = df[is_us_state_row]

# Parse `date` into datetimes. `.assign` builds a new lazy frame rather than
# setting a column on a frame that was derived from a filter.
us_states_df = us_states_df.assign(date=dd.to_datetime(us_states_df['date']))

# Restrict to the analysis window.
in_period = (us_states_df['date'] >= PERIOD_START) & (us_states_df['date'] <= PERIOD_END)
filtered_df = us_states_df[in_period]

# Aggregate both columns in a single pass. Calling .compute() separately for
# the deaths sum and the population mean would read and filter the CSV twice.
state_totals = (
    filtered_df.groupby('state')
    .agg({'deaths': 'sum', 'population': 'mean'})
    .compute()
)

# Total of the `deaths` column per state over the period.
deaths_per_state = state_totals['deaths']

# Mean of the `population` column per state over the period.
avg_population_per_state = state_totals['population']

# NOTE(review): the saved output of this cell contains ratios above 1
# (e.g. New York at about 6.4), which cannot be a real per-capita rate.
# Summing `deaths` over every row in the window presumably adds up running
# totals and/or rows reported at several geographic levels -- confirm the
# column semantics and row granularity before relying on this ranking.
per_capita_mortality = (deaths_per_state / avg_population_per_state).sort_values(ascending=False)
per_capita_mortality

state
New York                        6.395701
Michigan                        3.204753
Louisiana                       2.735288
Illinois                        2.043863
New Jersey                      2.031200
Georgia                         2.026085
Pennsylvania                    1.831572
Virginia                        1.359063
Mississippi                     1.356705
Indiana                         1.353423
Ohio                            1.058943
Iowa                            0.958771
Massachusetts                   0.904775
Colorado                        0.894296
Minnesota                       0.768853
Kentucky                        0.751512
Texas                           0.737643
Missouri                        0.711146
Connecticut                     0.677952
Maryland                        0.658347
Alabama                         0.599759
North Carolina                  0.549190
Florida                         0.545793
Wisconsin                       0.437617
Nebraska  

In [15]:
# Case-fatality rate (CFR) per state and calendar month, in percent:
#     CFR = 100 * sum(deaths) / sum(cases)

# Tag each row with its calendar month. `.assign` returns a new lazy frame, so
# re-running this cell simply recomputes the same `month` column.
filtered_df = filtered_df.assign(month=filtered_df['date'].dt.to_period("M"))

# Use the built-in groupby sum and divide afterwards instead of
# groupby.apply with a Python lambda: the aggregation stays vectorised and the
# per-group scalar division (whose warning shows up in the saved output of the
# apply-based version) is avoided.
monthly_totals = (
    filtered_df.groupby(['state', 'month'])[['deaths', 'cases']]
    .sum()
    .compute()
)
cfr_long = monthly_totals['deaths'] / monthly_totals['cases'] * 100

# fillna(0) runs BEFORE unstack, so it only zeroes ratios that are NaN in the
# long series (e.g. 0/0 for a month without cases). (state, month) pairs that
# never occur in the data are created by unstack afterwards and remain NaN in
# the wide table.
# NOTE(review): decide whether "no data" should be shown as 0 or left missing;
# the following cell's diff/sum treats the two differently.
cfr_per_month = cfr_long.fillna(0).unstack(level=-1)

# Trailing expression: let the notebook render the table.
cfr_per_month

month                         2020-01   2020-02   2020-03    2020-04  \
state                                                                  
Alabama                           NaN       NaN  0.532313   2.830899   
Alaska                            NaN       NaN  0.335008   2.314519   
American Samoa                    NaN       NaN       NaN   0.000000   
Arizona                           0.0  0.000000  0.000000   1.486545   
Arkansas                          NaN       NaN  0.915656   1.911450   
California                        0.0  0.000000  2.006735   3.479974   
Colorado                          NaN       NaN  0.939250   2.636616   
Connecticut                       NaN       NaN  1.814771   6.477626   
Delaware                          NaN       NaN  1.334107   2.734038   
Florida                           NaN       NaN  0.842669   2.905738   
Georgia                           NaN       NaN  2.185574   3.980977   
Guam                              NaN       NaN  2.702703   3.61

  cfr_per_month = filtered_df.groupby(['state', 'month']).apply(lambda x: (x['deaths'].sum() / x['cases'].sum()) * 100, meta=('x', 'f8')).compute()


In [7]:
# Net CFR movement per state across the monthly table.
# Step 1: month-over-month differences along the columns (the first month, and
# any month adjacent to a missing value, yields NaN).
monthly_deltas = cfr_per_month.diff(axis="columns")

# Step 2: add the differences up per state. NaN entries are skipped by sum(),
# so over an unbroken run of months this telescopes to (last - first).
net_delta_by_state = monthly_deltas.sum(axis="columns")

# Step 3: rank states from the largest increase to the largest decrease.
cfr_change = net_delta_by_state.sort_values(ascending=False)
print(cfr_change)

state
Connecticut                     9.500807
Michigan                        8.623360
New Jersey                      8.251406
Massachusetts                   7.437951
Pennsylvania                    7.418973
New York                        7.233961
Northern Mariana Islands        6.535948
New Hampshire                   6.436738
United States Virgin Islands    5.976096
Rhode Island                    5.821287
Washington, D.C.                5.330110
Indiana                         5.314359
Ohio                            5.275945
Louisiana                       5.138853
Colorado                        5.063691
Illinois                        4.802791
Maryland                        4.649307
Vermont                         4.559147
Delaware                        4.013148
New Mexico                      4.003795
Missouri                        3.979086
Minnesota                       3.958075
Washington                      3.921734
Mississippi                     3.743453
Kentucky  