In [1]:
import dask.dataframe as dd
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Explicit column dtypes handed to the CSV reader: text-like columns
# (including 'date', which is converted to datetime only after loading)
# are kept as 'object'; the numeric measures are read as 'float64'.
dtypes = {
    **dict.fromkeys(
        ['aggregate', 'city', 'country', 'level', 'state', 'date'],
        'object',
    ),
    **dict.fromkeys(['cases', 'population', 'deaths'], 'float64'),
}

In [3]:
# Open timeseries.csv as a Dask dataframe using the explicit column dtypes.
ddf = dd.read_csv('timeseries.csv', dtype=dtypes)

# Row selection: United States records at the 'state' level ...
is_us_state = (ddf['country'] == 'United States') & (ddf['level'] == 'state')

# ... whose date falls in the 2020-01-01 .. 2021-02-28 window (inclusive).
# 'date' is still an object/string column at this point, so the bounds are
# compared as strings (presumably ISO YYYY-MM-DD values, where string order
# matches chronological order — TODO confirm against the file).
in_date_range = (ddf['date'] >= '2020-01-01') & (ddf['date'] <= '2021-02-28')

# Evaluate the filtered selection; the cells below work on this result.
df = ddf[is_us_state & in_date_range].compute()

# Convert the date strings to datetime values for the time-based grouping later.
df['date'] = pd.to_datetime(df['date'])

  df = reader(bio, **kwargs)


In [4]:
# One row per state: the largest 'deaths' value seen and the average
# 'population' across that state's rows.
state_stats = (
    df.groupby('state')
    .agg(deaths=('deaths', 'max'), population=('population', 'mean'))
    .reset_index()
)

# Deaths relative to population for each state.
state_stats['per_capita_mortality'] = state_stats['deaths'].div(state_stats['population'])

# Highest per-capita mortality first.
mortality_ranking = state_stats.sort_values(by='per_capita_mortality', ascending=False)
print(mortality_ranking.loc[:, ['state', 'per_capita_mortality']])

                           state  per_capita_mortality
31                    New Jersey              0.001713
33                      New York              0.001280
7                    Connecticut              0.001216
22                 Massachusetts              0.001187
42                  Rhode Island              0.000906
52              Washington, D.C.              0.000792
19                     Louisiana              0.000707
23                      Michigan              0.000623
14                      Illinois              0.000554
21                      Maryland              0.000536
40                  Pennsylvania              0.000527
8                       Delaware              0.000526
15                       Indiana              0.000392
25                   Mississippi              0.000373
6                       Colorado              0.000295
30                 New Hampshire              0.000277
10                       Georgia              0.000269
24        

In [5]:
# Tag every row with its calendar month (adds a 'year_month' period column to df).
df['year_month'] = df['date'].dt.to_period('M')

# Per state and month, keep the largest 'cases' and 'deaths' values
# (presumably cumulative counts, making the max the month-end figure — TODO confirm).
monthly_stats = (
    df.groupby(['state', 'year_month'])[['cases', 'deaths']]
    .max()
    .reset_index()
)

# Case fatality rate: deaths divided by cases for each state-month.
monthly_stats['cfr'] = monthly_stats['deaths'].div(monthly_stats['cases'])

# Reshape to a state x month matrix of CFR values.
cfr_matrix = monthly_stats.pivot(index='state', columns='year_month', values='cfr')
print(cfr_matrix.head())

year_month      2020-01  2020-02   2020-03   2020-04   2020-05   2020-06  \
state                                                                      
Alabama             NaN      NaN  0.013013  0.038325  0.035094  0.024970   
Alaska              NaN      NaN       NaN  0.025352  0.021368  0.010705   
American Samoa      NaN      NaN       NaN       NaN       NaN       NaN   
Arizona             NaN      NaN       NaN  0.041841  0.045295  0.014934   
Arkansas            NaN      NaN  0.014184  0.018740  0.018825  0.013462   

year_month       2020-07  
state                     
Alabama         0.022911  
Alaska          0.014401  
American Samoa       NaN  
Arizona         0.018442  
Arkansas        0.012323  


In [6]:
# Month-over-month change in CFR: each month column minus the one before it.
cfr_changes = cfr_matrix.diff(axis='columns')

# Sum of the absolute monthly changes per state.
total_changes = cfr_changes.abs().sum(axis='columns')

# States with the largest accumulated CFR movement come first.
cfr_change_ranking = total_changes.sort_values(ascending=False)
print(cfr_change_ranking)

state
Washington                      0.095788
Northern Mariana Islands        0.078341
Connecticut                     0.076915
Michigan                        0.072161
New Jersey                      0.069866
Missouri                        0.064549
Pennsylvania                    0.062147
Massachusetts                   0.061106
New York                        0.060628
United States Virgin Islands    0.059253
Wisconsin                       0.057251
Oklahoma                        0.057096
Florida                         0.056135
New Hampshire                   0.056022
Rhode Island                    0.053236
Maryland                        0.052652
Maine                           0.048384
Nevada                          0.047563
North Carolina                  0.047544
Ohio                            0.045417
South Carolina                  0.044700
Indiana                         0.044627
Louisiana                       0.043791
Vermont                         0.043505
Alabama   