# Casos e Mortes por COVID-19 nos EUA

Dados retirados de https://github.com/nytimes/covid-19-data/blob/master/us-counties.csv, base de dados levantada pelo New York Times. O jornal está lançando diariamente dados acumulados dos casos de coronavírus nos Estados Unidos, a nível de condado (cidade) e estado, de acordo com informações fornecidas pelos governos e departamentos de saúde locais. No caso dessa base de dados, recomendo utilizar até o momento (08/04/20) apenas os dados fornecidos por condado, que parecem estar muito mais completos e menos subnotificados. De qualquer forma, deixei o código para obter dados por estado, caso seja futuramente necessário.

O código a seguir permite obter esses dados e gerar quatro arquivos .csv estruturados de maneira a indicar, respectivamente, casos e mortes por COVID-19 nos EUA, a nível de condado e estado.

In [24]:
import numpy as np
import pandas as pd

In [25]:
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
data_us = pd.read_csv(url, error_bad_lines=False)
data_us.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [3]:
dates = data_us['date'].unique()
ndates = len(dates)
counties = data_us['county'].unique()
ncounties = len(counties)
states = data_us['state'].unique()
nstates = len(states)

# Casos acumulados por condado

Criando DataFrame covid_us, onde linhas representam as datas e colunas representam os counties. Esse DataFrame será preenchido posteriormente com os casos acumulados.

In [4]:
covid_us = pd.DataFrame(np.zeros((ndates,ncounties)))
covid_us.columns = counties
covid_us.index = dates


In [5]:
data_us.index = data_us['date']

In [6]:
for index, row in data_us.iterrows():
    covid_us.at[index,row['county']] = row['cases']


In [7]:
covid_us

Unnamed: 0,Snohomish,Cook,Orange,Maricopa,Los Angeles,Santa Clara,Suffolk,San Francisco,Dane,San Diego,...,Elk,Callahan,Crosby,Dallam,Hansford,Grand Isle,Dinwiddie,Lunenburg,Rappahannock,Brooke
2020-01-21,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-03,1449.0,6473.0,4.0,1049.0,4566.0,1094.0,10154.0,500.0,244.0,1112.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-04,1502.0,7439.0,5.0,1171.0,5277.0,1148.0,12328.0,529.0,253.0,1209.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-05,1558.0,8043.0,5.0,1326.0,5940.0,1207.0,12933.0,571.0,269.0,1326.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-06,1602.0,8728.0,5.0,1433.0,6360.0,1224.0,14473.0,586.0,278.0,1404.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Casos diários por condado

In [8]:
covid_us_daily = covid_us.diff().iloc[1:]
covid_us_daily

Unnamed: 0,Snohomish,Cook,Orange,Maricopa,Los Angeles,Santa Clara,Suffolk,San Francisco,Dane,San Diego,...,Elk,Callahan,Crosby,Dallam,Hansford,Grand Isle,Dinwiddie,Lunenburg,Rappahannock,Brooke
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-26,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-03,74.0,898.0,0.0,88.0,521.0,75.0,1408.0,50.0,16.0,146.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-04,53.0,966.0,1.0,122.0,711.0,54.0,2174.0,29.0,9.0,97.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-05,56.0,604.0,0.0,155.0,663.0,59.0,605.0,42.0,16.0,117.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-06,44.0,685.0,0.0,107.0,420.0,17.0,1540.0,15.0,9.0,78.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
covid_us_daily.to_csv('cases_per_county_daily.csv')

# Casos acumulados por estado

Criando DataFrame covid_us_states, onde linhas representam as datas e colunas representam os estados. Esse DataFrame será preenchido posteriormente com os casos acumulados.

In [10]:
covid_us_states = pd.DataFrame(np.zeros((ndates,nstates)))
covid_us_states.columns = states
covid_us_states.index = dates


In [11]:
for index, row in data_us.iterrows():
    covid_us_states.at[index,row['state']] = row['cases']

In [12]:
covid_us_states

Unnamed: 0,Washington,Illinois,California,Arizona,Massachusetts,Wisconsin,Texas,Nebraska,Utah,Oregon,...,Alaska,Maine,Alabama,Idaho,Montana,Puerto Rico,Virgin Islands,Guam,West Virginia,Northern Mariana Islands
2020-01-21,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-03,289.0,4.0,9.0,13.0,825.0,2.0,3.0,1.0,63.0,23.0,...,1.0,89.0,3.0,1.0,38.0,378.0,38.0,154.0,5.0,8.0
2020-04-04,289.0,6.0,9.0,14.0,915.0,2.0,3.0,1.0,63.0,23.0,...,1.0,95.0,3.0,1.0,38.0,452.0,42.0,223.0,7.0,8.0
2020-04-05,363.0,6.0,10.0,13.0,978.0,2.0,3.0,1.0,53.0,26.0,...,1.0,101.0,3.0,1.0,40.0,475.0,42.0,265.0,10.0,8.0
2020-04-06,380.0,6.0,11.0,15.0,1077.0,2.0,1.0,1.0,78.0,25.0,...,1.0,108.0,3.0,1.0,47.0,513.0,43.0,266.0,11.0,8.0


# Casos diários por estado

In [13]:
covid_us_states_daily = covid_us_states.diff().iloc[1:]
covid_us_states_daily

Unnamed: 0,Washington,Illinois,California,Arizona,Massachusetts,Wisconsin,Texas,Nebraska,Utah,Oregon,...,Alaska,Maine,Alabama,Idaho,Montana,Puerto Rico,Virgin Islands,Guam,West Virginia,Northern Mariana Islands
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-26,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-03,73.0,1.0,1.0,0.0,158.0,0.0,0.0,0.0,16.0,5.0,...,0.0,15.0,1.0,0.0,3.0,62.0,1.0,2.0,2.0,0.0
2020-04-04,0.0,2.0,0.0,1.0,90.0,0.0,0.0,0.0,0.0,0.0,...,0.0,6.0,0.0,0.0,0.0,74.0,4.0,69.0,2.0,0.0
2020-04-05,74.0,0.0,1.0,-1.0,63.0,0.0,0.0,0.0,-10.0,3.0,...,0.0,6.0,0.0,0.0,2.0,23.0,0.0,42.0,3.0,0.0
2020-04-06,17.0,0.0,1.0,2.0,99.0,0.0,-2.0,0.0,25.0,-1.0,...,0.0,7.0,0.0,0.0,7.0,38.0,1.0,1.0,1.0,0.0


In [14]:
covid_us_states_daily.to_csv('cases_per_state_daily.csv')

# Mortes causadas pela COVID-19

Importante notar que, no caso de alguns counties/states, não são disponibilizados dados sobre mortes por COVID-19.

# Acumulado por condado

Criando DataFrame deaths_us, onde linhas representam as datas e colunas representam os counties. Esse DataFrame será preenchido posteriormente com as mortes acumuladas.

In [15]:
deaths_us = pd.DataFrame(np.zeros((ndates,ncounties)))
deaths_us.columns = counties
deaths_us.index = dates
for index, row in data_us.iterrows():
    deaths_us.at[index,row['county']] = row['deaths']


In [16]:
deaths_us

Unnamed: 0,Snohomish,Cook,Orange,Maricopa,Los Angeles,Santa Clara,Suffolk,San Francisco,Dane,San Diego,...,Elk,Callahan,Crosby,Dallam,Hansford,Grand Isle,Dinwiddie,Lunenburg,Rappahannock,Brooke
2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-03,41.0,141.0,0.0,17.0,89.0,39.0,96.0,7.0,4.0,17.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-04,44.0,167.0,0.0,28.0,117.0,40.0,175.0,8.0,8.0,18.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-05,48.0,186.0,0.0,31.0,132.0,40.0,175.0,8.0,8.0,19.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-06,57.0,209.0,0.0,31.0,147.0,40.0,237.0,9.0,9.0,19.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Diário por condado

In [17]:
deaths_us_daily = deaths_us.diff().iloc[1:]
deaths_us_daily

Unnamed: 0,Snohomish,Cook,Orange,Maricopa,Los Angeles,Santa Clara,Suffolk,San Francisco,Dane,San Diego,...,Elk,Callahan,Crosby,Dallam,Hansford,Grand Isle,Dinwiddie,Lunenburg,Rappahannock,Brooke
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-03,2.0,34.0,0.0,5.0,11.0,2.0,12.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-04,3.0,26.0,0.0,11.0,28.0,1.0,79.0,1.0,4.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-05,4.0,19.0,0.0,3.0,15.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-04-06,9.0,23.0,0.0,0.0,15.0,0.0,62.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
deaths_us_daily.to_csv('deaths_per_county_daily.csv')

# Acumulado por estado

Criando DataFrame deaths_us_states, onde linhas representam as datas e colunas representam os estados. Esse DataFrame será preenchido posteriormente com as mortes acumuladas.

In [19]:
deaths_us_state = pd.DataFrame(np.zeros((ndates,nstates)))
deaths_us_state.columns = states
deaths_us_state.index = dates

In [20]:
for index, row in data_us.iterrows():
    deaths_us_state.at[index,row['state']] = row['deaths']

In [21]:
deaths_us_state

Unnamed: 0,Washington,Illinois,California,Arizona,Massachusetts,Wisconsin,Texas,Nebraska,Utah,Oregon,...,Alaska,Maine,Alabama,Idaho,Montana,Puerto Rico,Virgin Islands,Guam,West Virginia,Northern Mariana Islands
2020-01-21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-03,7.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,1.0,3.0,...,0.0,1.0,0.0,0.0,0.0,15.0,0.0,4.0,0.0,1.0
2020-04-04,7.0,0.0,0.0,0.0,17.0,0.0,0.0,0.0,1.0,3.0,...,0.0,2.0,0.0,0.0,0.0,18.0,0.0,4.0,0.0,1.0
2020-04-05,12.0,0.0,0.0,0.0,17.0,0.0,0.0,0.0,1.0,3.0,...,0.0,2.0,0.0,0.0,0.0,20.0,0.0,4.0,0.0,1.0
2020-04-06,12.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,1.0,3.0,...,0.0,2.0,0.0,0.0,0.0,21.0,1.0,4.0,0.0,1.0


# Diário por estado

In [22]:
deaths_us_state_daily = deaths_us_state.diff().iloc[1:]
deaths_us_state_daily

Unnamed: 0,Washington,Illinois,California,Arizona,Massachusetts,Wisconsin,Texas,Nebraska,Utah,Oregon,...,Alaska,Maine,Alabama,Idaho,Montana,Puerto Rico,Virgin Islands,Guam,West Virginia,Northern Mariana Islands
2020-01-22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-03,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0
2020-04-04,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
2020-04-05,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0
2020-04-06,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


In [23]:
deaths_us_state_daily.to_csv('deaths_per_state_daily.csv')