In [1]:
! pip install plotly==4.8.1
! pip install bar_chart_race

Collecting plotly==4.8.1
[?25l  Downloading https://files.pythonhosted.org/packages/70/56/eabdc7b7187cdb9d6121f6de2831ad5b85f7d002fa4bfe0476dbdb554bf6/plotly-4.8.1-py2.py3-none-any.whl (11.5MB)
[K     |████████████████████████████████| 11.5MB 293kB/s 
Installing collected packages: plotly
  Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-4.8.1
Collecting bar_chart_race
[?25l  Downloading https://files.pythonhosted.org/packages/09/01/f6d1a1a0978b39560843c54be7349804d7d2faef0a869acd7c8a6fc920b0/bar_chart_race-0.1.0-py3-none-any.whl (156kB)
[K     |████████████████████████████████| 163kB 2.8MB/s 
Installing collected packages: bar-chart-race
Successfully installed bar-chart-race-0.1.0


In [2]:
import pandas as pd
import numpy as np
import os, math
from shutil import rmtree
import bar_chart_race as bcr

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from datetime import datetime
from datetime import timedelta

pd.options.plotting.backend = "plotly"

In [3]:
if os.path.isdir('COVID-19-master'): rmtree('COVID-19-master')
print("Starting Brazil data")
import requests, zipfile, io

csse_path = "https://github.com/peixebabel/COVID-19/archive/master.zip"

r = requests.get(csse_path, stream=True)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall()

Starting Brazil data


In [4]:
root_path = os.path.join(os.getcwd(), "COVID-19-master/data/")
save_path = os.path.join(os.getcwd())#, "COVID-19-master/imagens/")
save_path = os.getcwd()
casos_br_path = os.path.join(os.getcwd(), root_path+"casos-br-total.csv")
df = pd.read_csv(casos_br_path)

In [5]:
df.tail()

Unnamed: 0,Data,Suspeitos,Confirmados,Mortes
138,12/07,,1864681,72100.0
139,13/07,,1884967,72833.0
140,14/07,,1926824,74133.0
141,15/07,,1966748,75366.0
142,16/07,,2012151,76688.0


In [6]:
def plot_acumulado(column, filename, title, color, dias_projecao):

  ## Predict next seven days ##
  conf7 = list(df[column].to_numpy()[-7:])
  tc = [conf7[i+1]/conf7[i] for i in range(len(conf7)-1)]
  tc = np.array(tc).mean()
  for i in range(dias_projecao):
    conf7.append(conf7[-1]*tc)
  
  df_pred = pd.DataFrame(columns=df.columns)
  df_pred[column] = conf7[-dias_projecao:]
  ultima_data = df['Data'].to_numpy()[-1]

  ultima_data = datetime.strptime(ultima_data+'/2020', '%d/%m/%Y')
  futuro = [ultima_data + timedelta(days=i) for i in range(1, dias_projecao+1) ]
  df_pred['Data'] = [f.strftime("%d/%m") for f in futuro]
  #############################

  # fig = df.plot.bar(x='Data', y=column)
  fig = go.Figure([go.Bar(x=df['Data'], y=df[column], name=filename.capitalize())])
  fig.update_traces(marker_color=color)
  fig.update_xaxes(tickangle=-90)
  
  fig_pred = go.Figure([go.Bar(x=df_pred['Data'], y=df_pred[column], name='Projeção para os próximos 7 dias')])
  fig_pred.update_traces(marker_color='silver')
  fig.add_trace(fig_pred.data[0])

  # fig.update_layout(title=title, title_x=0.5,title_y=0.85, width=850, height=500,
  fig.update_layout(title=title, title_x=0.5,title_y=0.85, autosize=True,
                    showlegend=True, legend_orientation="h", legend=dict(x=0.25, y=1.01))

  fig.write_html(os.path.join(save_path, filename+'.html'))
  fig.show()

plot_acumulado("Confirmados", 'casos', "Casos acumulados no Brasil", 'royalblue', 7)
plot_acumulado("Mortes", 'obitos', "Óbitos acumulados no Brasil", 'darkred', 7)

In [7]:
df['Casos por dia'] = df['Confirmados'].diff()
df['Mortes por dia'] = df['Mortes'].diff()

df['Casos média móvel'] = df['Casos por dia'].rolling(7).mean()
df['Mortes média móvel'] = df['Mortes por dia'].rolling(7).mean()

def plot_pordia(column, column_mean, filename, color):
  fig = make_subplots(rows=1, cols=1)

  fig_ = df.plot.bar(x='Data', y=column)
  fig_.update_traces(marker_color=color[0], opacity=0.5, name=column, hovertemplate=None)
  fig.add_trace(fig_.data[0],row=1,col=1)

  fig__ = df.plot(x='Data', y=column_mean)
  fig__.update_traces(mode="markers+lines", hovertemplate=None,
                  marker=dict(size=3, color='white'), line=dict(width=4, color=color[1]),
                  name='Média móvel')
  fig.add_trace(fig__.data[0],row=1,col=1)


  fig.update_xaxes(tickangle=-90)
  fig.update_layout(title=column, hovermode = 'x unified', title_x=0.5,title_y=0.92, autosize=True)#width=800, height=450,)

  fig.write_html(os.path.join(save_path, filename+'.html'))
  fig.show()

plot_pordia('Mortes por dia', 'Mortes média móvel', 'obitos-por-dia', ['salmon', 'darkred'])
plot_pordia('Casos por dia', 'Casos média móvel', 'casos-por-dia', ['deepskyblue','royalblue'])

# Estados

In [8]:
print("Starting Brazil per state")
pop_estado = {
    'Rondônia': 1749000,
    'Acre': 790101,
    'Amazonas': 3874000,
    'Roraima': 496936,
    'Pará': 8074000,
    'Amapá': 751000,
    'Tocantins': 1497000,
    'Maranhão': 6851000,
    'Piauí': 3195000,
    'Ceará': 8843000,
    'Rio Grande do Norte': 3409000,
    'Paraíba': 3944000,
    'Pernambuco': 9278000,
    'Alagoas': 3322000,
    'Sergipe': 2220000,
    'Bahia': 15130000,
    'Minas Gerais': 20870000,
    'Espírito Santo': 3885000,
    'Rio de Janeiro': 16460000,
    'São Paulo': 44040000,
    'Paraná': 11080000,
    'Santa Catarina': 6727000,
    'Rio Grande do Sul': 11290000,
    'Mato Grosso do Sul': 2620000,
    'Mato Grosso': 3224000,
    'Goiás': 6523000,
    'Distrito Federal': 2570000,
}

Starting Brazil per state


In [9]:
df = pd.DataFrame(columns=['Data', 'Estado', 'Ministério', 'Óbitos'])

for csv in sorted(os.listdir(root_path))[6:-1]:

  df_dia = pd.read_csv(os.path.join(root_path, csv))
  df_dia.columns = df_dia.iloc[2]
  df_dia.drop([0,1,2, 30], inplace=True)
  df_dia.drop('Secretarias', axis=1, inplace=True)

  df_dia['Data'] = [csv.split('.')[0]] * len(pop_estado)

  if 'Óbitos' not in df_dia.columns:
    df_dia['Óbitos'] = [np.nan] * len(pop_estado)

  df = df.append(df_dia)

display(df.tail())

Unnamed: 0,Data,Estado,Ministério,Óbitos,Óbitos Ministério
25,2020-07-16,Roraima (RR),24397,425,415
26,2020-07-16,Santa Catarina (SC),49781,588,588
27,2020-07-16,São Paulo (SP),402048,19038,19038
28,2020-07-16,Sergipe (SE),41226,1071,1071
29,2020-07-16,Tocantins (TO),16672,278,278


In [10]:
# res = {}
# for estado in df_casos_diff.columns[:-1]:
#   conf7 = list(df_obitos[estado].to_numpy()[-10:])
#   tc = [conf7[i+1]/conf7[i] for i in range(len(conf7)-1)]
#   tc = np.array(tc).mean()

#   res[tc] = estado

# a = sorted(res)
# for k in range(1,7):
#   print(res[a[k]], a[k])
# # print()

In [11]:
def fill_df(column):
  df = pd.DataFrame()
  datas = []
  for csv in sorted(os.listdir(root_path))[6:-1]:
    
    datas.append(csv.split('.')[0])
    df_dia = pd.read_csv(os.path.join(root_path, csv))
    df_dia = df_dia.T
    df_dia.index = df_dia.iloc[:, 2]
    df_dia.drop([0,1,2, 30], axis=1, inplace=True)
    df_dia.columns = df_dia.loc['Estado']
    df_dia.drop('Estado', inplace=True)
    
    if column not in df_dia.index:
      row=pd.DataFrame([[np.nan] * 27], columns=df_dia.columns)

    else:    
      row = df_dia.loc[column]

    df = df.append(row)


  df['Data'] = datas
  return df

df_casos = fill_df('Ministério')
df_obitos = fill_df('Óbitos')
display(df_casos.tail())
display(df_obitos.tail())

# Remover quando atualizar dias 5 e 6
# verificar se é necessário manter o to_numeric
# df_casos = df_casos.iloc[:-4]
# df_obitos = df_obitos.iloc[:-4]
df_casos_diff = df_casos.copy()
df_obitos_diff = df_obitos.copy()

df_casos_rolling = df_casos.copy()
df_obitos_rolling = df_obitos.copy()

for estado in df_casos.columns[:-1]:
  df_casos.loc[:,estado] = pd.to_numeric(df_casos[estado])
  df_obitos.loc[:,estado] = pd.to_numeric(df_obitos[estado])

  df_casos_diff[estado] = df_casos[estado].diff()
  df_obitos_diff[estado] = df_obitos[estado].diff()

  df_casos_rolling[estado] = df_casos_diff[estado].rolling(7).mean()
  df_obitos_rolling[estado] = df_obitos_diff[estado].rolling(7).mean()


Unnamed: 0,Acre (AC),Alagoas (AL),Amapá (AP),Amazonas (AM),Bahia (BA),Ceará (CE),Distrito Federal(DF),Espírito Santo (ES),Goiás (GO),Maranhão (MA),Mato Grosso (MT),Mato Grosso do Sul (MS),Minas Gerais (MG),Paraná (PR),Paraíba (PB),Pará (PA),Pernambuco (PE),Piauí (PI),Rio Grande do Norte (RN),Rio Grande do Sul (RS),Rio de Janeiro (RJ),Rondônia (RO),Roraima (RR),Santa Catarina (SC),Sergipe (SE),São Paulo (SP),Tocantins (TO),Data
Ministério,16190,45330,31364,83526,105763,136785,70712,62799,36244,98952,28411,13197,75851,42537,60784,125714,72470,32963,39421,39240,129684,26728,22225,43031,37631,371997,15132,2020-07-12
Ministério,16260,46296,31552,84412,106891,137206,72284,63879,36696,99830,28792,13461,76822,43582,61108,126509,72901,33359,39800,39656,132044,27050,22627,43815,38221,374607,15307,2020-07-13
Ministério,16479,47005,31885,85641,110029,139437,73654,65213,37832,101467,29279,13934,78643,45363,62462,128570,73576,34156,40341,40993,132822,27528,22968,46050,39167,386607,15723,2020-07-14
Ministério,16672,47864,32408,86944,112993,141248,75379,66352,38895,102469,30319,14631,82010,47124,63939,130834,74960,35445,40654,42239,134449,27917,23681,47976,40139,393176,16031,2020-07-15
Ministério,16865,48734,33004,88025,116373,144000,77621,68118,40201,104126,31111,15330,84470,49405,65423,133039,76091,36542,40978,45344,134573,28654,24397,49781,41226,402048,16672,2020-07-16


Estado,Acre (AC),Alagoas (AL),Amapá (AP),Amazonas (AM),Bahia (BA),Ceará (CE),Distrito Federal(DF),Espírito Santo (ES),Goiás (GO),Maranhão (MA),Mato Grosso (MT),Mato Grosso do Sul (MS),Minas Gerais (MG),Pará (PA),Paraíba (PB),Paraná (PR),Pernambuco (PE),Piauí (PI),Rio de Janeiro (RJ),Rio Grande do Norte (RN),Rio Grande do Sul (RS),Rondônia (RO),Roraima (RR),Santa Catarina (SC),São Paulo (SP),Sergipe (SE),Tocantins (TO),Data
Óbitos,426,1281,474,3039,2483,6869,902,2010,851,2463,1077,161,1576,5293,1284,1045,5595,960,11415,1394,962,628,397,497,17848,984,255,2020-07-12
Óbitos,430,1297,478,3048,2535,6975,930,2040,880,2501,1105,167,1615,5318,1302,1089,5652,992,11474,1404,995,647,398,517,17907,1012,259,2020-07-13
Óbitos,436,1314,483,3064,2584,6977,960,2082,910,2536,1157,177,1688,5337,1342,1146,5715,1019,11624,1449,1060,663,403,534,18324,1033,267,2020-07-14
Óbitos,446,1331,488,3080,2638,7081,1001,2097,959,2572,1197,183,1752,5385,1383,1200,5772,1043,11757,1479,1101,668,415,569,18640,1054,271,2020-07-15
Óbitos,447,1348,493,3095,2693,7139,1037,2136,1018,2608,1235,191,1834,5448,1418,1246,5836,1065,11849,1501,1141,677,425,588,19038,1071,278,2020-07-16


In [12]:
buttons_list = []
for estado in df_casos.columns[:-1]:
  button_dict = {}
  button_dict['args'] = [{'y': [df_casos[estado], df_obitos[estado], df_casos_rolling[estado], 
                                df_casos_diff[estado], df_obitos_rolling[estado], df_obitos_diff[estado]]}, 
                          ]
  button_dict['label'] = estado.split('(')[0].rstrip()
  button_dict['method'] = 'update'
  buttons_list.append(button_dict)

fig = make_subplots(rows=4, cols=1)

fig1 = df_casos.plot.bar(x='Data', y='Acre (AC)')
fig1.update_traces(marker_color='royalblue', opacity=0.8, hovertemplate=None)#mode="markers+lines", hovertemplate=None,
                   #marker=dict(size=3, color='white'), line=dict(width=5, color='royalblue'))
fig.add_trace(fig1.data[0],row=1,col=1)

fig2 = df_obitos.plot.bar(x='Data', y='Acre (AC)')
fig2.update_traces(marker_color='orangered', opacity=0.8, hovertemplate=None)#mode="markers+lines", hovertemplate=None, 
                   #marker=dict(size=3, color='white'), line=dict(width=5, color='orangered'))
fig.add_trace(fig2.data[0],row=2,col=1)

################################

fig3_bar = df_casos_diff.plot.bar(x='Data', y='Acre (AC)')
fig3_bar.update_traces(marker_color='deepskyblue', opacity=0.6, name='Valor absoluto',
                       hovertemplate=None)

fig3 = df_casos_rolling.plot(x='Data', y='Acre (AC)')
fig3.update_traces(mode="markers+lines", hovertemplate=None,
                  marker=dict(size=3, color='white'), line=dict(width=4, color='royalblue'),
                  name='Média móvel')

fig.add_trace(fig3.data[0],row=3,col=1)
fig.add_trace(fig3_bar.data[0],row=3,col=1)

################################

fig4_bar = df_obitos_diff.plot.bar(x='Data', y='Acre (AC)')
fig4_bar.update_traces(marker_color='salmon', opacity=0.6, name='Valor absoluto',
                      hovertemplate=None)

fig4 = df_obitos_rolling.plot(x='Data', y='Acre (AC)')
fig4.update_traces(mode="markers+lines", hovertemplate=None,
                   marker=dict(size=3, color='white'), line=dict(width=4, color='red'),
                   name='Média móvel')

fig.add_trace(fig4.data[0],row=4,col=1)
fig.add_trace(fig4_bar.data[0],row=4,col=1)

fig.update_layout(
    autosize=True,
    # width=950,
    height=1400,
    yaxis_title = 'Casos Acumulados',
    yaxis2_title = 'Óbitos Acumulados',
    yaxis3_title = 'Casos por dia',
    yaxis4_title = 'Óbitos por dia',
    hovermode = 'x unified',
    title = { 'text': 'Selecione o estado: ', 'font':{'size':16} },
    title_y = 0.96,
    title_x = 0.04,
    showlegend=True,
    updatemenus=[
          dict(
            # type = "dropdown",
            # type = "buttons",
            buttons=buttons_list,
            # direction="down",
            # pad={"r": 10, "t": 10},
            x=0.25,
            xanchor="left",
            y=1.05,
            font={'size':15,},
            # yanchor="top"
          ),
    ]
)

fig.write_html(os.path.join(save_path, 'resumo-por-estado.html'))
fig.show()



## Totais e por milhão

In [13]:
ultimo_dia = sorted(os.listdir(root_path))[-2]
df = pd.read_csv(os.path.join(root_path, ultimo_dia))
columns = df.iloc[2]
df.drop([0,1,2,30], inplace=True)
df.columns = columns
df.loc[:, 'Óbitos'] = pd.to_numeric(df['Óbitos'])

df.sort_values('Óbitos', inplace=True)
df.head(27)

2,Estado,Secretarias,Ministério,Óbitos,Óbitos Ministério
14,Mato Grosso do Sul (MS),15330,15330,191,191
29,Tocantins (TO),16672,16672,278,278
25,Roraima (RR),25007,24397,425,415
3,Acre (AC),16865,16865,447,447
5,Amapá (AP),33004,33004,493,493
26,Santa Catarina (SC),49781,49781,588,588
24,Rondônia (RO),28654,28654,677,677
11,Goiás (GO),40544,40201,1018,986
9,Distrito Federal(DF),77621,77621,1037,1037
20,Piauí (PI),37057,36542,1065,1043


In [14]:
ultimo_dia = sorted(os.listdir(root_path))[-2]
df = pd.read_csv(os.path.join(root_path, ultimo_dia))

def por_milhao(estados, numeros):

  # print(estados, numeros)
  for k, estado in enumerate(estados):
    e = estado.split('(')[0].rstrip()

    pop = pop_estado[e]/1000000
    numeros[k] = numeros[k]/pop 

  # print(numeros)
  return numeros

columns = df.iloc[2]
df.drop([0,1,2,30], inplace=True)
df.columns = columns

In [15]:
ultimo_dia

'2020-07-16.csv'

In [16]:
def plot_estado(df, column, filename, title, colors):

  df[column] = pd.to_numeric(df[column])
  df = df.sort_values(column, ascending=True)

  numeros = df[column].to_numpy(copy=True)
  pormilhao = por_milhao(df['Estado'], numeros)
  df['Por milhão'] = pormilhao

  ticks = [estado.split('(')[-1][:2] for estado in df['Estado'].to_numpy()]
  df['Estado'] = ticks

  fig = make_subplots(rows=1, cols=2)

  fig1 = df.plot.barh(x=column, y='Estado', text=column)
  fig1.update_traces(marker_color=colors[0])
  fig.add_trace(fig1.data[0], row=1, col=1)

  fig2 = df.plot.barh(x='Por milhão', y='Estado', text='Por milhão')
  fig2.update_traces(marker_color=colors[1])
  fig.add_trace(fig2.data[0], row=1, col=2)
  
  # fig.update_traces(textposition='outside')
  fig.update_layout(autosize=True, margin=dict(t=50),height=650,#width=950, 
                    annotations=[ dict(x=0., y=1.1, text=title+' totais', showarrow=False, 
                                       xref='paper', yref='paper', font=dict(size=18)), 
                                 dict(x=0.84, y=1.1, xref='paper', yref='paper', font=dict(size=18),
                                      text=title+' por milhão de habitantes',  showarrow=False), ],)
  fig.update_xaxes(nticks=10)
  fig.write_html(os.path.join(save_path, filename+'.html'))
  fig.show()

plot_estado(df, 'Secretarias', 'casos-por-estado', 'Casos', ['royalblue', 'deepskyblue'])
plot_estado(df, 'Óbitos', 'obitos-por-estado', 'Óbitos', ['darkred', 'salmon'])

In [17]:
def fill_df(column):
  df = pd.DataFrame()
  datas = []
  for csv in sorted(os.listdir(root_path))[6:-1]:
    
    datas.append(csv.split('.')[0])
    df_dia = pd.read_csv(os.path.join(root_path, csv))
    df_dia = df_dia.T
    df_dia.index = df_dia.iloc[:, 2]
    df_dia.drop([0,1,2, 30], axis=1, inplace=True)
    df_dia.columns = df_dia.loc['Estado']
    df_dia.drop('Estado', inplace=True)
    
    if column not in df_dia.index:
      row=pd.DataFrame([[0] * 27], columns=df_dia.columns)

    else:    
      row = df_dia.loc[column]

    df = df.append(row)


  df['Data'] = datas
  df.set_index('Data', inplace=True)

  for column in df.columns:
    estado = column.split('(')[0].rstrip()
    pop = pop_estado[estado]
    pop /= 100000
    df.loc[:,column] = pd.to_numeric(df[column])
    df.loc[:,column] = df[column].divide(pop)

  return df

df_casos = fill_df('Ministério')
df_obitos = fill_df('Óbitos')

display(df_casos.tail())
display(df_obitos.tail())

# Remover quando atualizar dias 5 e 6
# verificar se é necessário manter o to_numeric
# df_casos = df_casos.iloc[:-4]
# df_obitos = df_obitos.iloc[:-4]
df_casos_diff = df_casos.copy()
df_obitos_diff = df_obitos.copy()

for estado in df_casos.columns[:-1]:

  df_casos_diff[estado] = df_casos[estado].diff()
  df_obitos_diff[estado] = df_obitos[estado].diff()

bcr_html = bcr.bar_chart_race(df_casos, 
                              figsize=(5, 4),
                              title='COVID-19 Brasil: Casos por 100 mil habitantes')
with open('bcr-casos-por-estado.html', 'w') as fp:
  fp.write(bcr_html.data)

Unnamed: 0_level_0,Acre (AC),Alagoas (AL),Amapá (AP),Amazonas (AM),Bahia (BA),Ceará (CE),Distrito Federal(DF),Espírito Santo (ES),Goiás (GO),Maranhão (MA),Mato Grosso (MT),Mato Grosso do Sul (MS),Minas Gerais (MG),Paraná (PR),Paraíba (PB),Pará (PA),Pernambuco (PE),Piauí (PI),Rio Grande do Norte (RN),Rio Grande do Sul (RS),Rio de Janeiro (RJ),Rondônia (RO),Roraima (RR),Santa Catarina (SC),Sergipe (SE),São Paulo (SP),Tocantins (TO)
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2020-07-12,2049.105114,1364.539434,4176.298269,2156.066082,699.02842,1546.816691,2751.439689,1616.447876,555.633911,1444.343891,881.234491,503.70229,363.445137,383.907942,1541.176471,1557.022541,781.095064,1031.70579,1156.38017,347.564216,787.873633,1528.187536,4472.40691,639.675933,1695.09009,844.679837,1010.821643
2020-07-13,2057.964741,1393.618302,4201.331558,2178.9365,706.483807,1551.577519,2812.607004,1644.247104,562.563238,1457.159539,893.052109,513.778626,368.097748,393.33935,1549.391481,1566.868962,785.740461,1044.100156,1167.4978,351.248893,802.211422,1546.598056,4553.302639,651.330459,1721.666667,850.606267,1022.51169
2020-07-14,2085.682717,1414.960867,4245.672437,2210.660816,727.224058,1576.806514,2865.914397,1678.584299,579.978537,1481.053861,908.157568,531.832061,376.823191,409.413357,1583.72211,1592.395343,793.015736,1069.045383,1183.367556,363.091231,806.938032,1573.927959,4621.923145,684.554779,1764.279279,877.854223,1050.300601
2020-07-15,2110.109973,1440.818784,4315.312916,2244.295302,746.814276,1597.285989,2933.035019,1707.902188,596.27472,1495.679463,940.415633,558.435115,392.956397,425.306859,1621.1714,1620.435967,807.932744,1109.389671,1192.549135,374.127547,816.8226,1596.16924,4765.402386,713.18567,1808.063063,892.770209,1070.875084
2020-07-16,2134.53723,1467.007827,4394.673768,2272.199277,769.153999,1628.406649,3020.272374,1753.359073,616.296183,1519.865713,964.98139,585.114504,404.743651,445.893502,1658.798174,1647.745851,820.122871,1143.72457,1202.053388,401.629761,817.575942,1638.307604,4909.485326,740.017839,1857.027027,912.915531,1113.694055


Estado,Acre (AC),Alagoas (AL),Amapá (AP),Amazonas (AM),Bahia (BA),Ceará (CE),Distrito Federal(DF),Espírito Santo (ES),Goiás (GO),Maranhão (MA),Mato Grosso (MT),Mato Grosso do Sul (MS),Minas Gerais (MG),Pará (PA),Paraíba (PB),Paraná (PR),Pernambuco (PE),Piauí (PI),Rio de Janeiro (RJ),Rio Grande do Norte (RN),Rio Grande do Sul (RS),Rondônia (RO),Roraima (RR),Santa Catarina (SC),São Paulo (SP),Sergipe (SE),Tocantins (TO)
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2020-07-12,53.917157,38.561108,63.115846,78.446051,16.411104,77.677259,35.097276,51.737452,13.046144,35.950956,33.405707,6.145038,7.551509,65.556106,32.555781,9.431408,60.303945,30.046948,69.349939,40.891757,8.520815,35.906232,79.889563,7.388137,40.526794,44.324324,17.034068
2020-07-13,54.423422,39.042745,63.648469,78.678369,16.754792,78.875947,36.18677,52.509653,13.490725,36.50562,34.274194,6.374046,7.73838,65.865742,33.01217,9.82852,60.918301,31.048513,69.708384,41.185098,8.813109,36.992567,80.090796,7.685447,40.660763,45.585586,17.301269
2020-07-14,55.182818,39.554485,64.314248,79.091378,17.078652,78.898564,37.354086,53.590734,13.950636,37.016494,35.887097,6.755725,8.088165,66.101065,34.026369,10.34296,61.597327,31.893584,70.619684,42.505133,9.38884,37.907376,81.096962,7.93816,41.607629,46.531532,17.835671
2020-07-15,56.448479,40.066225,64.980027,79.504388,17.435558,80.074635,38.949416,53.976834,14.701824,37.541965,37.127792,6.984733,8.394825,66.695566,35.065923,10.830325,62.211684,32.644757,71.427704,43.385157,9.751993,38.193253,83.51176,8.458451,42.325159,47.477477,18.102872
2020-07-16,56.575045,40.577965,65.645806,79.891585,17.799075,80.730521,40.350195,54.980695,15.606316,38.067435,38.306452,7.290076,8.787734,67.475848,35.953347,11.245487,62.901487,33.333333,71.986634,44.030507,10.106289,38.707833,85.524092,8.740895,43.228883,48.243243,18.570474


In [18]:
from shutil import rmtree
rmtree('COVID-19-master/')

# Mundo

In [19]:
print("Starting world data")
import requests, zipfile, io

csse_path = "https://github.com/CSSEGISandData/COVID-19/archive/master.zip"

r = requests.get(csse_path, stream=True)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall()

Starting world data


In [20]:
root_path = 'COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/'

recent = sorted(os.listdir(root_path))[-2]
path = os.path.join(root_path, recent)
df = pd.read_csv(path)
# print(df.shape)
# display(df)
name = [c for c in df.columns if "Country" in c]
countries = df[name[0]].to_numpy()
countries = np.unique(countries) 
# print(np.unique(countries))

In [21]:
dates = []
for k, csv in enumerate(sorted(os.listdir(root_path))):
  if not csv[-3:] == 'csv': continue
  dates.append(  '/'.join(csv.split('-')[::-1][1:] ) )


In [22]:
ts = {'Deaths': {}, 'Confirmed': {}}
for country in np.unique(countries):
  ts['Deaths'][country] = []
  ts['Confirmed'][country] = []

for k, csv in enumerate(sorted(os.listdir(root_path))):
  print('\r{0}/{1}'.format(k, len(os.listdir(root_path))), end='', flush=True )
  if not csv[-3:] == 'csv': continue

  path = os.path.join(root_path, csv)
  df = pd.read_csv(path)

  for country in np.unique(countries):
    try:
      name = [c for c in df.columns if "Country" in c]
      ts['Deaths'][country].append(df[df[name[0]].str.contains(country)].sum()['Deaths'])
      ts['Confirmed'][country].append(df[df[name[0]].str.contains(country)].sum()['Confirmed'])
    except:
      ts['Deaths'][country].append(0)
      ts['Confirmed'][country].append(0)

1/179


This pattern has match groups. To actually get the groups, use str.extract.



178/179

In [23]:
df_deaths = pd.DataFrame.from_dict(ts['Deaths'])
df_deaths = df_deaths.sort_values(df_deaths.shape[0]-1, axis=1, ascending=False)

df_confirmed = pd.DataFrame.from_dict(ts['Confirmed'])
df_confirmed = df_confirmed.sort_values(df_confirmed.shape[0]-1, axis=1, ascending=False)

In [24]:
def plot_world(df, column, title, filename):
  df = df.loc[:, df.columns[:10]]
  df['Date'] = dates
  df = df.loc[30:]
  df = df.melt('Date', var_name='Country', value_name=column)

  fig = df.plot(x='Date', y=column, color='Country',)
  for data in fig.data:
    color = data.line.color
    data.update(mode='markers+lines', marker=dict(size=4, color='white'), line=dict(width=5), hoverlabel=dict(bgcolor=color))
  
  fig.update_layout(title=title, title_x=0.5, autosize=True,# width=950, height=550,
                    )

  fig.update_layout(
    updatemenus=[
        dict( buttons=list([
            dict(label="Linear",  
                method="relayout", 
                args=[{"yaxis.type": "linear"}]),
            dict(label="Log", 
                method="relayout", 
                args=[{"yaxis.type": "log"}]),
        ]),
        x=0.13,
        y=0.98
        )
        
    ])

  fig.write_html(os.path.join(save_path, filename+'.html'))
  fig.show()

plot_world(df_deaths, 'Deaths', 'Óbitos acumulados nos 10 países mais afetados', 'obitos-mundo')
plot_world(df_confirmed, 'Confirmed', 'Casos acumulados nos 10 países mais afetados', 'casos-mundo')

In [25]:
def plot_mundo_pordia(df, column, title, filename):
  countries_confirmed = df.columns[:10] #['Italy','Germany', 'United Kingdom','India','US', 'Brazil']
  df_ = pd.DataFrame(columns=countries_confirmed)

  for k, country in enumerate(countries_confirmed):
    df_[country] = df[country].diff()
    df_[country] = df_[country].rolling(8).mean()

  df_['Data'] = dates
  df_ = df_.iloc[30:]
  # if 'Spain' in df.columns: df_.drop('Spain', axis=1, inplace=True)
  # if 'France' in df.columns: df_.drop('France', axis=1, inplace=True)
  
  dfm = df_.melt('Data', var_name='Country', value_name=column)
  
  fig = dfm.plot(x='Data', y=column, color='Country',)

  for data in fig.data:
    color = data.line.color
    data.update(mode='markers+lines', marker=dict(size=3, color='white'), line=dict(width=5), hoverlabel=dict(bgcolor=color))
  
  fig.update_layout(title=title, title_x=0.5, autosize=True)#width=950, height=550,)

  fig.write_html(os.path.join(save_path, filename+'.html'))
  fig.show()


plot_mundo_pordia(df_confirmed, 'Casos por dia', 'Casos por dia nos países mais afetados', 'casos-mundo-por-dia')
plot_mundo_pordia(df_deaths, 'Óbitos por dia', 'Óbitos por dia nos países mais afetados','obitos-mundo-por-dia')

In [26]:
rmtree("COVID-19-master")


In [27]:
!zip includes.zip *.html

  adding: bcr-casos-por-estado.html (deflated 24%)
  adding: casos.html (deflated 70%)
  adding: casos-mundo.html (deflated 70%)
  adding: casos-mundo-por-dia.html (deflated 70%)
  adding: casos-por-dia.html (deflated 70%)
  adding: casos-por-estado.html (deflated 70%)
  adding: obitos.html (deflated 70%)
  adding: obitos-mundo.html (deflated 70%)
  adding: obitos-mundo-por-dia.html (deflated 70%)
  adding: obitos-por-dia.html (deflated 70%)
  adding: obitos-por-estado.html (deflated 70%)
  adding: resumo-por-estado.html (deflated 71%)
