### History of the COVID-19 pandemic in Pernambuco, Brazil

This notebook shows how to obtain the number of COVID-19 cases and deaths from [Pernambuco's website](https://www.pecontracoronavirus.pe.gov.br/) and then generate QuickStatements commands to upload that data to Wikidata.

In [1]:
import pandas as pd
from datetime import date,datetime, timedelta
import numpy as np

In [2]:
# Read the case-level CSV export (expected next to this notebook) into a frame,
# then show its dimensions and the first rows as a sanity check.
csv_path = "./COVID-19 no Mundo, no Brasil e em Pernambuco.csv"
pe_bruto = pd.read_csv(csv_path)
print(pe_bruto.shape)
pe_bruto.head()

(816, 15)


Unnamed: 0,X,id,dt_notificacao,Idade,Sexo,mun_notificacao,dt_primeiros_sintomas,sintomas,comorbidades,hospitalizacao,classe,Resultado,evolucao,dt_obito,cd_municipio
0,5,512,20/03/2020,65,MASCULINO,RECIFE,09/03/2020,FEBRE - TOSSE - DISPNEIA - DESCONFORTO RESPIRA...,,SIM,CONFIRMADO,SARS-COV-2,ÓBITO,03/04/2020,261160
1,10,493,19/03/2020,38,FEMININO,RECIFE,,,,,CONFIRMADO,SARS-COV-2,ISOLAMENTO DOMICILIAR,,261160
2,14,1386,26/03/2020,69,FEMININO,RECIFE,19/03/2020,"FEBRE, TOSSE",,SIM,CONFIRMADO,NEGATIVO INFLUENZA/SARS-COV-2,INTERNADO UTI,,261160
3,51,79,13/03/2020,57,MASCULINO,BELO JARDIM,02/03/2020,FEBRE - DIFICULDADE DE RESPIRAR - CEFALEIA - O...,"DOENÇA CARDIOVASCULAR, INCLUINDO HIPERTENSÃO",SIM,CONFIRMADO,SARS-COV-2,ISOLAMENTO DOMICILIAR,,260170
4,62,81,13/03/2020,39,FEMININO,RECIFE,11/03/2020,TOSSE - CORIZA,SEM COMORBIDADES,NÃO,CONFIRMADO,SARS-COV-2,RECUPERADO,,261160


In [3]:
# Keep only the rows whose `classe` column equals 'CONFIRMADO'.
is_confirmed = pe_bruto['classe'] == 'CONFIRMADO'
pe_bruto = pe_bruto[is_confirmed]
print(pe_bruto.shape)
pe_bruto.head()

(816, 15)


Unnamed: 0,X,id,dt_notificacao,Idade,Sexo,mun_notificacao,dt_primeiros_sintomas,sintomas,comorbidades,hospitalizacao,classe,Resultado,evolucao,dt_obito,cd_municipio
0,5,512,20/03/2020,65,MASCULINO,RECIFE,09/03/2020,FEBRE - TOSSE - DISPNEIA - DESCONFORTO RESPIRA...,,SIM,CONFIRMADO,SARS-COV-2,ÓBITO,03/04/2020,261160
1,10,493,19/03/2020,38,FEMININO,RECIFE,,,,,CONFIRMADO,SARS-COV-2,ISOLAMENTO DOMICILIAR,,261160
2,14,1386,26/03/2020,69,FEMININO,RECIFE,19/03/2020,"FEBRE, TOSSE",,SIM,CONFIRMADO,NEGATIVO INFLUENZA/SARS-COV-2,INTERNADO UTI,,261160
3,51,79,13/03/2020,57,MASCULINO,BELO JARDIM,02/03/2020,FEBRE - DIFICULDADE DE RESPIRAR - CEFALEIA - O...,"DOENÇA CARDIOVASCULAR, INCLUINDO HIPERTENSÃO",SIM,CONFIRMADO,SARS-COV-2,ISOLAMENTO DOMICILIAR,,260170
4,62,81,13/03/2020,39,FEMININO,RECIFE,11/03/2020,TOSSE - CORIZA,SEM COMORBIDADES,NÃO,CONFIRMADO,SARS-COV-2,RECUPERADO,,261160


In [4]:
# Keep only the columns needed for the daily counts. Selecting with a column
# list already produces a new frame, so the former `copy(deep=True)` (whose
# result was overwritten on the very next line) has been removed.
pe_info = pe_bruto.loc[:, ['dt_notificacao', 'classe', 'evolucao', 'dt_obito']]

# Number of rows per notification date (one row per confirmed case).
pe_conf = (
    pe_info.groupby(['dt_notificacao'])['classe']
    .count()
    .reset_index()
)

# Number of rows whose outcome is 'ÓBITO', grouped by the date of death.
pe_obt = (
    pe_info.query("evolucao == 'ÓBITO'")
    .groupby(['dt_obito'])['evolucao']
    .count()
    .reset_index()
)

# Rename the death-date column to 'dt_notificacao' so both tables share the
# same key name and can be merged on it.
pe_obt.columns = ['dt_notificacao', 'obt']

In [5]:
# Combine daily confirmed cases and daily deaths into one table keyed by date.
# An outer merge is used so that a date with deaths but no new notifications
# is kept; a left merge would silently drop those deaths and make the
# cumulative death total undercount.
pe_counts = pd.merge(pe_conf, pe_obt, on='dt_notificacao', how='outer')
pe_counts.columns = ['date', 'confirmed', 'deaths']

# Dates that exist only on the deaths side had no notifications: count them as 0.
# `deaths` is intentionally left as NaN on days without deaths.
pe_counts['confirmed'] = pe_counts['confirmed'].fillna(0).astype(int)

# Dates arrive as 'dd/mm/YYYY' strings; parse them so sorting is chronological
# rather than lexicographic.
pe_counts['date'] = pd.to_datetime(pe_counts['date'], format='%d/%m/%Y')
pe_counts = pe_counts.sort_values(by='date', axis=0)
pe_counts

Unnamed: 0,date,confirmed,deaths
4,2020-03-05,2,
7,2020-03-07,1,
12,2020-03-12,7,
13,2020-03-13,8,
14,2020-03-14,1,
15,2020-03-15,6,
16,2020-03-16,9,
17,2020-03-17,5,
18,2020-03-18,3,
19,2020-03-19,2,


In [6]:
# Add running totals of confirmed cases and deaths (rows are already in
# chronological order). `cumsum` skips NaN by default, so rows whose `deaths`
# is NaN stay NaN in `ntotal_Ob`.
pe_counts = pe_counts.assign(
    ntotal_Conf=pe_counts['confirmed'].cumsum(),
    ntotal_Ob=pe_counts['deaths'].cumsum(),
)
pe_counts

Unnamed: 0,date,confirmed,deaths,ntotal_Conf,ntotal_Ob
4,2020-03-05,2,,2,
7,2020-03-07,1,,3,
12,2020-03-12,7,,10,
13,2020-03-13,8,,18,
14,2020-03-14,1,,19,
15,2020-03-15,6,,25,
16,2020-03-16,9,,34,
17,2020-03-17,5,,39,
18,2020-03-18,3,,42,
19,2020-03-19,2,,44,


In [7]:
# Time-value template used in the generated statements; the trailing "/11" is
# the precision suffix (day precision in QuickStatements syntax).
WDT_DAY_FORMAT = "+%Y-%m-%dT00:00:00Z/11"

# Format the whole datetime column at once instead of converting each
# timestamp in a Python loop. `date_str` is kept as a plain list, as before.
date_str = pe_counts['date'].dt.strftime(WDT_DAY_FORMAT).tolist()
pe_counts['wdt_dates'] = date_str
pe_counts.head()

Unnamed: 0,date,confirmed,deaths,ntotal_Conf,ntotal_Ob,wdt_dates
4,2020-03-05,2,,2,,+2020-03-05T00:00:00Z/11
7,2020-03-07,1,,3,,+2020-03-07T00:00:00Z/11
12,2020-03-12,7,,10,,+2020-03-12T00:00:00Z/11
13,2020-03-13,8,,18,,+2020-03-13T00:00:00Z/11
14,2020-03-14,1,,19,,+2020-03-14T00:00:00Z/11


In [8]:
# Emit the QuickStatements batch: first the commands that create the item and
# set its label, description and fixed statements, then one line per date with
# the cumulative figures.
source_url = '"http://dados.seplag.pe.gov.br/apps/corona.html"'

header_lines = [
    'CREATE',
    'LAST|Len|"COVID-19 pandemic in the state of Pernambuco"',
    'LAST|Den|"ongoing viral pandemic in Pernambuco, Brazil"',
    'LAST|P31|Q3241045|P642|Q84263196|P3005|Q40942',
    'LAST|P361|Q86597695',
    'LAST|P17|Q155',
    'LAST|P276|Q40942',
    'LAST|P580|+2020-03-05T00:00:00Z/11',
]
print("\n".join(header_lines))

for _, day in pe_counts.iterrows():
    # Qualifier (P585 = the row's date) and reference (S854 = source URL)
    # shared by both statements of this row.
    suffix = "|P585|" + day['wdt_dates'] + "|S854|" + source_url

    # P1603 carries the cumulative confirmed count.
    print("LAST|P1603|" + str(int(day['ntotal_Conf'])) + suffix)

    # P1120 carries the cumulative death count; skipped when it is NaN.
    if np.isnan(day['ntotal_Ob']):
        continue
    print("LAST|P1120|" + str(int(day['ntotal_Ob'])) + suffix)

CREATE
LAST|Len|"COVID-19 pandemic in the state of Pernambuco"
LAST|Den|"ongoing viral pandemic in Pernambuco, Brazil"
LAST|P31|Q3241045|P642|Q84263196|P3005|Q40942
LAST|P361|Q86597695
LAST|P17|Q155
LAST|P276|Q40942
LAST|P580|+2020-03-05T00:00:00Z/11
LAST|P1603|2|P585|+2020-03-05T00:00:00Z/11|S854|"http://dados.seplag.pe.gov.br/apps/corona.html"
LAST|P1603|3|P585|+2020-03-07T00:00:00Z/11|S854|"http://dados.seplag.pe.gov.br/apps/corona.html"
LAST|P1603|10|P585|+2020-03-12T00:00:00Z/11|S854|"http://dados.seplag.pe.gov.br/apps/corona.html"
LAST|P1603|18|P585|+2020-03-13T00:00:00Z/11|S854|"http://dados.seplag.pe.gov.br/apps/corona.html"
LAST|P1603|19|P585|+2020-03-14T00:00:00Z/11|S854|"http://dados.seplag.pe.gov.br/apps/corona.html"
LAST|P1603|25|P585|+2020-03-15T00:00:00Z/11|S854|"http://dados.seplag.pe.gov.br/apps/corona.html"
LAST|P1603|34|P585|+2020-03-16T00:00:00Z/11|S854|"http://dados.seplag.pe.gov.br/apps/corona.html"
LAST|P1603|39|P585|+2020-03-17T00:00:00Z/11|S854|"http://dados.se