## Imports em geral

#### Precisa instalar a investpy: pip install investpy

In [1]:
import pandas as pd
import investpy as ivp

## Gerando série histórica dos principais índices por país

In [12]:
# Indices to download, keyed by the country name that is passed to investpy.
country_index = {
    'brazil': ['bovespa'],
    'united states': ['nasdaq', 's&p 500'],
    'united kingdom': ['ftse 100', 'MSCI United Kingdom'],
}
# Countries are processed in the order they are declared in the mapping above.
countries = list(country_index)

# To look up valid index names before adding a new country:
# idx = ivp.indices.get_indices(country='united kingdom')
# idx.loc[idx['class'] == 'major_indices']

def generate_idxs(countries = countries, country_index = country_index,
                  from_date = '01/01/2020', to_date = '31/03/2020'):
    """Download the closing-price history of each configured index and save it as CSV.

    Parameters
    ----------
    countries : iterable of str
        Countries to process, in order. Each one must be a key of ``country_index``.
    country_index : dict
        Maps a country name to the list of index names to fetch for that country.
    from_date, to_date : str
        Start and end of the window, forwarded unchanged to
        ``ivp.indices.get_index_historical_data``. The defaults reproduce the
        window that used to be hard-coded in this function.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with the columns ``Date``, ``Index``, ``Country``,
        ``Close`` and ``Currency``. The same frame is written to
        ``./data/processed/idxs.csv``.
    """
    frames = []
    for country in countries:
        for index in country_index[country]:
            index_data = ivp.indices.get_index_historical_data(index, country, from_date, to_date)
            index_data = index_data.assign(Index = index, Country = country)
            # reset_index turns the date index into a regular 'Date' column.
            frames.append(index_data.reset_index()[['Date', 'Index', 'Country', 'Close', 'Currency']])

    # DataFrame.append was removed in pandas 2.0; collecting the pieces and
    # concatenating once also avoids re-copying the frame on every iteration.
    # pd.concat rejects an empty list, so fall back to an empty frame in that case.
    idxs = pd.concat(frames, ignore_index = True) if frames else pd.DataFrame()
    idxs.to_csv('./data/processed/idxs.csv', index = False)
    return idxs

## Gerando série histórica de dados do COVID-19 por país

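Cria um df com as seguintes métricas nas dimensões dia e país:
 - Número de novos mortos por dia (delta diário bruto) e a variação percentual desse delta em relação ao dia anterior
 - Número de novos casos confirmados por dia (delta diário bruto) e a variação percentual desse delta em relação ao dia anterior
 - Número de novos recuperados por dia (delta diário bruto) e a variação percentual desse delta em relação ao dia anterior

Observação: a variação percentual resulta em `inf` (ou `NaN`) quando o delta do dia anterior é zero.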

In [3]:
def _daily_country_metric(cumulative, country, metric):
    """Return the day-over-day change of one metric for one country, plus its percent change."""
    per_date_total = (
        cumulative.loc[cumulative['Country/Region'] == country]
        .drop(columns = ['Province/State', 'Country/Region', 'Lat', 'Long'])
        .T                # the remaining (date) columns become the row index
        .sum(axis = 1)    # add up every row (province/state) of the country
    )
    # Parse the labels before any join so alignment and sorting are chronological
    # rather than lexicographic on the raw strings.
    per_date_total.index = pd.to_datetime(per_date_total.index)
    daily = per_date_total.diff()
    return pd.DataFrame({
        f'{metric} {country}': daily,
        f'diff_pct {metric} {country}': daily.pct_change(),
    })


def generate_covid_abroad(countries = ('US', 'United Kingdom')):
    """Build per-country daily COVID-19 series from the CSSE global time-series CSVs.

    For every country and each of the metrics recovered / deaths / confirmed,
    two columns are produced:

    * ``'<metric> <country>'``: day-over-day difference of the summed series;
    * ``'diff_pct <metric> <country>'``: percent change of that difference
      (``inf``/``NaN`` when the previous difference is zero or missing).

    Parameters
    ----------
    countries : iterable of str
        Values matched against the ``Country/Region`` column. The default
        reproduces the two countries that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by date, sorted ascending, with the columns described
        above. It is also written to ``./data/processed/covid_abroad.csv``.
    """
    base = './data/external/csse_covid_19_data/csse_covid_19_time_series/'
    sources = {
        'recovered': pd.read_csv(base + 'time_series_covid19_recovered_global.csv'),
        'deaths': pd.read_csv(base + 'time_series_covid19_deaths_global.csv'),
        'confirmed': pd.read_csv(base + 'time_series_covid19_confirmed_global.csv'),
    }

    frames = [
        _daily_country_metric(sources[metric], country, metric)
        for country in countries
        for metric in ('recovered', 'deaths', 'confirmed')
    ]

    # Outer-align every piece on its date index; pd.concat rejects an empty list.
    covid_abroad = pd.concat(frames, axis = 1) if frames else pd.DataFrame()
    covid_abroad.index = pd.to_datetime(covid_abroad.index)
    covid_abroad = covid_abroad.sort_index()

    covid_abroad.to_csv('./data/processed/covid_abroad.csv', index = True)
    return covid_abroad

In [4]:
def generate_covid_brazil():
    """Build the daily COVID-19 series for Brazil and save it as CSV.

    New confirmed cases and new deaths come from
    ``./data/external/brasil_ministerio_saude.csv`` (``;``-separated), summed
    over every row that shares the same ``data`` value. Daily recoveries are
    the day-over-day difference of the Brazil rows in the CSSE global
    "recovered" time series.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by date with the columns ``confirmed brazil``,
        ``deaths brazil`` and ``recovered brazil``. Dates present only in the
        recovered series are dropped (left join). The frame is also written to
        ``./data/processed/covid_brazil.csv``.
    """
    from_ministerio = pd.read_csv('./data/external/brasil_ministerio_saude.csv', sep = ';')
    from_ministerio = (
        from_ministerio
        .drop(columns = ['regiao', 'estado', 'casosAcumulados', 'obitosAcumulados'])
        .groupby(['data'])
        .sum()
    )
    from_ministerio.index = pd.to_datetime(from_ministerio.index, format = '%d/%m/%Y')
    from_ministerio = from_ministerio.rename(columns = {
        'casosNovos':'confirmed brazil',
        'obitosNovos':'deaths brazil'
    }).sort_index()  # grouping ordered the dates as strings; re-sort chronologically

    recovered = pd.read_csv('./data/external/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
    recovered_brazil = (
        recovered.loc[recovered['Country/Region'] == 'Brazil']
        .drop(columns = ['Province/State', 'Country/Region', 'Lat', 'Long'])
        .T                # the remaining (date) columns become the row index
        .sum(axis = 1)
        .to_frame(name = 'recovered brazil')
    )
    # The source series is summed per date, so diff() yields the daily change.
    recovered_brazil['recovered brazil'] = recovered_brazil['recovered brazil'].diff()
    # infer_datetime_format is deprecated since pandas 2.0 and only triggers a
    # warning there, so the argument is no longer passed.
    recovered_brazil.index = pd.to_datetime(recovered_brazil.index)

    covid_brazil = from_ministerio.merge(recovered_brazil, left_index = True, right_index = True, how = 'left')

    covid_brazil.to_csv('./data/processed/covid_brazil.csv', index = True)

    return covid_brazil

In [14]:
# Build (and persist to ./data/processed/) the three datasets.
# Keep the complete index history in `idxs`: assigning `.head()` here would
# leave only the first five rows available to every later cell.
idxs = generate_idxs()
covid_abroad = generate_covid_abroad()
covid_brazil = generate_covid_brazil()

In [15]:
# Preview the first rows of the index price history.
idxs.head()

Unnamed: 0,Date,Index,Country,Close,Currency
0,2020-01-01,bovespa,brazil,118573.1,BRL
1,2020-01-02,bovespa,brazil,117706.66,BRL
2,2020-01-05,bovespa,brazil,116877.92,BRL
3,2020-01-06,bovespa,brazil,116661.94,BRL
4,2020-01-07,bovespa,brazil,116247.03,BRL


In [11]:
# Show the most recent rows of the per-country daily COVID-19 series.
covid_abroad.tail()

Unnamed: 0,recovered US,diff_pct recovered US,deaths US,diff_pct deaths US,confirmed US,diff_pct confirmed US,recovered United Kingdom,diff_pct recovered United Kingdom,deaths United Kingdom,diff_pct deaths United Kingdom,confirmed United Kingdom,diff_pct confirmed United Kingdom
2020-03-29,1593.0,6.847291,441.0,-0.008989,19408.0,-0.020836,0.0,,210.0,-0.192308,2468.0,-0.038566
2020-03-30,2979.0,0.870056,511.0,0.15873,20921.0,0.077958,20.0,inf,180.0,-0.142857,2673.0,0.083063
2020-03-31,1380.0,-0.536757,895.0,0.751468,26365.0,0.260217,8.0,-0.6,382.0,1.122222,3028.0,0.13281
2020-04-01,1450.0,0.050725,884.0,-0.012291,25200.0,-0.044187,0.0,-1.0,564.0,0.47644,4384.0,0.44782
2020-04-02,527.0,-0.636552,1169.0,0.322398,30081.0,0.19369,13.0,inf,569.0,0.008865,4308.0,-0.017336


In [10]:
# Display the Brazil COVID-19 frame (the notebook rendering elides the middle rows).
covid_brazil

Unnamed: 0_level_0,confirmed brazil,deaths brazil,recovered brazil
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-30,0,0,0.0
2020-01-31,0,0,0.0
2020-02-01,0,0,0.0
2020-02-02,0,0,0.0
2020-02-03,0,0,0.0
...,...,...,...
2020-03-29,353,22,0.0
2020-03-30,323,23,114.0
2020-03-31,1138,42,7.0
2020-04-01,1117,40,0.0
