In [1]:
import pandas as pd
import os

# DATA FOR FLOURISH @ https://app.flourish.studio/@psycho.presley

In [2]:
# Location of the consolidated country report (JSON) that feeds the Flourish export.
# NOTE(review): absolute, machine-specific Windows path - adjust it before
# running this notebook on another machine.
df_file = r'C:\Users\user\Documents\GitHub\COVID-19\consolidated_data\country_report.json'

In [10]:
# Load the country report into a DataFrame; it is the `df` argument passed to
# generate_flourish further down.
df = pd.read_json(df_file)

In [31]:
# Switch the working directory to the scripts folder: 'config.csv' is read
# later in this notebook through a relative path, so it is resolved against
# this directory.
# NOTE(review): absolute, machine-specific Windows path.
os.chdir(r'C:\Users\user\Documents\GitHub\COVID-19\python\scripts')

In [36]:
def generate_flourish(df,parameters,initial_date,file_dir):
    '''
    With this function it is possible to generate the dataset as used
    by Flourish Studio @ https://app.flourish.studio/@psycho.presley

    One CSV file named ``<parameter>.csv`` is written to ``file_dir`` for
    each entry of ``parameters``. Each file has one row per country and
    one column per date, starting at ``initial_date``.

    Parameters
    ----------
    df: obj, DataFrame
        pandas DataFrame with the data to be used. The DataFrame must
        have been generated by the world_data_formatter function
        presented in the pycovidfunc.py module. It must contain the
        'Country/Region' and 'Date' columns plus every column listed
        in ``parameters``.

    parameters: str, array-like
        list with the columns of df to be used. Each column will
        generate one separate and independent file to be used in
        Flourish studio. A single column name given as a plain string
        is treated as a one-element list.

    initial_date: str
        string of the date in the YYYY-MM-DD format to be the first
        date to be considered in the final file.

    file_dir: str
        directory where the CSV files are written.

    Returns
    -------
    None
        Failures are not raised: they are reported on stdout together
        with the underlying error. The success message is printed only
        when every file has been written.
    '''
    from pandas import DataFrame, concat
    from os import path

    # A bare string would otherwise be iterated character by character.
    if isinstance(parameters, str):
        parameters = [parameters]

    try:
        countries = df['Country/Region'].unique().tolist()

        # The entry 'French Guiana' is resulting in errors in Flourish studio
        # so it is removed when present (its absence is not an error):
        if 'French Guiana' in countries:
            countries.remove('French Guiana')

        # Build every table before writing anything, so that a problem with
        # the data does not leave a partial set of files behind.
        tables = []
        for item in parameters:
            print('creating the {} cases file'.format(item))
            columns = ['Country/Region','Date',item]
            flourish = DataFrame()

            for country in countries:
                # One row for this country, one column per date.
                df_aux = df.loc[df['Country/Region'] == country, columns]
                df_aux = df_aux.pivot(index='Country/Region',columns='Date', values=item)

                # NOTE(review): interpolate/bfill use the default axis, i.e.
                # they fill gaps down the rows (from neighbouring countries),
                # and they are re-applied every time a country is appended.
                # The incremental concat is kept because the filled values
                # depend on it - confirm whether filling along the dates
                # (axis=1) was the real intent before restructuring this.
                flourish = concat([flourish,df_aux]).interpolate(method='linear',limit=3)
                # .bfill() replaces the deprecated fillna(method='bfill').
                flourish = flourish.bfill()

            tables.append((item, flourish.loc[:,initial_date:]))

        for item, table in tables:
            table.to_csv(path.join(file_dir,item + '.csv'))
    except Exception as error:
        print('Process aborted! No files for flourish studio were created.')
        print('Reason: {!r}'.format(error))
    else:
        # Only reached when no exception occurred (the original printed this
        # from a `finally:` clause, i.e. even after a failure).
        print('Flourish studio files generated succesfully!')

In [34]:
# Read the configuration table (indexed by its 'var' column) from the current
# working directory; missing cells are replaced with '-'.
config = pd.read_csv('config.csv',index_col='var').fillna('-')
# Output directory for the Flourish files: the 'path' value of the
# 'flourish_data_dir' row.
file_dir = config.loc['flourish_data_dir'].path

# First date kept in the exported files and the columns to export
# (one CSV file per column).
initial_date = '2020-03-06'
parameters = ['Active','Confirmed','Deaths','Recovered']
generate_flourish(df,parameters,initial_date,file_dir)

creating the Active cases file
creating the Confirmed cases file
creating the Deaths cases file
creating the Recovered cases file
Flourish studio files generated succesfully!
