In [1]:
import pandas as pd
import os

In [2]:
# Work from the scripts folder: the CSV files read throughout this notebook
# (config.csv and the others) are referenced by bare file name.
os.chdir(r'C:\Users\user\Documents\GitHub\COVID-19\python\scripts')

# Consolidated country report that feeds every chart below.
df_file = r'C:\Users\user\Documents\GitHub\COVID-19\consolidated_data\country_report.json'

# Settings table indexed by its 'var' column; missing cells are replaced by '-'.
config = (
    pd.read_csv('config.csv', index_col='var')
    .fillna('-')
)

# Output folder for the Flourish files, taken from the 'path' column of the
# 'flourish_data_dir' row.
file_dir = config.loc['flourish_data_dir', 'path']

data = pd.read_json(df_file)

# DATA FOR FLOURISH @ https://app.flourish.studio/@psycho.presley

In [3]:
def flourish_racing_bars(df,parameters,initial_date,file_dir,file_name='racing_bars'):
    '''
    Generate the racing bars datasets used by Flourish Studio
    @ https://app.flourish.studio/@psycho.presley

    One CSV file named ``<file_name>_<parameter>.csv`` is written per
    entry of ``parameters``. Each file has one row per country and one
    column per date, starting at ``initial_date``.

    Parameters
    ----------
    df: obj, DataFrame
        pandas DataFrame with the data to be used. The DataFrame must
        have been generated by the world_data_formatter function
        presented in the pycovidfunc.py module. The columns
        'Country/Region', 'Date' and every entry of ``parameters``
        are read.

    parameters: str, array-like
        list with the columns of df to be used. Each column will
        generate one separate and independent file to be used in
        Flourish studio

    initial_date: str
        string of the date in the YYYY-MM-DD format to be the first
        date to be considered in the final file

    file_dir: str
        string of the root dir where the flourish data must be saved

    file_name: str
        prefix of the name of the *.csv files to be created

    Returns
    -------
    None. Progress and failures are reported on stdout; exceptions are
    not propagated to the caller (best-effort behaviour), but the reason
    of a failure is printed.
    '''
    from pandas import DataFrame, concat
    from os import path

    print('--------------------------')
    print('Creating files for the flourish racing bars chart')
    try:
        # The entry 'French Guiana' results in errors in Flourish studio,
        # so it is left out. Filtering (instead of list.remove) keeps the
        # function working when that entry is not present in the data.
        countries = [country
                     for country in df['Country/Region'].unique().tolist()
                     if country != 'French Guiana']

        for item in parameters:
            print('creating the {} cases file'.format(item))
            columns = ['Country/Region','Date',item]
            flourish = DataFrame()

            for country in countries:
                # One wide row for this country: dates become the columns.
                df_aux = df[columns].loc[df['Country/Region'] == country]
                df_aux = df_aux.pivot(index='Country/Region',columns='Date', values=item)

                # Nothing to stack on for the first country; this also avoids
                # concatenating with an empty frame.
                stacked = df_aux if flourish.empty else concat([flourish,df_aux])

                # NOTE(review): interpolate/bfill use their default axis, i.e.
                # gaps are filled down each date column from the neighbouring
                # country rows, not along time - confirm this is intended.
                flourish = stacked.interpolate(method='linear',limit=3)
                flourish = flourish.bfill()

            file = path.join(file_dir,file_name + '_' + item + '.csv')
            flourish.loc[:,initial_date:].to_csv(file)
        print('Files created succesfully!')
    except Exception as error:
        # Still best-effort, but say why instead of failing silently.
        print('Process aborted! No files for flourish studio were created.')
        print('Reason: {}: {}'.format(type(error).__name__, error))
    finally:
        print('End execution of the flourish racing bars chart function.')
        print('--------------------------')

In [4]:
def flourish_parliament_map(df,seats,region_mapping_dict,file_dir,places=1000,file_name='parliament_map'):
    '''
    Generate the datasets used by the parliament map viz in Flourish
    Studio @ https://app.flourish.studio/@psycho.presley

    Only the rows of the most recent date in ``df`` are used. Every seat
    column is rescaled so that it adds up to ``places``. Two files are
    written: ``<file_name>_country.csv`` (one row per country) and
    ``<file_name>_region.csv`` (countries replaced by their region and
    summed per region).

    Parameters
    ----------
    df: obj, DataFrame
        pandas DataFrame with the data to be used. The DataFrame must
        have been generated by the world_data_formatter function
        presented in the pycovidfunc.py module. It is not modified.

    seats: str, array-like
        list with the columns of df to be used as seats. Each column
        represents one seat tab in the Flourish studio parliament chart

    region_mapping_dict: dict
        dictionary with the countries as keys and their region as values
        for region mapping. Countries missing from the dictionary keep
        their own name as region.

    file_dir: str
        string of the root dir where the flourish data must be saved

    places: int
        desired number of places in the parliament chart

    file_name: str
        prefix of the name of the *.csv files to be created

    Returns
    -------
    None. Progress and failures are reported on stdout; exceptions are
    not propagated to the caller, but the reason of a failure is printed.
    '''
    from os import path

    print('--------------------------')
    print('Creating files for the flourish studio parliament map')
    try:
        columns = ['Country/Region']
        columns.extend(seats)

        # Explicit copy of the latest-date rows: the assignments below must
        # not write into a slice of the caller's frame.
        df_aux = df.loc[df['Date'] == max(df['Date']), columns].copy()

        for item in seats:
            # Share of each country, scaled to the number of places. The
            # column total is computed once instead of once per row.
            df_aux[item] = places*df_aux[item]/df_aux[item].sum()

        # Saving the first file for the countries parliament chart:
        df_aux.to_csv(path.join(file_dir,file_name + '_country.csv'),index=False)

        # Now ready to create the regions parliament chart
        # mapping the country -> region (unmapped countries are kept as is):
        df_aux['Country/Region'] = df_aux['Country/Region'].map(
            lambda name: region_mapping_dict.get(name, name))

        df_aux = df_aux.groupby('Country/Region').sum().reset_index()
        df_aux.to_csv(path.join(file_dir,file_name + '_region.csv'),index=False)

        print('Files created succesfully!')
    except Exception as error:
        # Still best-effort, but say why instead of failing silently.
        print('Process aborted! No files for flourish studio were created.')
        print('Reason: {}: {}'.format(type(error).__name__, error))
    finally:
        print('End execution of the flourish parliament map function.')
        print('--------------------------')

In [5]:
def flourish_hierarchy_chart(df,cases,region_mapping_dict,file_dir,file_name='hierarchy_chart'):
    '''
    Generate the dataset used by the hierarchy chart viz in Flourish
    Studio @ https://app.flourish.studio/@psycho.presley

    Only the rows of the most recent date in ``df`` are used. A 'Group'
    column with the region of each country is appended and the result is
    written to ``<file_name>.csv``.

    Parameters
    ----------
    df: obj, DataFrame
        pandas DataFrame with the data to be used. The DataFrame must
        have been generated by the world_data_formatter function
        presented in the pycovidfunc.py module. It is not modified.

    cases: str, array-like
        list with the columns of df to be exported next to the
        'Country/Region' column

    region_mapping_dict: dict
        dictionary with the countries as keys and their region as values
        for region mapping. Countries missing from the dictionary use
        their own name as group.

    file_dir: str
        string of the root dir where the flourish data must be saved

    file_name: str
        the name (without extension) of the *.csv file to be created

    Returns
    -------
    None. Progress and failures are reported on stdout; exceptions are
    not propagated to the caller, but the reason of a failure is printed.
    '''
    from os import path

    print('--------------------------')
    print('Creating files for the flourish studio hierarchy chart')
    try:
        columns = ['Country/Region']
        columns.extend(cases)

        # Explicit copy of the latest-date rows: adding the 'Group' column
        # must not write into a slice of the caller's frame.
        df_aux = df.loc[df['Date'] == max(df['Date']), columns].copy()

        # mapping the country -> region (unmapped countries are kept as is):
        df_aux['Group'] = df_aux['Country/Region'].map(
            lambda name: region_mapping_dict.get(name, name))

        # Saving the file for the hierarchy chart:
        df_aux.to_csv(path.join(file_dir,file_name + '.csv'),index=False)
        print('Files created succesfully!')
    except Exception as error:
        # Still best-effort, but say why instead of failing silently.
        print('Process aborted! No files for flourish studio were created.')
        print('Reason: {}: {}'.format(type(error).__name__, error))
    finally:
        print('End execution of the flourish hierarchy chart function.')
        print('--------------------------')

In [6]:
def flourish_point_map(df,parameters,lat,long,file_dir,file_name='point_map'):
    '''
    Generate the dataset used by the point map viz in Flourish Studio
    @ https://app.flourish.studio/@psycho.presley

    For every column in ``parameters`` the per-country difference
    between consecutive dates is computed (the first record of each
    country gets 0). Latitude and longitude columns are appended and the
    result, ordered by date, is written to ``<file_name>.csv``.

    Parameters
    ----------
    df: obj, DataFrame
        pandas DataFrame with the data to be used. The DataFrame must
        have been generated by the world_data_formatter function
        presented in the pycovidfunc.py module. It is not modified.

    parameters: str, array-like
        list with the columns of df to be used as map variables

    lat: dict
        dictionary with the countries as keys and their latitude
        coordinate as values for mapping. Countries missing from the
        dictionary get 0.

    long: dict
        dictionary with the countries as keys and their longitude
        coordinate as values for mapping. Countries missing from the
        dictionary get 0.

    file_dir: str
        string of the root dir where the flourish data must be saved

    file_name: str
        the name (without extension) of the *.csv file to be created

    Returns
    -------
    None. Progress and failures are reported on stdout; exceptions are
    not propagated to the caller, but the reason of a failure is printed.
    '''
    from os import path

    print('--------------------------')
    print('Creating files for the flourish studio point map')
    try:
        # The differences are only day-over-day values when each country is
        # traversed in chronological order, so sort once up front. A stable
        # sort keeps rows sharing a date in their input order.
        ordered = df.sort_values(by='Date', kind='mergesort')
        df_aux = ordered[['Country/Region','Date']].copy()

        by_country = ordered.groupby('Country/Region')
        for item in parameters:
            # Assignment aligns on the row index, like the column-wise concat.
            df_aux[item] = by_country[item].diff().fillna(value=0)

        # mapping the country -> Lat/Long (0 when the country is unknown):
        df_aux['Latitude'] = df_aux['Country/Region'].map(
            lambda name: lat.get(name, 0))
        df_aux['Longitude'] = df_aux['Country/Region'].map(
            lambda name: long.get(name, 0))

        df_aux.to_csv(path.join(file_dir,file_name + '.csv'),index=False)
        print('Files created succesfully!')
    except Exception as error:
        # Still best-effort, but say why instead of failing silently.
        print('Process aborted! No files were created.')
        print('Reason: {}: {}'.format(type(error).__name__, error))
    finally:
        print('End execution of the flourish point map function.')
        print('--------------------------')

In [3]:
# Work on a copy so the frame loaded from disk (`data`) is left untouched.
df = data.copy()

In [4]:
# Region lookup built from the header-less region_mapping.csv: the first CSV
# column supplies the dictionary keys, the second one the values.
region_mapping_dict = (
    pd.read_csv('region_mapping.csv', header=None, index_col=0)
    .to_dict()[1]
)

In [8]:
# Driver cell: writes every Flourish dataset into `file_dir`.
# Relies on `df`, `file_dir`, `region_mapping_dict` and the flourish_*
# functions defined in the cells above.

# 1 - Racing bars chart:
initial_date = '2020-03-06'
parameters = ['Active','Confirmed','Deaths','Recovered']

flourish_racing_bars(df,parameters,initial_date,file_dir)

# ===============
# 2 - Parliament map:

seats = ['Confirmed','Active','Recovered','Deaths']
flourish_parliament_map(df,seats,region_mapping_dict,file_dir)

# ===============
# 3 - Point map:

# coordinates.csv has no header: the first column is the lookup key, the
# columns labelled 1 and 2 feed the latitude and longitude dictionaries.
# The file is parsed once and both dictionaries are taken from that frame.
coordinates = pd.read_csv('coordinates.csv',header=None,index_col=0)
lat = coordinates[1].to_dict()
long = coordinates[2].to_dict()
parameters = ['Confirmed','Active','Recovered','Deaths']

flourish_point_map(df,parameters,lat,long,file_dir)

# ===============
# 4 - Hierarchy chart:

cases = ['Confirmed','Active','Recovered','Deaths']
flourish_hierarchy_chart(df,cases,region_mapping_dict,file_dir)

--------------------------
Creating files for the flourish racing bars chart
creating the Active cases file
creating the Confirmed cases file
creating the Deaths cases file
creating the Recovered cases file
Files created succesfully!
End execution of the flourish racing bars chart function.
--------------------------
--------------------------
Creating files for the flourish studio parliament map
Files created succesfully!
End execution of the flourish parliament map function.
--------------------------
--------------------------
Creating files for the flourish studio point map
Files created succesfully!
End execution of the flourish point map function.
--------------------------
--------------------------
Creating files for the flourish studio hierarchy chart
Files created succesfully!
End execution of the flourish hierarchy chart function.
--------------------------


In [10]:
# region mapping dictionary:
# NOTE(review): identical to the load done in an earlier cell; kept here so
# this cell can be run without it.
region_mapping_dict = pd.read_csv('region_mapping.csv',header=None,index_col=0).to_dict()[1]


# 5 - Scatter plot:
cases = ['Confirmed','Active','Recovered','Deaths']

# NOTE(review): this rebinds the global `file_dir` (previously read from
# config.csv) to the current working directory, so cells executed after this
# one write there as well.
file_dir = os.getcwd()
# NOTE(review): flourish_scatter_with_lines is not defined in the visible part
# of this notebook - presumably it lived in a cell that was removed or comes
# from another module; on a fresh kernel this call raises NameError unless it
# is defined first.
flourish_scatter_with_lines(df,cases,region_mapping_dict,file_dir,file_name='scatter_plot')

--------------------------
Creating files for the flourish studio scatter plot


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Files created succesfully!
End execution of the flourish scatter plot function.
--------------------------


In [46]:
from quantiphy import Quantity as qty
from math import floor

# Card plot: one row per region with the latest totals rendered as short
# strings by quantiphy, written to card_plot.csv in the working directory.
# Relies on `df`, `region_mapping_dict` and `qty` being defined beforehand.
columns = ['Country/Region']
cases = ['Confirmed','Active','Recovered','Deaths']

columns.extend(cases)

# Explicit copy of the latest-date rows: adding the 'Group' column must not
# write into a slice of `df` (source of the SettingWithCopyWarning).
df_aux = df.loc[df['Date'] == max(df['Date']), columns].copy()

# country -> region; countries without a mapping form their own group
df_aux['Group'] = df_aux['Country/Region'].map(
    lambda name: region_mapping_dict.get(name, name))

# Sum only the case columns: on recent pandas versions a plain .sum() would
# also aggregate the text column 'Country/Region'.
df_aux = df_aux.groupby('Group')[cases].sum()

# The 'Other' group is not shown on the cards; tolerate its absence.
df_aux = df_aux.drop('Other', errors='ignore')

# Per-region extra columns, matched on the 'Group' index level.
df_logo = pd.read_csv('region_logo.csv', index_col='Group')
df_aux = df_aux.join(df_logo, on='Group')

for item in cases:
    # Render each total as text with a precision of 2.
    df_aux[item] = df_aux[item].map(lambda value: qty(value).render(prec=2))

df_aux.to_csv(os.path.join(os.getcwd(),'card_plot.csv'))

In [None]:
# NOTE(review): leftover scratch cell. DataFrame.transform needs a `func`
# argument, so this call raises TypeError as written - pass the intended
# function or remove the cell.
df_aux.transform()