In [121]:
import pandas as pd
from app import indicator_data, indicator_properties, data

In [73]:
def get_missing_values_stat(data, indicator_properties, max_year=2019, min_year=2005):
    """Compute, per country and category, the share of data points that are not imputed.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format indicator values. The columns ``ISO``, ``Indicator``,
        ``Year`` and ``Imputed`` are read. ``Imputed`` is summed, so it is
        presumably a boolean / 0-1 flag (TODO confirm against the loader).
    indicator_properties : pandas.DataFrame
        Indicator metadata. The columns ``Indicator`` and ``Category`` are read.
    max_year, min_year : int
        Inclusive bounds of the year window to evaluate.

    Returns
    -------
    pandas.DataFrame
        One column, ``'Data availability (%)'``, rounded to one decimal and
        indexed by an ``(ISO, Category)`` MultiIndex that covers every
        combination of the ISOs found in the window and the categories listed
        in ``indicator_properties``. A combination with no rows in ``data``
        is reported as 0.0.
    """
    in_window = data[(data.Year >= min_year) & (data.Year <= max_year)]
    merged = pd.merge(in_window, indicator_properties, on='Indicator').astype({'Year': int})

    # Expected number of points per category: indicators in it x years in the window.
    n_years = max_year - min_year + 1
    total_points = indicator_properties.groupby('Category').Indicator.count() * n_years

    # Vectorised group sum instead of groupby.apply with a lambda.
    imputed_counts = merged.groupby(['ISO', 'Category'])['Imputed'].sum()
    # Broadcast the per-category totals over the ISO level of the MultiIndex.
    imputed_pct = imputed_counts.divide(total_points, level='Category') * 100

    full_index = pd.MultiIndex.from_product(
        [merged.ISO.unique(), indicator_properties.Category.unique()],
        names=['ISO', 'Category'],
    )

    # Combinations absent from the data are treated as 100% imputed/missing,
    # which becomes 0% availability after the subtraction.
    availability = (100 - imputed_pct.reindex(full_index, fill_value=100)).round(1)
    return availability.to_frame(name='Data availability (%)')


In [74]:
# Availability table for the function's default 2005-2019 window,
# indexed by (ISO, Category).
missing_data = get_missing_values_stat(
    data=indicator_data,
    indicator_properties=indicator_properties,
)

In [130]:
# Attach the readable variable name to each category code by matching
# `Category` against the `Variable` column of `data`.
(
    missing_data
    .reset_index()
    .merge(
        data[['Variable_name', 'Variable']].drop_duplicates(),
        left_on='Category',
        right_on='Variable',
    )
)

Unnamed: 0,ISO,Category,Data availability (%),Variable_name,Variable
0,ALB,GE,100.0,Greenhouse gas emissions reductions,GE
1,AND,GE,100.0,Greenhouse gas emissions reductions,GE
2,ARE,GE,100.0,Greenhouse gas emissions reductions,GE
3,ARM,GE,100.0,Greenhouse gas emissions reductions,GE
4,ASM,GE,0.0,Greenhouse gas emissions reductions,GE
...,...,...,...,...,...
3883,JEY,GJ,0.0,Green employment,GJ
3884,NFK,GJ,0.0,Green employment,GJ
3885,PCN,GJ,0.0,Green employment,GJ
3886,TWN,GJ,0.0,Green employment,GJ


Unnamed: 0,Variable_name,Variable
0,,AB1
1,,AB2
2,,AB3
3,,BE1
4,,BE2
...,...,...
337117,Social protection,SP
337118,Social inclusion,SI
337119,Natural capital protection,NCP
337120,Efficient and sustainable resource use,ESRU


In [117]:
import plotly.express as px


def missing_bar_plot(ISO):
    """Build a stacked horizontal bar chart of data availability for one country.

    Reads the module-level ``missing_data`` frame (indexed by ISO and
    Category) and ``indicator_properties`` (for the Category -> Dimension
    mapping). Each category gets two stacked segments: the available share,
    coloured by its dimension, and the remaining share labelled ``'Missing'``.

    Parameters
    ----------
    ISO : str
        Country code to select from the first index level of ``missing_data``.

    Returns
    -------
    plotly.graph_objects.Figure
        The configured bar chart.

    Raises
    ------
    KeyError
        Propagated from the ``.loc`` lookup when ``ISO`` is not in
        ``missing_data``.
    """
    value_col = 'Data availability (%)'

    dimensions = indicator_properties[['Category', 'Dimension']].drop_duplicates()
    available = missing_data.loc[ISO].reset_index().merge(dimensions, on='Category')

    # Complementary rows: same categories, value = 100 - availability.
    missing = available.assign(
        Dimension='Missing',
        **{value_col: (100 - available[value_col]).round(2)},
    )

    plot_df = pd.concat([available, missing])

    fig = px.bar(
        plot_df,
        y='Category',
        x=value_col,
        color='Dimension',
        barmode='stack',
        orientation='h',
        text=value_col,
        # Suppress these fields in the hover box; the bar label carries the value.
        hover_data={'Dimension': False, value_col: False, 'Category': False},
        color_discrete_map={
            "Social Inclusion": "#d9b5c9",
            "Natural Capital Protection": "#f7be49",
            "Efficient and Sustainable Resource Use": "#8fd1e7",
            "Green Economic Opportunities": "#9dcc93",
            "Missing": '#D3D3D3',
        },
    )

    fig.update_layout(
        height=1000,
        plot_bgcolor='rgba(0, 0, 0, 0)',
        legend=dict(
            yanchor="top",
            y=-0.05,
            xanchor="center",
            x=0,
            title='',
        ),
    )
    fig.update_traces(
        # Fixed-point format: the d3 SI format ('.2s') would render values
        # below 1 with a milli prefix (e.g. 0.4 -> "400m").
        texttemplate='%{text:.1f}%',
        textposition='inside',
        textfont=dict(
            family="sans serif",
            size=18,
            color="white",
        ),
    )
    fig.update_yaxes(title="")
    fig.update_xaxes(showticklabels=False)

    return fig


In [118]:
# Render the availability chart for France.
missing_bar_plot(ISO='FRA')