In [113]:
import pickle
from IPython.display import Markdown as md
from time import gmtime, strftime
from platform import platform
import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go

In [114]:
def print_records_summary(log):
    
    s = []
    for k, row in log.loc[['_RECORDS' in x for x in log['key']], :].iterrows():

        s.append(row['key'] + ': ' + row['value'])
    
    return(s)

In [115]:
report = pickle.load( open( "../tmp/report.pickle", "rb" ) )
report_master = pd.DataFrame(report['master']).dropna(subset = ['key'])
master = pd.read_csv('../tmp/master/master.csv', low_memory=False)

# WHO PHSM Cleaning Summary Report

In [116]:
md('<b>Data release:</b> ' + strftime("%Y-%m-%d %H:%M:%S", gmtime()))

<b>Data release:</b> 2020-11-18 20:13:24

### Total records

In [117]:
s = print_records_summary(report_master)
md('<br>'.join(s))

ACAPS_RECORDS: 21967<br>CDC_ITF_RECORDS: 8805<br>JH_HIT_RECORDS: 6795<br>OXCGRT_RECORDS: 2617<br>OxCGRT_RECORDS: 31375<br>not_cleansed_RECORDS: 5285<br>sequenced_RECORDS: 66274

### Countries

In [119]:
m = master.loc[master['processed'] == 'sequenced', ['dataset', 'country_territory_area', 'who_id']]
m = m.groupby(['dataset', 'country_territory_area']).count().reset_index()
m = m.pivot(index='country_territory_area', columns='dataset', values='who_id')
m = m.sort_values(by = 'OXCGRT', ascending = False)

dataset,ACAPS,CDC_ITF,JH_HIT,OxCGRT
country_territory_area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,58.0,33.0,32.0,88.0
Albania,73.0,49.0,,131.0
Algeria,96.0,40.0,16.0,88.0
American Samoa,,,14.0,
Andorra,,14.0,,83.0
...,...,...,...,...
"Venezuela, Bolivarian Republic Of",87.0,25.0,45.0,114.0
Viet Nam,107.0,83.0,,180.0
Yemen,48.0,23.0,,47.0
Zambia,36.0,32.0,10.0,114.0


In [112]:
fig = go.Figure()

for column in m.columns.to_list():
    fig.add_trace(
        go.Bar(
            y = m.index,
            x = m[column],
            name = column,
            orientation='h'
        )
    )
    
fig.update_layout(
    height = 1800,
    barmode='stack',
    title = "Records per country",
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    updatemenus=[
        dict(
            buttons=[dict(label = 'All',
                  method = 'update',
                  args = [{'visible': [True, True, True, True]},
                          {'title': 'All',
                           'showlegend':True}]),
             dict(label = 'ACAPS',
                  method = 'update',
                  args = [{'visible': [True, False, False, False]},
                          {'title': 'OXCGRT',
                          'showlegend':True}]),
            dict(label = 'CDC_ITF',
                method = 'update',
                args = [{'visible': [False, True, False, False]},
                        {'title': 'CDC_ITF',
                        'showlegend':True}]),
            dict(label = 'JH_HIT',
                method = 'update',
                args = [{'visible': [False, False, True, False]},
                        {'title': 'JH_HIT',
                        'showlegend':True}]),
            dict(label = 'OXCGRT',
                method = 'update',
                args = [{'visible': [False, False, False, True]},
                        {'title': 'OXCGRT',
                        'showlegend':True}])],
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=1,
            xanchor="right",
            y=1.052,
            yanchor="top"
        ),
    ]
)

fig = fig.update_yaxes(tickfont=dict(size=6))
py.iplot(fig)

In [None]:
m

### New Measures

In [107]:
m = master.loc[master['processed'] == 'not_cleansed', ['dataset', 'who_code', 'who_id']]
m = m.groupby(['dataset', 'who_code']).count().reset_index()
#m['who_code'] = ['measure_' + x for x in m['who_code']]
#m = m.loc[m['dataset'] == 'OXCGRT', :]
m = m.pivot(index='who_code', columns='dataset', values='who_id')
m = m.sort_values(by = 'OXCGRT', ascending = False)

In [108]:
fig = go.Figure()

fig.update_yaxes(type='category')

for column in m.columns.to_list():
    fig.add_trace(
        go.Bar(
            y = m.index,
            x = m[column],
            name = column,
            orientation='h'
        )
    )
    
fig.update_layout(
    height = 1500,
    barmode='stack',
    title = "Records per WHO code",
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    updatemenus=[
        dict(
            buttons=[dict(label = 'All',
                  method = 'update',
                  args = [{'visible': [True, True, True, True]},
                          {'title': 'All',
                           'showlegend':True}]),
             dict(label = 'ACAPS',
                  method = 'update',
                  args = [{'visible': [True, False, False, False]},
                          {'title': 'OXCGRT',
                          'showlegend':True}]),
            dict(label = 'CDC_ITF',
                method = 'update',
                args = [{'visible': [False, True, False, False]},
                        {'title': 'CDC_ITF',
                        'showlegend':True}]),
            dict(label = 'JH_HIT',
                method = 'update',
                args = [{'visible': [False, False, True, False]},
                        {'title': 'JH_HIT',
                        'showlegend':True}]),
            dict(label = 'OXCGRT',
                method = 'update',
                args = [{'visible': [False, False, False, True]},
                        {'title': 'OXCGRT',
                        'showlegend':True}])],
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=1,
            xanchor="right",
            y=1.052,
            yanchor="top"
        ),
    ]
)

fig = fig.update_yaxes(tickfont=dict(size=10))
py.iplot(fig)

In [None]:
m

### Sequenced Measures

In [105]:
m = master.loc[master['processed'] == 'sequenced', ['dataset', 'who_code', 'who_id']]
m = m.groupby(['dataset', 'who_code']).count().reset_index()
#m['who_code'] = ['measure_' + x for x in m['who_code']]
#m = m.loc[m['dataset'] == 'OXCGRT', :]
m = m.pivot(index='who_code', columns='dataset', values='who_id')
m = m.sort_values(by = 'OXCGRT', ascending = False)

In [106]:
fig = go.Figure()

fig.update_yaxes(type='category')

for column in m.columns.to_list():
    fig.add_trace(
        go.Bar(
            y = m.index,
            x = m[column],
            name = column,
            orientation='h'
        )
    )
    
fig.update_layout(
    height = 1500,
    barmode='stack',
    title = "Records per WHO code",
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    updatemenus=[
        dict(
            buttons=[dict(label = 'All',
                  method = 'update',
                  args = [{'visible': [True, True, True, True]},
                          {'title': 'All',
                           'showlegend':True}]),
             dict(label = 'ACAPS',
                  method = 'update',
                  args = [{'visible': [True, False, False, False]},
                          {'title': 'OXCGRT',
                          'showlegend':True}]),
            dict(label = 'CDC_ITF',
                method = 'update',
                args = [{'visible': [False, True, False, False]},
                        {'title': 'CDC_ITF',
                        'showlegend':True}]),
            dict(label = 'JH_HIT',
                method = 'update',
                args = [{'visible': [False, False, True, False]},
                        {'title': 'JH_HIT',
                        'showlegend':True}]),
            dict(label = 'OXCGRT',
                method = 'update',
                args = [{'visible': [False, False, False, True]},
                        {'title': 'OXCGRT',
                        'showlegend':True}])],
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=1,
            xanchor="right",
            y=1.052,
            yanchor="top"
        ),
    ]
)

fig = fig.update_yaxes(tickfont=dict(size=10))
py.iplot(fig)

In [None]:
m