<a href="https://colab.research.google.com/github/pandemic-tracking/global-vaccine/blob/main/WHO_Priority_Groups_Last_Update.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**TODO**:
Use `fillna` to get the latest numbers for each priority group in this dataset.

In [1]:
# Visualize how recent the priority groups data is
import pandas as pd
import altair as alt
import numpy as np
# NOTE(review): google.colab is presumably only importable inside Colab — confirm before running elsewhere.
from google.colab import data_table
# Switch on the google.colab data_table formatter for DataFrame outputs in this notebook.
data_table.enable_dataframe_formatter()

In [2]:
# --- Notebook constants ---

# URL of the latest priority groups data: downloaded from WHO and
# (manually) uploaded to our S3 bucket.
vax_url = (
    'https://pandemic-tracking-collective-data.s3.us-east-1.amazonaws.com/globalvax/GL/'
    'who_vaccine_uptake_by_priority_groups_09062022.csv'
)

# "As of" date used in the chart headlines.
as_of = '9/6'

# Cutoff month (YYYY-MM): last updates newer than this are drawn in orange.
last_update_cutoff = '2022-06'

# Print the link to the WHO dashboard for reference.
print(
    "who URL: "
    "https://app.powerbi.com/view?r=eyJrIjoiMWNjNzZkNjctZTNiNy00YmMzLTkxZjQtNmJiZDM2MTYxNzEwIiwidCI6ImY2MTBjMGI3LWJkMjQtNGIzOS04MTBiLTNkYzI4MGFmYjU5MCIsImMiOjh9"
)


who URL: https://app.powerbi.com/view?r=eyJrIjoiMWNjNzZkNjctZTNiNy00YmMzLTkxZjQtNmJiZDM2MTYxNzEwIiwidCI6ImY2MTBjMGI3LWJkMjQtNGIzOS04MTBiLTNkYzI4MGFmYjU5MCIsImMiOjh9


In [3]:

# Load the WHO priority-group uptake CSV from the URL configured in vax_url.
vax_df = pd.read_csv(vax_url)

# Most recent DATE reported by each country (DATE holds 'YYYY-MM' strings, so
# max() picks the latest month). The result is renamed for display in the charts.
max_vax_df = (
    vax_df
    .groupby(['COUNTRYNAME', 'DEVELOPMENT_STATUS'], as_index=False)['DATE']
    .max()
    .rename(columns={'COUNTRYNAME': 'Countries', 'DATE': 'Last Update'})
)

# Number of countries whose latest report falls in each month.
df = max_vax_df.groupby(by='Last Update', as_index=False).count()

# Signed distance, in days, from the cutoff month to each "Last Update" month
# (negative means the last update is older than the cutoff).
cutoff_ts = pd.to_datetime(last_update_cutoff, format='%Y-%m')
df['diff_days'] = (pd.to_datetime(df['Last Update'], format='%Y-%m') - cutoff_ts) / np.timedelta64(1, 'D')
df


Unnamed: 0,Last Update,Countries,DEVELOPMENT_STATUS,diff_days
0,2021-04,1,1,-426.0
1,2021-05,2,2,-396.0
2,2021-08,1,1,-304.0
3,2021-09,2,2,-273.0
4,2021-11,3,3,-212.0
5,2021-12,5,5,-182.0
6,2022-01,4,4,-151.0
7,2022-02,3,3,-120.0
8,2022-03,1,1,-92.0
9,2022-04,3,3,-61.0


# New Section

# In the latest submission from each country, how many groups have a target size and how many don't?

In [4]:
# To restrict this chart to specific countries, filter vax_df first, e.g.
#   one country:        vax_df[vax_df['COUNTRYNAME'] == 'Malawi']
#   several countries:  vax_df[vax_df['COUNTRYNAME'].isin(['Eswatini', 'Malawi'])]
print("Question: we are not sure why the WHO dashboard doesn't show all of Malawi's subgroups from 5/22 🤔\n")

# NOTE(review): every row of vax_df is used here, not only each country's
# latest submission — confirm this matches the question in the heading.
# hasTarget is True where NUMBER_TARGET > 0 (missing values compare as False).
# assign() builds a new frame, so the shared vax_df is left unmodified.
rslt_df = vax_df.assign(hasTarget=vax_df['NUMBER_TARGET'].gt(0))

# Bar chart: number of rows with and without a target size.
alt.Chart(rslt_df).mark_bar().encode(
    y='hasTarget:O',
    x='count(hasTarget):Q',
    tooltip='count(hasTarget):Q',
)


Question: we are not sure why the WHO dashboard doesn't show all of Malawi's subgroups from 5/22 🤔



In [5]:
# Count of non-null DATE entries per priority group, largest first.
groups_df = (
    vax_df
    .groupby(['TARGET_GROUP_DESCRIPTION'], as_index=False)['DATE']
    .count()
    .sort_values(by='DATE', ascending=False)
)
groups_df

Unnamed: 0,TARGET_GROUP_DESCRIPTION,DATE
5,Older people (aged 60 years or over),173
3,Health and care workers,163
7,People with co- morbidity,79
9,Residents of long-term care facilities (LTCFs),54
8,Pregnant Women,38
0,Adults with co-morbidity,34
11,refugees or Displaced population,26
10,Teachers and other essential workers,20
1,Children with co-morbidity,12
4,Immunocompromised persons,11


In [6]:
# Chart: number of countries per "Last Update" month.
# Months later than last_update_cutoff are highlighted in orange;
# all other bars are steelblue.
is_recent = alt.datum['Last Update'] > last_update_cutoff
bar_color = alt.condition(is_recent, alt.value('orange'), alt.value('steelblue'))

bars = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x='Countries',
        y='Last Update',
        color=bar_color,
        tooltip=['Last Update', 'Countries'],
    )
    .properties(
        height=300,
        title=f'Countries by Last Update as of {as_of}( orange > {last_update_cutoff})',
    )
)

# Country-count labels, nudged right (dx=3) so they do not sit on top of the bar.
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(text='Countries:Q')

bars + text

In [7]:
# Same breakdown, shown as each month's share of all countries.
# Months later than last_update_cutoff are highlighted in orange;
# all other bars are steelblue.
pct_bar_color = alt.condition(
    alt.datum['Last Update'] > last_update_cutoff,
    alt.value('orange'),
    alt.value('steelblue'),
)

(
    alt.Chart(df)
    # Attach the overall country total to every row, then derive the share.
    .transform_joinaggregate(TotalCountries='sum(Countries)')
    .transform_calculate(PercentOfTotal="datum.Countries / datum.TotalCountries")
    .mark_bar()
    .encode(
        alt.X('PercentOfTotal:Q', axis=alt.Axis(format='.0%')),
        y='Last Update',
        color=pct_bar_color,
        tooltip=['Last Update', 'PercentOfTotal:Q'],
    )
    .properties(
        height=300,
        title=f'% of Countries by Last Update as of {as_of}',
    )
)

# List countries with priority group uptake older than cutoff

In [8]:
# Countries whose latest report is older than the cutoff month, newest first.
is_stale = max_vax_df['Last Update'] < last_update_cutoff
old_data = max_vax_df.loc[is_stale].sort_values(by='Last Update', ascending=False)
old_data

Unnamed: 0,Countries,DEVELOPMENT_STATUS,Last Update
89,Lesotho,Least developed,2022-05
18,Bhutan,Least developed,2022-05
143,Sierra Leone,Least developed,2022-05
134,Rwanda,Least developed,2022-05
6,Argentina,Developing,2022-05
84,Kenya,Developing,2022-05
21,Botswana,Developing,2022-04
22,Brazil,Developing,2022-04
77,Iraq,Developing,2022-04
139,Saudi Arabia,Developing,2022-03
