In [1]:
from functools import reduce
import pandas as pd
from polio_utils import download_polio_data, extract_wild_cases, extract_vd_cases, owid_population


Download and extract the data from the latest wild polio virus pdf

In [2]:
res = download_polio_data(url_stub='https://polioeradication.org/wp-content/uploads/2022/03/weekly-polio-analyses-WPV-')

with open('data/polio_wild_cases.pdf', 'wb') as f:
    f.write(res.content)

wt_df = extract_wild_cases(file_path='data/polio_wild_cases.pdf')

Calculate cases per million population

In [38]:
population = owid_population()
wt_df['year'] = wt_df['year'].astype(int)
wt_df['wild_polio_1_cases'] = wt_df['wild_polio_1_cases'].astype(int)

wt_pop = pd.DataFrame(pd.merge(left = population, right = wt_df, how="right"))
wt_pop['wild_polio_1_cases_per_million'] = (wt_pop['wild_polio_1_cases']/wt_pop['population']) * 1000000
wt_pop['wild_polio_1_cases_per_million'] = wt_pop['wild_polio_1_cases_per_million'].fillna(0).round(decimals=3)

wt_pop

Unnamed: 0,entity,year,population,wild_polio_1_cases,wild_polio_1_cases_per_million
0,Pakistan,2016,203631360.0,20,0.098
1,Afghanistan,2016,35383028.0,13,0.367
2,Malawi,2016,17205254.0,0,0.0
3,Nigeria3,2016,,4,0.0
4,Iran,2016,79563992.0,0,0.0
5,Total (Type1),2016,,37,0.0
6,Pakistan,2017,207906208.0,8,0.038
7,Afghanistan,2017,36296108.0,14,0.386
8,Malawi,2017,17670194.0,0,0.0
9,Nigeria3,2017,,0,0.0


Download and extract the data from the latest vaccine derived polio cases pdf

In [3]:
res = download_polio_data(url_stub='http://polioeradication.org/wp-content/uploads/2022/03/weekly-polio-analyses-cVDPV-')

with open('data/polio_vaccine_derived_cases.pdf', 'wb') as f:
    f.write(res.content)

vd_df = extract_vd_cases(file_path = 'data/polio_vaccine_derived_cases.pdf')

In [39]:
population = owid_population()

vd_df['year'] = vd_df['year'].astype(int)

vd_pop = pd.DataFrame(pd.merge(left = population, right = vd_df, how="right"))
vd_pop['cVDPV1_per_million'] = (vd_pop['cVDPV1']/vd_pop['population']) * 1000000
vd_pop['cVDPV2_per_million'] = (vd_pop['cVDPV2']/vd_pop['population']) * 1000000
vd_pop['cVDPV3_per_million'] = (vd_pop['cVDPV3']/vd_pop['population']) * 1000000
vd_pop['total_cVDPV_per_million'] = (vd_pop['total_cVDPV']/vd_pop['population']) * 1000000

#vd_pop['wild_polio_1_cases_per_million'].fillna(0).round(decimals=3)


In [51]:
polio_dataframes = [wt_pop, vd_pop]

polio_df = reduce(
        lambda left, right: pd.merge(left, right, on=["entity", "year", "population"], how="outer"),
        polio_dataframes,
    )

In [57]:
cols = polio_df.columns.drop(['entity', 'year'])

int_cols = [k for k in cols if 'per_million' not in k]

float_cols = [k for k in cols if 'per_million' in k]


In [61]:
polio_df[int_cols] = polio_df[int_cols].fillna(0).astype(int)
polio_df[float_cols] = polio_df[float_cols].fillna(0).round(3)

In [65]:
polio_df.rename(columns = {'entity':'Country'})['Country'].to_csv('data/countries_to_standardise.csv', index = False)