# OWID/WHO comparison

In [2]:
import pandas as pd
from datetime import datetime
from google.colab import data_table
data_table.enable_dataframe_formatter()

In [3]:
def print_bold(input):
    """Print a value to stdout in bold, using ANSI escape sequences.

    The text is left-aligned and padded to a minimum width of 10 characters,
    then wrapped in the ANSI "bold on" and "reset" codes.

    Args:
        input: The value to print. It is converted with ``str`` first because
            the ``s`` format code rejects non-string values such as ints.
    """
    print('\033[1m{:10s}\033[0m'.format(str(input)))

# Prepare dataset

In [34]:
# get OWID vaccination timeseries from Github
owid_data = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")

# drop non cumulative columns from OWID data
owid_data = owid_data.drop(columns=[
    'daily_vaccinations','total_vaccinations_per_hundred','people_vaccinated_per_hundred','people_fully_vaccinated_per_hundred','total_boosters_per_hundred','daily_vaccinations_per_million',
    'daily_people_vaccinated','daily_people_vaccinated_per_hundred','daily_vaccinations_raw'])

# forward fill empty values in owid dataset, per country.
# GroupBy.ffill keeps the original row index, so the result aligns with
# owid_data on assignment (groupby.apply + fillna(method='ffill') is deprecated
# and can return a group-keyed index that no longer aligns).
# people_fully_vaccinated is included because diff_fully_vaccinated below is
# computed from it; without the fill, a latest row with a blank value gives NaN.
# NOTE(review): assumes the CSV rows are already in date order within each
# country — confirm, otherwise sort by ['iso_code', 'date'] first.
owid_cumulative_columns = ['total_vaccinations','people_vaccinated','people_fully_vaccinated','total_boosters']
owid_data[owid_cumulative_columns] = owid_data.groupby('iso_code')[owid_cumulative_columns].ffill()

# get latest date for each country in OWID dataset
latest_owid_dates = owid_data.groupby('iso_code')['date'].max().to_frame()

# keep only the row of each country that carries its latest date
latest_owid_data = pd.merge(latest_owid_dates,owid_data,on=['iso_code','date'])

# get WHO data
who_data = pd.read_csv("https://covid19.who.int/who-data/vaccination-data.csv")

# get PTC owid source classifications
owid_sources = pd.read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vTDKyIaQVtTIy7kn5pD2W8oKM3YoX3YOdSsH3q-r0INH2axjQl6YxgDHBi4HikKx_cmRElde_E-2vlr/pub?gid=2040574494&single=true&output=csv").filter(['Code','OWID Vax Source Category'])

# merge latest OWID data with WHO data, dropping the columns the comparison does not use
merged_data = pd.merge(latest_owid_data,who_data, how='inner', left_on='iso_code', right_on='ISO3').drop(columns=[
    'WHO_REGION','TOTAL_VACCINATIONS_PER100', 'PERSONS_VACCINATED_1PLUS_DOSE_PER100','PERSONS_FULLY_VACCINATED_PER100',
    'VACCINES_USED', 'FIRST_VACCINE_DATE', 'NUMBER_VACCINES_TYPES_USED', 'PERSONS_BOOSTER_ADD_DOSE_PER100',
    'PERSONS_BOOSTER_ADD_DOSE','PERSONS_VACCINATED_1PLUS_DOSE','people_vaccinated'
    ])

# prefix each remaining column with its source so the two datasets stay distinguishable.
# 'people_vaccinated' and 'PERSONS_VACCINATED_1PLUS_DOSE' are dropped just above,
# so they need no rename entry.
merged_data = merged_data.rename(columns={
    'date': 'owid_date',
    'DATE_UPDATED': 'WHO_DATE',
    'total_vaccinations': 'owid_total_vaccinations',
    'people_fully_vaccinated': 'owid_people_fully_vaccinated',
    'TOTAL_VACCINATIONS': 'WHO_TOTAL_VACCINATIONS',
    'PERSONS_FULLY_VACCINATED': 'WHO_PERSONS_FULLY_VACCINATED',
})

# merge combined OWID+WHO data source with PTC owid source classifications
merged_data = pd.merge(merged_data,owid_sources, how='inner', left_on='ISO3', right_on='Code')

# calculate total vaccines diff
merged_data['diff_total_vaccinations'] = merged_data.WHO_TOTAL_VACCINATIONS - merged_data.owid_total_vaccinations

# calculate persons fully vaccinated diff
merged_data['diff_fully_vaccinated'] = merged_data.WHO_PERSONS_FULLY_VACCINATED - merged_data.owid_people_fully_vaccinated

print_bold('Diff = WHO - owid' )
merged_data





[1mDiff = WHO - owid[0m


Unnamed: 0,iso_code,owid_date,location,owid_total_vaccinations,owid_people_fully_vaccinated,total_boosters,COUNTRY,ISO3,DATA_SOURCE,WHO_DATE,WHO_TOTAL_VACCINATIONS,WHO_PERSONS_FULLY_VACCINATED,Code,OWID Vax Source Category,diff_total_vaccinations,diff_fully_vaccinated
0,ABW,2022-10-28,Aruba,173736.0,83768.0,,Aruba,ABW,REPORTING,2022-10-21,173634.0,83719.0,ABW,Country,-102.0,-49.0
1,AFG,2022-10-24,Afghanistan,12055358.0,10386823.0,,Afghanistan,AFG,REPORTING,2022-10-24,12055358.0,10386823.0,AFG,World Health Organization,0.0,0.0
2,AGO,2022-10-16,Angola,22814412.0,7814121.0,1127156.0,Angola,AGO,REPORTING,2022-10-16,22814412.0,7814121.0,AGO,World Health Organization,0.0,0.0
3,AIA,2022-10-21,Anguilla,24434.0,10366.0,2998.0,Anguilla,AIA,REPORTING,2022-10-21,24434.0,10366.0,AIA,Pan American Health Organization,0.0,0.0
4,ALB,2022-10-16,Albania,2991576.0,1265900.0,363122.0,Albania,ALB,REPORTING,2022-10-16,2991576.0,1265900.0,ALB,World Health Organization,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,WSM,2022-10-17,Samoa,448674.0,177605.0,78912.0,Samoa,WSM,REPORTING,2022-09-26,440413.0,177489.0,WSM,Pacific Data Hub (PDH),-8261.0,-116.0
210,YEM,2022-10-20,Yemen,1139242.0,669393.0,54985.0,Yemen,YEM,REPORTING,2022-10-20,1139242.0,669393.0,YEM,World Health Organization,0.0,0.0
211,ZAF,2022-10-30,South Africa,37796701.0,19466838.0,3789338.0,South Africa,ZAF,REPORTING,2022-10-16,37716831.0,20852273.0,ZAF,Country,-79870.0,1385435.0
212,ZMB,2022-10-27,Zambia,12008116.0,8082705.0,33684.0,Zambia,ZMB,REPORTING,2022-10-16,9315275.0,6354938.0,ZMB,Country,-2692841.0,-1727767.0


# Using the merged dataset 

In [5]:
def _dose_totals(subset):
    """Return (row count, WHO dose total, OWID dose total) for ``subset``."""
    return (
        len(subset),
        int(subset['WHO_TOTAL_VACCINATIONS'].sum(axis=0)),
        int(subset['owid_total_vaccinations'].sum(axis=0)),
    )


print_bold('\nTotal countries')
print(len(merged_data))

# Compare the "last updated" dates of both sources.
print_bold('\nDates')
dates_matching = int((merged_data['owid_date'] == merged_data['WHO_DATE']).sum())
dates_owid_greater = int((merged_data['owid_date'] > merged_data['WHO_DATE']).sum())
dates_owid_lesser = int((merged_data['owid_date'] < merged_data['WHO_DATE']).sum())
print('Matching dates:' , dates_matching, 'countries')
print('owid greater ' , dates_owid_greater)
print('owid lesser ' , dates_owid_lesser)

# Countries where both sources report the same total doses.
matching_total_vaccinations_df = merged_data.loc[merged_data['owid_total_vaccinations'] == merged_data['WHO_TOTAL_VACCINATIONS']]
totalvax_matching, total_matching_vax_who, total_matching_vax_owid = _dose_totals(matching_total_vaccinations_df)
print_bold('\nMatching total vaccinations: ' + str(totalvax_matching) + ' countries')
print(f'WHO total doses: {total_matching_vax_who:,}')
print(f'OWID total doses: {total_matching_vax_owid:,}')

# Countries where OWID reports more doses than WHO.
owid_greater_total_vaccinations_df = merged_data.loc[merged_data['owid_total_vaccinations'] > merged_data['WHO_TOTAL_VACCINATIONS']]
owid_greater_count, owid_greater_who_totalvax, owid_greater_owid_totalvax = _dose_totals(owid_greater_total_vaccinations_df)

print_bold('\nOWID greater total doses: ' + str(owid_greater_count) +  ' countries')
print(f'WHO total doses: {owid_greater_who_totalvax:,}')
print(f'OWID total doses: {owid_greater_owid_totalvax:,}')
print(f'DIFF: {owid_greater_owid_totalvax - owid_greater_who_totalvax:,}')

# Countries where OWID reports fewer doses than WHO.
owid_lesser_total_vaccinations_df = merged_data.loc[merged_data['owid_total_vaccinations'] < merged_data['WHO_TOTAL_VACCINATIONS']]
owid_lesser_count, owid_lesser_who_totalvax, owid_lesser_owid_totalvax = _dose_totals(owid_lesser_total_vaccinations_df)

print_bold('\nOWID lesser total doses: '  +  str(owid_lesser_count) +  ' countries')
print(f'WHO total doses: {owid_lesser_who_totalvax:,}')
print(f'OWID total doses: {owid_lesser_owid_totalvax:,}')
print(f'DIFF: {owid_lesser_who_totalvax - owid_lesser_owid_totalvax:,}')

# Overall totals across every country in the merged dataset.
print_bold('\nTotal Doses')
_, totalvax_who, total_vax_owid = _dose_totals(merged_data)
total_vax_diff = totalvax_who-total_vax_owid
print(f'WHO total doses: {totalvax_who:,}')
print(f'OWID total doses: {total_vax_owid:,}')
print(f'Overall diff total doses (WHO-Owid): {total_vax_diff:,}')

# One-row summary table: each key maps to a single-element list.
# Named summary_row rather than `dict` so the builtin is not shadowed for
# the rest of the kernel session.
summary_row = {
        'Date' : [pd.Timestamp.now(tz = 'US/Eastern')],
        'Total countries':[len(merged_data)],
        'Dates - matching': [dates_matching],
        'Dates - OWID greater':[dates_owid_greater],
        'Dates - OWID lesser':[dates_owid_lesser],
        'Total Vax - matching': [totalvax_matching],
        'Total Vax - matching- WHO total':[total_matching_vax_who],
        'Total Vax - matching- OWID total':[total_matching_vax_owid],
        'Total Vax - OWID greater - count':[owid_greater_count],
        'Total Vax - OWID greater - WHO total':[owid_greater_who_totalvax],
        'Total Vax - OWID greater - OWID total':[owid_greater_owid_totalvax],
        'Total Vax - OWID greater - Diff':[owid_greater_owid_totalvax-owid_greater_who_totalvax],
        'Total Vax - OWID lesser - count':[owid_lesser_count],
        'Total Vax - OWID lesser - WHO total':[owid_lesser_who_totalvax],
        'Total Vax - OWID lesser - OWID total':[owid_lesser_owid_totalvax],
        'Total Vax - OWID lesser - Diff':[owid_lesser_who_totalvax-owid_lesser_owid_totalvax],
        'Total Vax - WHO total':[totalvax_who],
        'Total Vax - OWID total':[total_vax_owid],
        'Total Vax - Diff':[total_vax_diff],
        }

df = pd.DataFrame(summary_row)

df


[1m
Total countries[0m
214
[1m
Dates    [0m
Matching dates: 102 countries
owid greater  70
owid lesser  42
[1m
Matching total vaccinations: 118 countries[0m
WHO total doses: 2,875,090,104
OWID total doses: 2,875,090,104
[1m
OWID greater total doses: 66 countries[0m
WHO total doses: 5,027,911,800
OWID total doses: 5,085,824,657
DIFF: 57,912,857
[1m
OWID lesser total doses: 30 countries[0m
WHO total doses: 4,915,787,828
OWID total doses: 4,840,449,403
DIFF: 75,338,425
[1m
Total Doses[0m
WHO total doses: 12,818,789,732
OWID total doses: 12,801,364,164
Overall diff total doses (WHO-Owid): 17,425,568


Unnamed: 0,Date,Total countries,Dates - matching,Dates - OWID greater,Dates - OWID lesser,Total Vax - matching,Total Vax - matching- WHO total,Total Vax - matching- OWID total,Total Vax - OWID greater - count,Total Vax - OWID greater - WHO total,Total Vax - OWID greater - OWID total,Total Vax - OWID greater - Diff,Total Vax - OWID lesser - count,Total Vax - OWID lesser - WHO total,Total Vax - OWID lesser - OWID total,Total Vax - OWID lesser - Diff,Total Vax - WHO total,Total Vax - OWID total,Total Vax - Diff
0,2022-10-31 16:17:56.805502-04:00,214,102,70,42,118,2875090104,2875090104,66,5027911800,5085824657,57912857,30,4915787828,4840449403,75338425,12818789732,12801364164,17425568


# Isolate anomalies

In [36]:
# Anomaly: OWID names WHO as its source, yet the two total-dose figures differ.
sourced_from_who = merged_data["OWID Vax Source Category"] == 'World Health Organization'
totals_disagree = merged_data.diff_total_vaccinations != 0

# Columns that add nothing when reading the mismatch table.
columns_not_shown = ['total_boosters','COUNTRY','ISO3','DATA_SOURCE','Code','OWID Vax Source Category']

og = (
    merged_data.loc[sourced_from_who & totals_disagree]
    .drop(columns=columns_not_shown)
    .sort_values(by=['diff_total_vaccinations'])
)
print('found mismatches: ', len(og))
og

found mismatches:  3


Unnamed: 0,iso_code,owid_date,location,owid_total_vaccinations,owid_people_fully_vaccinated,WHO_DATE,WHO_TOTAL_VACCINATIONS,WHO_PERSONS_FULLY_VACCINATED,diff_total_vaccinations,diff_fully_vaccinated
130,MOZ,2022-10-16,Mozambique,24676660.0,13525638.0,2022-10-16,19387285.0,13525638.0,-5289375.0,0.0
194,TUN,2022-10-18,Tunisia,14827155.0,6384257.0,2022-10-18,13150936.0,6384257.0,-1676219.0,0.0
114,LSO,2022-07-17,Lesotho,1077116.0,872661.0,2022-08-14,1102069.0,872661.0,24953.0,0.0


# Inferring WHO sources

In [7]:
def inferred_source(owid_source, who_source, diff_total_vaccinations):
  """Guess how WHO obtained a country's figures from how they compare to OWID.

  Args:
    owid_source: Value of the 'OWID Vax Source Category' column for the row.
    who_source: Value of the WHO 'DATA_SOURCE' column for the row.
    diff_total_vaccinations: WHO total doses minus OWID total doses.

  Returns:
    "Maybe Public Reporting" when OWID's source is 'Country' or 'Other' and
    the totals are equal; "Direct Reporting to WHO" when OWID's source is
    'Country', WHO's source is 'REPORTING' and the totals differ; 'Unknown'
    otherwise, including when the diff is missing.
  """
  # A missing diff means one side has no total at all. NaN != 0 is True, so
  # without this guard missing data would be reported as a real mismatch.
  if pd.isna(diff_total_vaccinations):
    return 'Unknown'
  if owid_source in ('Country', 'Other') and diff_total_vaccinations == 0:
    return "Maybe Public Reporting"
  if owid_source == 'Country' and who_source == 'REPORTING' and diff_total_vaccinations != 0:
    return "Direct Reporting to WHO"
  return 'Unknown'

# Label every country row with the inferred WHO source.
def _infer_source_for_row(row):
  """Pull the three inputs of inferred_source out of one merged_data row."""
  owid_category = row['OWID Vax Source Category']
  who_data_source = row['DATA_SOURCE']
  dose_diff = row['diff_total_vaccinations']
  return inferred_source(owid_category, who_data_source, dose_diff)

merged_data['who_inferred_source'] = merged_data.apply(_infer_source_for_row, axis=1)
merged_data

Unnamed: 0,iso_code,owid_date,location,owid_total_vaccinations,people_fully_vaccinated,total_boosters,COUNTRY,ISO3,DATA_SOURCE,WHO_DATE,WHO_TOTAL_VACCINATIONS,Code,OWID Vax Source Category,diff_total_vaccinations,who_inferred_source
0,ABW,2022-10-28,Aruba,173736.0,83768.0,,Aruba,ABW,REPORTING,2022-10-21,173634.0,ABW,Country,-102.0,Direct Reporting to WHO
1,AFG,2022-10-24,Afghanistan,12055358.0,10386823.0,,Afghanistan,AFG,REPORTING,2022-10-24,12055358.0,AFG,World Health Organization,0.0,Unknown
2,AGO,2022-10-16,Angola,22814412.0,7814121.0,1127156.0,Angola,AGO,REPORTING,2022-10-16,22814412.0,AGO,World Health Organization,0.0,Unknown
3,AIA,2022-10-21,Anguilla,24434.0,10366.0,2998.0,Anguilla,AIA,REPORTING,2022-10-21,24434.0,AIA,Pan American Health Organization,0.0,Unknown
4,ALB,2022-10-16,Albania,2991576.0,1265900.0,363122.0,Albania,ALB,REPORTING,2022-10-16,2991576.0,ALB,World Health Organization,0.0,Unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,WSM,2022-10-17,Samoa,448674.0,177605.0,78912.0,Samoa,WSM,REPORTING,2022-09-26,440413.0,WSM,Pacific Data Hub (PDH),-8261.0,Unknown
210,YEM,2022-10-20,Yemen,1139242.0,669393.0,54985.0,Yemen,YEM,REPORTING,2022-10-20,1139242.0,YEM,World Health Organization,0.0,Unknown
211,ZAF,2022-10-30,South Africa,37796701.0,19466838.0,3789338.0,South Africa,ZAF,REPORTING,2022-10-16,37716831.0,ZAF,Country,-79870.0,Direct Reporting to WHO
212,ZMB,2022-10-27,Zambia,12008116.0,8082705.0,33684.0,Zambia,ZMB,REPORTING,2022-10-16,9315275.0,ZMB,Country,-2692841.0,Direct Reporting to WHO


In [8]:
import altair as alt

# Horizontal bar chart: number of countries per inferred WHO source.
row_count = 'count()'
inferred_source_chart = alt.Chart(merged_data).mark_bar().encode(
    x=row_count,
    y=alt.Y('who_inferred_source'),
    tooltip=row_count,
)
inferred_source_chart

# SCRAP: UNDP DATA EXPLORATION
UNDP data is updated once a week, so during the week it lags behind the WHO numbers.

In [19]:
print('UNDP data download linked in https://github.com/UNDP-Data/Vaccine-Equity-Dashboard-Data')
undp_current_csv = "https://raw.githubusercontent.com/UNDP-Data/Vaccine-Equity-Dashboard-Data/main/Data.csv"

# UNDP columns that play no part in the dose comparison.
undp_unused_columns = [
    'y2021GGXWDG_NGDP', 'vaccinesdelivered_lag',
    'y2021NGDPDPC', 'hdi', 'un_population2020',
    'informcovid19risk', 'covid19riskclass', 'gap_percapita', 'gap_gghe',
    'uhc', 'che', 'h7_vaccinationpolicy', 'e4_internationalsupport',
    'h5_investmentinvaccines', 'stringencyindex_average',
    'economicsupportindex', 'h7_name',
    'number_vaccines_types_used', 'first_vaccine_date',
    'deathscumulativetotal', 'deathsnewlyreportedinlast7days',
    'vaccine_willingness_monthly', 'v3_vaccinefinancialsupportsummar',
    'v3_name', 'population', 'total_boosters', 'total_boosters_per_hundred',
    'dec40target', 'dec40target_name', 'securedvaccineofpopulation',
    'total_donations', 'cost_gdp_income40', 'pricebracket_che40',
    'utilrate_lag', 'donationsoftotal', 'casescumulativetotalper100000pop',
    'casesnewlyreportedinlast7daysper', 'deathscumulativetotalper100000po',
    'deathsnewlyreportedinlast7dayspe', 'vaccines_used',
]
undp_data = pd.read_csv(undp_current_csv).drop(columns=undp_unused_columns)

# Join the UNDP table onto the OWID+WHO table by ISO3 code, then discard the
# columns that are redundant after the join.
# Every label listed here must exist in the joined frame, otherwise drop raises KeyError.
undp_join_leftovers = [
    'COUNTRY', 'ISO3', 'total_boosters', 'vaccinesdelivered', 'deliveredpopulation',
    'persons_vaccinated_1plus_dose', 'who_inferred_source', 'iso_code', 'Code',
    'countryname', 'iso3', 'people_fully_vaccinated', 'persons_fully_vaccinated',
    'persons_vaccinated_1plus_dose_pe',
]
undp_joined = pd.merge(merged_data, undp_data, how='inner', left_on='iso_code', right_on='iso3')
undp_merged = undp_joined.drop(columns=undp_join_leftovers)

# WHO total doses minus UNDP total doses.
undp_merged['diff_undp_who_totalvax'] = undp_merged.WHO_TOTAL_VACCINATIONS - undp_merged.total_vaccinations

#undp_merged['diff_undp_owid_totalvax'] = undp_merged.owid_total_vaccinations - undp_merged.total_vaccinations

# Show only the countries where UNDP and WHO disagree.
undp_differs_from_who = undp_merged['diff_undp_who_totalvax'] != 0.0
undp_merged.loc[undp_differs_from_who]


UNDP data download linked in https://github.com/UNDP-Data/Vaccine-Equity-Dashboard-Data


Unnamed: 0,owid_date,location,owid_total_vaccinations,DATA_SOURCE,WHO_DATE,WHO_TOTAL_VACCINATIONS,OWID Vax Source Category,diff_total_vaccinations,incomegroup,pop_total,total_vaccinations,total_vaccinations_per100,persons_fully_vaccinated_per100,diff_undp_who_totalvax
0,2022-10-28,Aruba,173736.0,REPORTING,2022-10-21,173634.0,Country,-102.0,High Income Economies,106766.0,173582.0,162.582000,78.389999,52.0
1,2022-10-24,Afghanistan,12055358.0,REPORTING,2022-10-24,12055358.0,World Health Organization,0.0,Low Income Economies,38928340.0,11977773.0,30.768999,26.531000,77585.0
2,2022-10-16,Angola,22814412.0,REPORTING,2022-10-16,22814412.0,World Health Organization,0.0,Lower Middle Income Economies,32866268.0,22696229.0,69.056000,23.775999,118183.0
3,2022-10-21,Anguilla,24434.0,REPORTING,2022-10-21,24434.0,Pan American Health Organization,0.0,Unclassified Economies,15002.0,24412.0,162.725010,68.950996,22.0
13,2022-10-16,Burundi,25258.0,REPORTING,2022-10-16,25258.0,World Health Organization,0.0,Low Income Economies,11890781.0,24236.0,0.204000,0.183000,1022.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,2022-10-21,Saint Vincent and the Grenadines,72935.0,REPORTING,2022-10-21,72935.0,World Health Organization,0.0,Upper Middle Income Economies,110947.0,72875.0,65.689003,28.309000,60.0
206,2022-10-13,Vietnam,260375471.0,REPORTING,2022-10-13,260375471.0,World Health Organization,0.0,Lower Middle Income Economies,97338584.0,260246761.0,267.362000,86.696999,128710.0
210,2022-10-20,Yemen,1139242.0,REPORTING,2022-10-20,1139242.0,World Health Organization,0.0,Low Income Economies,29825968.0,1065395.0,3.572000,2.026000,73847.0
211,2022-10-30,South Africa,37796701.0,REPORTING,2022-10-16,37716831.0,Country,-79870.0,Upper Middle Income Economies,59308688.0,37679458.0,63.530998,35.123001,37373.0


Today (10/27), the UNDP total vaccination numbers match the WHO numbers from 10/25.

In [10]:
# UNDP snapshot pinned to a specific commit so the comparison is repeatable.
# Moving-target alternative: "https://raw.githubusercontent.com/UNDP-Data/Vaccine-Equity-Dashboard-Data/main/Data.csv"
undp_current_csv = "https://raw.githubusercontent.com/UNDP-Data/Vaccine-Equity-Dashboard-Data/3cbebe982bc009595ab1b4f93c156fc9c815cf3f/Data.csv"

# Archived merged table from 2022-10-25, used as the older WHO reference.
# The 2022-10-26 archive is kept here for reference only:
# "http://pandemic-tracking-collective-data.s3-website-us-east-1.amazonaws.com/globalvax/merged/2022-10-26-17-50-04-234055-merged.csv"
who_older_csv = "http://pandemic-tracking-collective-data.s3-website-us-east-1.amazonaws.com/globalvax/merged/2022-10-25-17-50-04-270516-merged.csv"

undp_data = pd.read_csv(undp_current_csv)
who_older_data = pd.read_csv(who_older_csv)

# drop columns from UNDP data that play no part in the dose comparison
undp_data = undp_data.drop(columns=[
    'y2021GGXWDG_NGDP', 'vaccinesdelivered_lag',
    'y2021NGDPDPC', 'hdi', 'un_population2020',
    'informcovid19risk', 'covid19riskclass', 'gap_percapita', 'gap_gghe',
    'uhc', 'che', 'h7_vaccinationpolicy', 'e4_internationalsupport',
    'h5_investmentinvaccines', 'stringencyindex_average',
    'economicsupportindex', 'h7_name',
    'number_vaccines_types_used', 'first_vaccine_date',
    'deathscumulativetotal', 'deathsnewlyreportedinlast7days',
    'vaccine_willingness_monthly', 'v3_vaccinefinancialsupportsummar',
    'v3_name', 'population', 'total_boosters', 'total_boosters_per_hundred',
    'dec40target', 'dec40target_name', 'securedvaccineofpopulation',
    'total_donations', 'cost_gdp_income40', 'pricebracket_che40',
    'utilrate_lag', 'donationsoftotal', 'casescumulativetotalper100000pop',
    'casesnewlyreportedinlast7daysper', 'deathscumulativetotalper100000po',
    'deathsnewlyreportedinlast7dayspe', 'vaccines_used'])

# merge the UNDP dataset with the older archived table by ISO3 code, then
# discard the columns that are redundant after the join (each label listed once)
undp_merged2 = pd.merge(who_older_data, undp_data, how='inner', left_on='ISO3', right_on='iso3').drop(columns=[
    'total_boosters', 'people_fully_vaccinated', 'persons_fully_vaccinated',
    'persons_vaccinated_1plus_dose', 'persons_vaccinated_1plus_dose_pe',
    'iso_code', 'countryname', 'iso3', 'deliveredpopulation', 'vaccinesdelivered',
    'persons_fully_vaccinated_per100', 'Unnamed: 0', 'COUNTRY',
])

# WHO total doses (older snapshot) minus UNDP total doses
undp_merged2['diff_undp_who_totalvax'] = undp_merged2.WHO_TOTAL_VACCINATIONS - undp_merged2.total_vaccinations
undp_diff2 = undp_merged2.loc[ undp_merged2['diff_undp_who_totalvax'] != 0 ]
print('different: ', len(undp_diff2))
undp_diff2

different:  0


Unnamed: 0,owid_date,location,owid_total_vaccinations,ISO3,DATA_SOURCE,WHO_DATE,WHO_TOTAL_VACCINATIONS,Code,OWID Vax Source Category,diff_total_vaccinations,incomegroup,pop_total,total_vaccinations,total_vaccinations_per100,diff_undp_who_totalvax


# How often does WHO update the vaccine administration data?
It seems like they update on a weekly cadence (maybe on Tuesdays) 

But since UNDP grabbed the data the day before WHO updated it, the UNDP numbers are out of sync with WHO for a whole week.

In [32]:
# Two archived copies of the WHO vaccination CSV (file names dated 2022-10-31 and 2022-10-25).
who_last_url = "https://pandemic-tracking-collective-data.s3.us-east-1.amazonaws.com/globalvax/GL/GL-who_vaccination_data-20221031-134504.csv"
who_previous_1_url = "https://pandemic-tracking-collective-data.s3.us-east-1.amazonaws.com/globalvax/GL/GL-who_vaccination_data-20221025-134505.csv"

who_last = pd.read_csv(who_last_url)
who_previous_1 = pd.read_csv(who_previous_1_url)

# Pair the two snapshots by country; pandas suffixes the latest columns with _x
# and the previous ones with _y.
merged_who = who_last.merge(who_previous_1, how='inner', left_on='ISO3', right_on='ISO3')

# Countries whose DATE_UPDATED differs between the two snapshots.
update_date_changed = merged_who['DATE_UPDATED_x'] != merged_who['DATE_UPDATED_y']
merged_who.loc[update_date_changed]




Unnamed: 0,COUNTRY_x,ISO3,WHO_REGION_x,DATA_SOURCE_x,DATE_UPDATED_x,TOTAL_VACCINATIONS_x,PERSONS_VACCINATED_1PLUS_DOSE_x,TOTAL_VACCINATIONS_PER100_x,PERSONS_VACCINATED_1PLUS_DOSE_PER100_x,PERSONS_FULLY_VACCINATED_x,...,PERSONS_VACCINATED_1PLUS_DOSE_y,TOTAL_VACCINATIONS_PER100_y,PERSONS_VACCINATED_1PLUS_DOSE_PER100_y,PERSONS_FULLY_VACCINATED_y,PERSONS_FULLY_VACCINATED_PER100_y,VACCINES_USED_y,FIRST_VACCINE_DATE_y,NUMBER_VACCINES_TYPES_USED_y,PERSONS_BOOSTER_ADD_DOSE_y,PERSONS_BOOSTER_ADD_DOSE_PER100_y
0,Afghanistan,AFG,EMRO,REPORTING,2022-10-24,12055358.0,11084618.0,30.968,28.474,10386823.0,...,11012345.0,30.769,28.289,10328057.0,26.531,"AstraZeneca - Vaxzevria,Beijing CNBG - BBIBP-C...",2021-02-22,11.0,,
5,Angola,AGO,AFRO,REPORTING,2022-10-16,22814412.0,14220830.0,69.416,43.269,7814121.0,...,14220830.0,69.056,43.269,7814121.0,23.776,SII - Covishield,2021-03-10,1.0,1127156.0,3.430
6,Anguilla,AIA,AMRO,REPORTING,2022-10-21,24434.0,10852.0,162.872,72.337,10366.0,...,10852.0,162.725,72.337,10344.0,68.951,"AstraZeneca - Vaxzevria,Pfizer BioNTech - Comi...",2021-02-05,2.0,2998.0,19.984
7,Antigua and Barbuda,ATG,AMRO,REPORTING,2022-10-21,136512.0,64290.0,139.399,65.650,62384.0,...,64290.0,139.399,65.650,62384.0,63.703,"AstraZeneca - Vaxzevria,Beijing CNBG - BBIBP-C...",2021-03-01,6.0,9838.0,10.046
10,Aruba,ABW,AMRO,REPORTING,2022-10-21,173634.0,89915.0,162.630,84.217,83719.0,...,89888.0,162.582,84.192,83694.0,78.390,"Janssen - Ad26.COV 2-S,Pfizer BioNTech - Comir...",2021-02-17,2.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,Venezuela (Bolivarian Republic of),VEN,AMRO,REPORTING,2022-10-21,37860994.0,22157232.0,133.145,77.920,14287370.0,...,22157232.0,133.145,77.920,14287370.0,50.244,"Beijing CNBG - BBIBP-CorV,CIGB - CIGB-66,Finla...",2021-02-22,6.0,651502.0,2.291
224,Viet Nam,VNM,WPRO,REPORTING,2022-10-13,260375471.0,90028108.0,267.495,92.490,84393196.0,...,90043685.0,267.362,92.506,84389728.0,86.697,"AstraZeneca - Vaxzevria,Beijing CNBG - BBIBP-C...",2021-03-08,6.0,55853931.0,57.381
226,Yemen,YEM,EMRO,REPORTING,2022-10-20,1139242.0,914722.0,3.820,3.067,669393.0,...,861625.0,3.572,2.889,604202.0,2.026,"AstraZeneca - Vaxzevria,Beijing CNBG - BBIBP-C...",2021-04-20,11.0,33507.0,0.112
227,Zambia,ZMB,AFRO,REPORTING,2022-10-16,9315275.0,8556919.0,50.671,46.546,6354938.0,...,8233510.0,50.671,44.786,6043203.0,32.872,"Beijing CNBG - BBIBP-CorV,Janssen - Ad26.COV 2...",2021-04-14,3.0,668550.0,3.637
