In [1]:
import pandas as pd
import wbgapi as wb
from pprint import pprint
import ipywidgets as widgets
from tabulate import tabulate
from IPython.core.display import HTML


In [2]:
# The same indicator listing could instead be fetched live from the API via wbgapi
df = (
    pd.read_csv('all-indicators.csv')
    .set_index(['db', 'cets'])
)
# Map each source id to its code
sources = dict((src['id'], src['code']) for src in wb.source.list())

In [3]:
# Build a mapping of CETS code -> list of database ids that contain it.
# NOTE(review): source 57 is deliberately excluded; presumably an archive or
# duplicate database -- confirm and document the reason.
EXCLUDED_SOURCE_ID = 57

db = {}
# Only the (db, cets) index is needed, so iterate it directly rather than
# materializing every row with iterrows(). The loop names also avoid rebinding
# the builtin `id` in the notebook's global namespace.
for source_id, cets_code in df.index:
    if source_id != EXCLUDED_SOURCE_ID:
        db.setdefault(cets_code, []).append(source_id)

# Now summarize occurrences: number of databases -> how many indicators
# appear in exactly that many databases.
counts = {}
for source_ids in db.values():
    n_sources = len(source_ids)
    counts[n_sources] = counts.get(n_sources, 0) + 1

In [4]:
# For reference, display the database (source) list so the numeric source IDs
# used by the tools below can be matched to database names.
wb.source.info()

id,name,lastupdated
1.0,Doing Business,2019-10-23
2.0,World Development Indicators,2021-03-19
3.0,Worldwide Governance Indicators,2020-09-28
5.0,Subnational Malnutrition Database,2016-03-21
6.0,International Debt Statistics,2021-01-21
11.0,Africa Development Indicators,2013-02-22
12.0,Education Statistics,2020-12-20
13.0,Enterprise Surveys,2021-04-02
14.0,Gender Statistics,2021-03-22
15.0,Global Economic Monitor,2020-07-27


In [20]:
# This tool finds indicators that are common across databases. Start by selecting the number of occurrences wanted.
def show_occurrences(sz=2):
    """Summarize which database combinations share indicators.

    Considers every indicator in the module-level ``db`` mapping that appears
    in exactly ``sz`` databases, groups those indicators by the combination of
    databases they appear in, and renders one table row per combination.

    Parameters
    ----------
    sz : int
        Number of databases an indicator must appear in to be counted.

    Returns
    -------
    HTML
        A table of source IDs, source codes and indicator counts, followed by
        the total number of indicators counted.
    """
    # Sort each combination so the same databases always produce the same key,
    # regardless of the order in which the ids were appended to `db`.
    combo_counts = {}
    for source_ids in db.values():
        if len(source_ids) == sz:
            combo = tuple(sorted(source_ids))
            combo_counts[combo] = combo_counts.get(combo, 0) + 1

    # Emit rows in sorted order so the table is stable from run to run.
    report = [
        [
            ', '.join(str(x) for x in combo),
            ', '.join(sources[str(x)] for x in combo),
            n_indicators,
        ]
        for combo, n_indicators in sorted(combo_counts.items())
    ]

    total = '<p>Total Indicators: {}</p>'.format(sum(combo_counts.values()))
    return HTML(tabulate(report, tablefmt='html', headers=['Source IDs', 'Source Codes', '# Indicators']) + total)

  

# Offer only occurrence counts above one: an indicator found in a single
# database is not shared with anything. Filtering by value (rather than
# dropping the first sorted entry) stays correct even when no indicator
# occurs exactly once.
options = list(counts.keys())
options.sort()
occur_slider = widgets.SelectionSlider(
    options=[n for n in options if n > 1],
    continuous_update=False,
    description='Occurrences:',
)

# The trailing semicolon keeps the returned function's repr out of the cell output.
widgets.interact(show_occurrences, sz=occur_slider);

interactive(children=(SelectionSlider(continuous_update=False, description='Occurrences:', options=(2, 3, 4, 5…

<function __main__.show_occurrences(sz=2)>

In [21]:
# Input widgets for the tool that lists the indicators shared by a chosen set of databases

databases = widgets.Text(
    placeholder='Enter comma-separated database IDs to search for',
    continuous_update=False,
)
exact = widgets.Checkbox(
    description='Exact match',
)

def indicator_report(ids='', exact=False):
    """List the indicators found in a given set of databases.

    Parameters
    ----------
    ids : str
        Comma-separated database ids, e.g. ``"2, 14"``. Blank entries (such as
        the one produced by a trailing comma) are ignored.
    exact : bool
        When true, report only indicators whose set of databases equals the
        requested set. Otherwise report every indicator whose databases
        include all the requested ones.

    Returns
    -------
    HTML
        An empty fragment when no ids were entered, an error message when the
        input is not a list of integers, ``'No match'`` when nothing
        qualifies, or a table of source ids, source codes, CETS code and name.
    """
    # Drop blank tokens so stray whitespace or a trailing comma is harmless.
    tokens = [tok.strip() for tok in ids.split(',')]
    tokens = [tok for tok in tokens if tok]
    if not tokens:
        return HTML('')

    # Report bad input in the output area instead of raising a traceback.
    try:
        wanted = {int(tok) for tok in tokens}
    except ValueError:
        return HTML('Invalid database IDs: enter comma-separated integers')

    report = []
    for cets_code, source_ids in db.items():
        present = set(source_ids)
        # Exact mode needs identical sets; otherwise `wanted` must be a subset.
        matched = (wanted == present) if exact else (wanted <= present)
        if matched:
            report.append([
                ', '.join(str(x) for x in source_ids),
                ', '.join(sources[str(x)] for x in source_ids),
                cets_code,
                # The name is read from the first database that lists the indicator.
                df.loc[(source_ids[0], cets_code), 'name']
            ])

    if not report:
        return HTML('No match')

    return HTML(tabulate(report, tablefmt='html', headers=['Source IDs', 'Source Codes', 'CETS Code', 'Name']))


# The trailing semicolon keeps the returned function's repr out of the cell output.
widgets.interact(indicator_report, ids=databases, exact=exact);

interactive(children=(Text(value='', continuous_update=False, description='ids', placeholder='Enter comma-sepa…

<function __main__.indicator_report(ids='', exact=False)>

In [29]:
# The reverse lookup: given one indicator, show the databases that include it

cets_lookup = widgets.Text(
    placeholder='Enter a CETS code',
    continuous_update=False,
)

def cets_report(cets):
    """Show which databases contain a given indicator.

    Parameters
    ----------
    cets : str
        CETS indicator code. Leading and trailing whitespace is ignored.

    Returns
    -------
    HTML
        An empty fragment for blank input, ``'Not found'`` when the code is
        not in ``db``, otherwise the list of database ids followed by the
        source table rendered by ``wb.source.info``.
    """
    # Normalize once so the blank check and the lookup see the same string;
    # otherwise a padded code such as ' XYZ ' would report 'Not found'.
    code = cets.strip()
    if not code:
        return HTML('')

    result = db.get(code)
    if result is None:
        return HTML('Not found')

    result_str = '<p>' + ', '.join(str(x) for x in result) + '</p>'
    return HTML(result_str + wb.source.info(result)._repr_html_())

# The trailing semicolon keeps the returned function's repr out of the cell output.
widgets.interact(cets_report, cets=cets_lookup);

interactive(children=(Text(value='', continuous_update=False, description='cets', placeholder='Enter a CETS co…

<function __main__.cets_report(cets)>