In [1]:
import pandas as pd
import wbgapi as wb
from tqdm import tqdm
import pickle as pkl

# Explore Help Functions

In [2]:
# Peek at the first catalogue entry to see the record shape (a dict with 'id' and 'value').
list(wb.series.list())[0]

{'id': 'AG.AGR.TRAC.NO', 'value': 'Agricultural machinery, tractors'}

In [3]:
# List the databases (sources) the API exposes, together with their numeric ids.
wb.source.info()

id,name,code,concepts,lastupdated
1.0,Doing Business,DBS,3.0,2021-08-18
2.0,World Development Indicators,WDI,3.0,2023-03-01
3.0,Worldwide Governance Indicators,WGI,3.0,2022-09-23
5.0,Subnational Malnutrition Database,SNM,3.0,2016-03-21
6.0,International Debt Statistics,IDS,4.0,2022-12-06
11.0,Africa Development Indicators,ADI,3.0,2013-02-22
12.0,Education Statistics,EDS,3.0,2020-12-20
13.0,Enterprise Surveys,ESY,3.0,2022-03-25
14.0,Gender Statistics,GDS,3.0,2023-03-06
15.0,Global Economic Monitor,GEM,3.0,2020-07-27


In [4]:
# Economies available in database 2 (listed as World Development Indicators by wb.source.info()).
# Rows with a blank region / incomeLevel look like groupings (e.g. "Arab World") rather than countries.
wb.economy.info(db = 2)

id,value,region,incomeLevel
ABW,Aruba,LCN,HIC
AFE,Africa Eastern and Southern,,
AFG,Afghanistan,SAS,LIC
AFW,Africa Western and Central,,
AGO,Angola,SSF,LMC
ALB,Albania,ECS,UMC
AND,Andorra,ECS,HIC
ARB,Arab World,,
ARE,United Arab Emirates,MEA,HIC
ARG,Argentina,LCN,UMC


In [5]:
# Sample pull of a single indicator: one row per economy, one YRxxxx column per year.
wb.data.DataFrame('NY.GDP.PCAP.CD').reset_index()

Unnamed: 0,economy,YR1960,YR1961,YR1962,YR1963,YR1964,YR1965,YR1966,YR1967,YR1968,...,YR2012,YR2013,YR2014,YR2015,YR2016,YR2017,YR2018,YR2019,YR2020,YR2021
0,ABW,,,,,,,,,,...,25609.955724,26515.678080,26942.307976,28421.386493,28451.273745,29326.708058,30220.594523,31650.760537,24487.863560,29342.100858
1,AFE,162.913034,162.551683,172.002460,199.189238,179.387799,198.230368,209.414665,211.707060,224.239783,...,1759.182395,1730.394686,1719.183721,1538.552268,1443.692371,1628.586788,1564.734340,1512.270553,1363.540741,1549.772730
2,AFG,62.369375,62.443703,60.950364,82.021738,85.511073,105.243196,143.103233,167.165675,134.012768,...,663.141053,651.987862,628.146804,592.476537,520.252064,530.149831,502.056771,500.522664,516.866552,368.754614
3,AFW,106.976475,112.047561,117.730633,122.278715,130.599963,137.186142,142.895375,127.303606,128.365494,...,1953.407033,2149.295219,2243.271464,1876.623483,1645.023767,1585.911930,1731.311792,1749.303317,1683.436391,1757.030626
4,AGO,,,,,,,,,,...,4962.552072,5101.983876,5059.080441,3100.830685,1709.515534,2283.214233,2487.500996,2142.238757,1603.993477,1953.533757
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,XKX,,,,,,,,,,...,3410.859780,3704.784221,3902.676013,3520.766449,3759.560246,4009.380987,4384.048892,4416.108358,4310.811183,5269.783901
262,YEM,,,,,,,,,,...,1349.990295,1497.747941,1557.601406,1488.416482,1069.817122,893.716573,701.714878,,,
263,ZAF,529.561923,543.042224,560.699395,601.599951,642.688431,681.131112,718.118179,775.152812,813.508497,...,8173.869138,7441.230854,6965.137897,6204.929901,5735.066787,6734.475153,7048.522211,6688.787271,5741.643129,7055.044776
264,ZMB,228.567399,216.274674,208.562685,209.453362,236.941713,296.022427,334.672528,350.653425,398.557506,...,1729.647471,1840.320553,1724.576220,1307.909649,1249.923143,1495.752138,1475.204538,1268.120941,956.831364,1137.343633


# Pull in Trade Data for Oldest and Newest Years

In [6]:
# Trade tables for the two endpoint years named in the file names (1962 and 2018).
# In the cells visible here they are only used to enumerate economy codes.
trade_data1 = pd.read_stata("country_partner_sitcproduct4digit_year_1962.dta") 
trade_data2 = pd.read_stata("country_partner_sitcproduct4digit_year_2018.dta") 


In [7]:
# Every distinct code found in either the location or the partner column of the 1962 table.
country_list1 = list(
    set(trade_data1["location_code"]) | set(trade_data1["partner_code"])
)

# Same de-duplicated code list for the 2018 table.
country_list2 = list(
    set(trade_data2["location_code"]) | set(trade_data2["partner_code"])
)

In [8]:
# Number of distinct economy codes in the 1962 trade table.
len(country_list1)

156

In [9]:
# Number of distinct economy codes in the 2018 trade table.
len(country_list2)

236

In [10]:
# Keep one indicator as a wide frame (an `economy` column plus one YRxxxx column per year)
# to try out the coverage filter in the next cell.
economic_indicator = wb.data.DataFrame('NY.GDP.PCAP.CD').reset_index()

In [11]:
# Rows for economies in the 1962 trade list that also report a value for 1960.
economic_indicator.loc[
    economic_indicator["YR1960"].notna()
    & economic_indicator["economy"].isin(country_list1)
]

Unnamed: 0,economy,YR1960,YR1961,YR1962,YR1963,YR1964,YR1965,YR1966,YR1967,YR1968,...,YR2012,YR2013,YR2014,YR2015,YR2016,YR2017,YR2018,YR2019,YR2020,YR2021
2,AFG,62.369375,62.443703,60.950364,82.021738,85.511073,105.243196,143.103233,167.165675,134.012768,...,663.141053,651.987862,628.146804,592.476537,520.252064,530.149831,502.056771,500.522664,516.866552,368.754614
13,AUS,1810.510249,1877.509803,1854.552575,1967.014259,2131.277657,2280.902108,2343.706917,2579.986339,2723.999484,...,68044.714816,68158.579743,62513.411217,56710.445724,49875.565143,53936.140255,57207.871509,54941.434179,51720.370763,60443.109165
14,AUT,935.460427,1031.815004,1087.834243,1167.000532,1269.412583,1374.532140,1486.968606,1569.667183,1677.673528,...,48564.917335,50731.127254,51786.377175,44195.817595,45307.587862,47429.158456,51466.556563,50070.403348,48809.226876,53637.705711
16,BDI,71.360224,72.088782,73.942008,78.948269,85.964725,50.990420,51.808640,54.450305,54.647466,...,238.205949,241.547671,257.818552,289.359633,242.065671,243.135809,231.446477,216.972968,216.826741,221.477676
17,BEL,1273.691659,1350.197673,1438.523233,1535.023729,1701.846276,1835.594766,1957.626080,2086.636005,2222.361511,...,44670.560685,46757.951856,47764.071512,41008.296719,42012.622719,44198.482391,47544.981147,46638.681305,45517.794930,51247.014353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,URY,491.213493,604.176627,659.611697,587.006885,744.879993,705.398553,668.328011,584.850133,578.588983,...,15206.872620,17015.134761,16875.506193,15655.936998,16766.425259,18769.787523,18825.283807,17859.931496,15619.542656,17313.188348
251,USA,3007.123445,3066.562869,3243.843078,3374.515171,3573.941185,3827.527110,4146.316646,4336.426587,4695.923390,...,51784.418574,53291.127689,55123.849787,56762.729452,57866.744934,59907.754261,62823.309438,65120.394663,63530.633484,70248.629000
253,VCT,155.293588,163.424034,166.602193,154.653976,163.883606,165.126044,173.196917,167.777520,160.299923,...,6754.370184,7117.554763,7210.616623,7386.737595,7684.779208,8030.585057,8399.694011,8674.371009,8335.256476,8666.387041
254,VEN,953.677944,968.767091,1022.034029,1076.532443,864.265327,869.906867,877.196049,894.683818,940.201420,...,12937.927597,12433.980785,15975.729375,,,,,,,


# Look at country coverage by each indicator

In [12]:
# Materialise the full series catalogue; each element is a dict whose 'id' key
# is the indicator code passed to wb.data.DataFrame in the loop below.
list_indicators = list(wb.series.list())

In [21]:
# Load already processed data from the checkpoint written by the coverage loop.
# On a fresh run the file does not exist yet, so start from an empty list
# instead of failing; the loop then computes every indicator from scratch.
# NOTE: pickle can execute arbitrary code on load -- only open files this notebook produced.
try:
    with open('all_wb_indicators.pickle', 'rb') as handle:
        coverage = pkl.load(handle)
except FileNotFoundError:
    coverage = []

In [25]:
# Number of indicator records already present in the loaded checkpoint.
len(coverage)

1338

In [26]:
# Indicator ids (first field of each record) that are already in the checkpoint.
completed = [indicator_id for indicator_id, *_ in coverage]

In [27]:
%%time
#coverage = []


for indicator in list_indicators:
    indicator_str = indicator["id"]
    
    print(indicator_str)
    
    if (indicator_str not in completed):
        #try: 
        indicator_df = wb.data.DataFrame(indicator_str).reset_index()

        coverage_1962 = len(indicator_df[~pd.isna(indicator_df.YR1962) \
                        & indicator_df.economy.isin(country_list1)])/len(country_list1)

        coverage_2020 = len(indicator_df[~pd.isna(indicator_df.YR2018) \
                        & indicator_df.economy.isin(country_list2)])/len(country_list2)

        coverage.append([indicator_str, indicator, coverage_1962, coverage_2020, indicator_df])
        
        with open('all_wb_indicators.pickle', 'wb') as handle:
            pkl.dump(coverage, handle)
        #except: 
        #    print(indicator_str)
        #    continue

AG.AGR.TRAC.NO
AG.CON.FERT.PT.ZS
AG.CON.FERT.ZS
AG.LND.AGRI.K2
AG.LND.AGRI.ZS
AG.LND.ARBL.HA
AG.LND.ARBL.HA.PC
AG.LND.ARBL.ZS
AG.LND.CREL.HA
AG.LND.CROP.ZS
AG.LND.EL5M.RU.K2
AG.LND.EL5M.RU.ZS
AG.LND.EL5M.UR.K2
AG.LND.EL5M.UR.ZS
AG.LND.EL5M.ZS
AG.LND.FRST.K2
AG.LND.FRST.ZS
AG.LND.IRIG.AG.ZS
AG.LND.PRCP.MM
AG.LND.TOTL.K2
AG.LND.TOTL.RU.K2
AG.LND.TOTL.UR.K2
AG.LND.TRAC.ZS
AG.PRD.CREL.MT
AG.PRD.CROP.XD
AG.PRD.FOOD.XD
AG.PRD.LVSK.XD
AG.SRF.TOTL.K2
AG.YLD.CREL.KG
BG.GSR.NFSV.GD.ZS
BM.GSR.CMCP.ZS
BM.GSR.FCTY.CD
BM.GSR.GNFS.CD
BM.GSR.INSF.ZS
BM.GSR.MRCH.CD
BM.GSR.NFSV.CD
BM.GSR.ROYL.CD
BM.GSR.TOTL.CD
BM.GSR.TRAN.ZS
BM.GSR.TRVL.ZS
BM.KLT.DINV.CD.WD
BM.KLT.DINV.WD.GD.ZS
BM.TRF.PRVT.CD
BM.TRF.PWKR.CD.DT
BN.CAB.XOKA.CD
BN.CAB.XOKA.GD.ZS
BN.FIN.TOTL.CD
BN.GSR.FCTY.CD
BN.GSR.GNFS.CD
BN.GSR.MRCH.CD
BN.KAC.EOMS.CD
BN.KLT.DINV.CD
BN.KLT.PTXL.CD
BN.RES.INCL.CD
BN.TRF.CURR.CD
BN.TRF.KOGT.CD
BX.GRT.EXTA.CD.WD
BX.GRT.TECH.CD.WD
BX.GSR.CCIS.CD
BX.GSR.CCIS.ZS
BX.GSR.CMCP.ZS
BX.GSR.FCTY.CD
BX.GSR.GNFS.CD
BX.G

SP.URB.TOTL
SP.URB.TOTL.IN.ZS
SP.UWT.TFRT
ST.INT.ARVL
ST.INT.DPRT
ST.INT.RCPT.CD
ST.INT.RCPT.XP.ZS
ST.INT.TRNR.CD
ST.INT.TRNX.CD
ST.INT.TVLR.CD
ST.INT.TVLX.CD
ST.INT.XPND.CD
ST.INT.XPND.MP.ZS
TG.VAL.TOTL.GD.ZS
TM.QTY.MRCH.XD.WD
TM.TAX.MANF.BC.ZS
TM.TAX.MANF.BR.ZS
TM.TAX.MANF.IP.ZS
TM.TAX.MANF.SM.AR.ZS
TM.TAX.MANF.SM.FN.ZS
TM.TAX.MANF.SR.ZS
TM.TAX.MANF.WM.AR.ZS
TM.TAX.MANF.WM.FN.ZS
TM.TAX.MRCH.BC.ZS
TM.TAX.MRCH.BR.ZS
TM.TAX.MRCH.IP.ZS
TM.TAX.MRCH.SM.AR.ZS
TM.TAX.MRCH.SM.FN.ZS
TM.TAX.MRCH.SR.ZS
TM.TAX.MRCH.WM.AR.ZS
TM.TAX.MRCH.WM.FN.ZS
TM.TAX.TCOM.BC.ZS
TM.TAX.TCOM.BR.ZS
TM.TAX.TCOM.IP.ZS
TM.TAX.TCOM.SM.AR.ZS
TM.TAX.TCOM.SM.FN.ZS
TM.TAX.TCOM.SR.ZS
TM.TAX.TCOM.WM.AR.ZS
TM.TAX.TCOM.WM.FN.ZS
TM.UVI.MRCH.XD.WD
TM.VAL.AGRI.ZS.UN
TM.VAL.FOOD.ZS.UN
TM.VAL.FUEL.ZS.UN
TM.VAL.ICTG.ZS.UN
TM.VAL.INSF.ZS.WT
TM.VAL.MANF.ZS.UN
TM.VAL.MMTL.ZS.UN
TM.VAL.MRCH.AL.ZS
TM.VAL.MRCH.CD.WT
TM.VAL.MRCH.HI.ZS
TM.VAL.MRCH.OR.ZS
TM.VAL.MRCH.R1.ZS
TM.VAL.MRCH.R2.ZS
TM.VAL.MRCH.R3.ZS
TM.VAL.MRCH.R4.ZS
TM.VAL.MRCH.R5.Z

# Resave dict version of coverage for easier use

In [3]:
# Reload the list of coverage records from the pickle written by the indicator loop.
# NOTE: pickle can execute arbitrary code on load -- only open files this notebook produced.
with open('all_wb_indicators.pickle', 'rb') as handle:
    coverage = pkl.load(handle)

In [4]:
# Show only the first record: the full list holds one DataFrame per indicator,
# and echoing all of them would bury the notebook in output.
coverage[:1]

[['AG.AGR.TRAC.NO',
  {'id': 'AG.AGR.TRAC.NO', 'value': 'Agricultural machinery, tractors'},
  0.8589743589743589,
  0.0,
      economy  YR1960    YR1961    YR1962    YR1963    YR1964    YR1965  \
  0       ABW     NaN       NaN       NaN       NaN       NaN       NaN   
  1       AFE     NaN  169776.0  176648.0  180067.0  185072.0  191771.0   
  2       AFG     NaN     120.0     150.0     200.0     200.0     300.0   
  3       AFW     NaN    2733.0    3772.0    4577.0    5404.0    6364.0   
  4       AGO     NaN     800.0    1645.0    2000.0    2817.0    3000.0   
  ..      ...     ...       ...       ...       ...       ...       ...   
  261     XKX     NaN       NaN       NaN       NaN       NaN       NaN   
  262     YEM     NaN     380.0     440.0     510.0     700.0     910.0   
  263     ZAF     NaN  122218.0  126923.0  130000.0  133552.0  138422.0   
  264     ZMB     NaN    2435.0    2760.0    2000.0    1600.0    1600.0   
  265     ZWE     NaN   12567.0   12860.0   13000.0  

In [5]:
# Map each indicator id (field 0) to its full indicator frame (field 4).
coverage_dict = {entry[0]: entry[4] for entry in coverage}

In [7]:
# Persist the id -> frame mapping under a separate file name, leaving the list-form pickle untouched.
with open('all_wb_indicators_dict.pickle', 'wb') as handle:
    pkl.dump(coverage_dict, handle)