# The objective of this notebook is to transform World Bank data into structured Parquet files

## Economies Data

In [1]:
import wbgapi as wb
import pandas as pd

# Pull the economy records exposed by wbgapi and load them into a DataFrame.
economy_records = wb.economy.info().items
df_economies = pd.DataFrame(economy_records)

# Last expression of the cell: render the frame.
df_economies

Unnamed: 0,id,value,aggregate,longitude,latitude,region,adminregion,lendingType,incomeLevel,capitalCity
0,ABW,Aruba,False,-70.0167,12.51670,LCN,,LNX,HIC,Oranjestad
1,AFE,Africa Eastern and Southern,True,,,,,,,
2,AFG,Afghanistan,False,69.1761,34.52280,SAS,SAS,IDX,LIC,Kabul
3,AFW,Africa Western and Central,True,,,,,,,
4,AGO,Angola,False,13.2420,-8.81155,SSF,SSA,IBD,LMC,Luanda
...,...,...,...,...,...,...,...,...,...,...
261,XKX,Kosovo,False,20.9260,42.56500,ECS,ECA,IDX,UMC,Pristina
262,YEM,"Yemen, Rep.",False,44.2075,15.35200,MEA,MNA,IDX,LIC,Sana'a
263,ZAF,South Africa,False,28.1871,-25.74600,SSF,SSA,IBD,UMC,Pretoria
264,ZMB,Zambia,False,28.2937,-15.39820,SSF,SSA,IDX,LMC,Lusaka


In [2]:
import pyarrow as pa
import pyarrow.parquet as pq

# Explicit Arrow schema for the economies table: one (column name, Arrow type)
# pair per DataFrame column, in the same order as the frame displayed above.
economies_schema = pa.schema([
    ('id', pa.string()),
    ('value', pa.string()),
    ('aggregate', pa.bool_()),
    ('longitude', pa.float64()),
    ('latitude', pa.float64()),
    ('region', pa.string()),
    ('adminregion', pa.string()),
    ('lendingType', pa.string()),
    ('incomeLevel', pa.string()),
    ('capitalCity', pa.string())
])

# Pass the schema to the conversion so the declared types are actually applied.
# Previously the schema was built but never used, leaving the types to inference.
batch = pa.RecordBatch.from_pandas(df_economies, schema=economies_schema)

# Wrap the single record batch in a Table, which is what the Parquet writer takes.
table = pa.Table.from_batches([batch])

In [3]:
import os

# Make sure the output directory exists so a fresh checkout without Parquet/
# does not fail on the write; exist_ok=True makes re-running the cell harmless.
os.makedirs('Parquet', exist_ok=True)

# Persist the economies table built in the previous cell.
pq.write_table(table, 'Parquet/economies.parquet')

## Topics Data

In [5]:
import wbgapi as wb
import pandas as pd

# Pull the topic records exposed by wbgapi and load them into a DataFrame.
topic_records = wb.topic.info().items
df_topics = pd.DataFrame(topic_records)

# Last expression of the cell: render the frame.
df_topics

Unnamed: 0,id,value,sourceNote
0,1,Agriculture & Rural Development,For the 70 percent of the world's poor who liv...
1,2,Aid Effectiveness,Aid effectiveness is the impact that aid has i...
2,3,Economy & Growth,Economic growth is central to economic develop...
3,4,Education,Education is one of the most powerful instrume...
4,5,Energy & Mining,The world economy needs ever-increasing amount...
5,6,Environment,Natural and man-made environmental resources –...
6,7,Financial Sector,An economy's financial markets are critical to...
7,8,Health,Improving health is central to the Millennium ...
8,9,Infrastructure,Infrastructure helps determine the success of ...
9,10,Social Protection & Labor,The supply of labor available in an economy in...


In [6]:
import pyarrow as pa
import pyarrow.parquet as pq

# Convert the topics frame to Arrow; column types are inferred from the frame.
batch = pa.RecordBatch.from_pandas(df_topics)

# Build the Table from that single batch, naming the batch's own schema
# explicitly (the same schema from_batches would pick up by default).
table = pa.Table.from_batches([batch], schema=batch.schema)

In [7]:
# Persist the topics table built in the previous cell to Parquet.
# NOTE(review): assumes the Parquet/ directory already exists — confirm.
pq.write_table(table, 'Parquet/topics.parquet')

## Series Data

In [29]:
# Total number of series across the four topic frames.
# NOTE(review): df_series3/4/8/14 are assigned in a cell that appears further
# down the notebook, so on a fresh kernel run top-to-bottom this cell raises
# NameError. It should be moved below the cell that builds those frames.
sum(len(frame) for frame in (df_series14, df_series8, df_series4, df_series3))

680

In [14]:
import wbgapi as wb
import pandas as pd

# Fetch the series catalogue for topics 3, 4, 8 and 14 (in that order) and
# bind each resulting DataFrame to its own name.
df_series3, df_series4, df_series8, df_series14 = (
    pd.DataFrame(wb.series.info(topic=topic_id).items)
    for topic_id in (3, 4, 8, 14)
)

# Last expression of the cell: render the topic-3 catalogue.
df_series3

Unnamed: 0,id,value
0,NY.ADJ.NNTY.PC.KD,Adjusted net national income per capita (const...
1,NY.ADJ.NNTY.PC.CD,Adjusted net national income per capita (curre...
2,NY.ADJ.DCO2.GN.ZS,Adjusted savings: carbon dioxide damage (% of ...
3,NY.ADJ.DCO2.CD,Adjusted savings: carbon dioxide damage (curre...
4,NY.ADJ.DNGY.CD,Adjusted savings: energy depletion (current US$)
...,...,...
249,NV.SRV.TOTL.CN,"Services, value added (current LCU)"
250,DT.DOD.DSTC.IR.ZS,Short-term debt (% of total reserves)
251,BX.GRT.TECH.CD.WD,"Technical cooperation grants (BoP, current US$)"
252,FI.RES.TOTL.CD,"Total reserves (includes gold, current US$)"


In [15]:
import pyarrow as pa
import pyarrow.parquet as pq

# Each series catalogue frame paired with the Parquet file it is written to.
series_outputs = (
    (df_series3, 'Parquet/series3.parquet'),
    (df_series4, 'Parquet/series4.parquet'),
    (df_series8, 'Parquet/series8.parquet'),
    (df_series14, 'Parquet/series14.parquet'),
)

# Same three steps for every catalogue: frame -> record batch -> table -> file.
for frame, parquet_path in series_outputs:
    batch = pa.RecordBatch.from_pandas(frame)
    table = pa.Table.from_batches([batch])
    pq.write_table(table, parquet_path)

## Series 3

In [24]:
# Download the data for every series id listed in df_series3 and write each
# one to its own Parquet file.
ids = df_series3['id'].values
# The loop variable is `series_id`, not `id`: a top-level `for id in ...` would
# rebind the builtin `id` in the kernel namespace for every later cell.
for series_id in ids:
    series = wb.data.DataFrame([series_id])
    batch = pa.RecordBatch.from_pandas(series)
    table = pa.Table.from_batches([batch])
    # File name is the series id with dots replaced by underscores.
    name = 'Parquet/series_' + series_id.replace('.','_') + '.parquet'
    pq.write_table(table, name)

## Series 4

In [25]:
# Download the data for every series id listed in df_series4 and write each
# one to its own Parquet file.
ids = df_series4['id'].values
# The loop variable is `series_id`, not `id`: a top-level `for id in ...` would
# rebind the builtin `id` in the kernel namespace for every later cell.
for series_id in ids:
    series = wb.data.DataFrame([series_id])
    batch = pa.RecordBatch.from_pandas(series)
    table = pa.Table.from_batches([batch])
    # File name is the series id with dots replaced by underscores.
    name = 'Parquet/series_' + series_id.replace('.','_') + '.parquet'
    pq.write_table(table, name)

## Series 8

In [26]:
# Download the data for every series id listed in df_series8 and write each
# one to its own Parquet file.
ids = df_series8['id'].values
# The loop variable is `series_id`, not `id`: a top-level `for id in ...` would
# rebind the builtin `id` in the kernel namespace for every later cell.
for series_id in ids:
    series = wb.data.DataFrame([series_id])
    batch = pa.RecordBatch.from_pandas(series)
    table = pa.Table.from_batches([batch])
    # File name is the series id with dots replaced by underscores.
    name = 'Parquet/series_' + series_id.replace('.','_') + '.parquet'
    pq.write_table(table, name)

## Series 14

In [27]:
# Download the data for every series id listed in df_series14 and write each
# one to its own Parquet file.
ids = df_series14['id'].values
# The loop variable is `series_id`, not `id`: a top-level `for id in ...` would
# rebind the builtin `id` in the kernel namespace for every later cell.
for series_id in ids:
    series = wb.data.DataFrame([series_id])
    batch = pa.RecordBatch.from_pandas(series)
    table = pa.Table.from_batches([batch])
    # File name is the series id with dots replaced by underscores.
    name = 'Parquet/series_' + series_id.replace('.','_') + '.parquet'
    pq.write_table(table, name)

In [30]:
# Spot check: fetch one series by id and display it. Per the output below, the
# result has one row per economy and one YR#### column per year.
wb.data.DataFrame(['FI.RES.XGLD.CD'])

Unnamed: 0_level_0,YR1960,YR1961,YR1962,YR1963,YR1964,YR1965,YR1966,YR1967,YR1968,YR1969,...,YR2013,YR2014,YR2015,YR2016,YR2017,YR2018,YR2019,YR2020,YR2021,YR2022
economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ABW,,,,,,,,,,,...,5.327207e+08,5.597510e+08,7.098006e+08,8.079609e+08,7.774615e+08,8.520827e+08,8.300000e+08,1.023469e+09,1.330950e+09,1.362706e+09
AFE,,,,,,,,,,,...,,,,,,,,,,
AFG,10000000.0,6240000.0,4470000.0,9240000.0,8130000.0,8980000.0,12100000.0,5300000.0,6120000.0,8180000.0,...,6.441933e+09,6.680726e+09,6.231781e+09,6.476337e+09,7.185835e+09,7.305676e+09,7.426979e+09,8.419488e+09,,
AFW,,,,,,,,,,,...,,,,,,,,,,
AGO,,,,,,,,,,,...,3.150082e+10,2.703234e+10,2.379054e+10,2.367219e+10,1.745533e+10,1.541041e+10,1.633485e+10,1.378199e+10,1.446811e+10,1.365472e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XKX,,,,,,,,,,,...,9.567427e+08,7.831531e+08,7.705501e+08,6.377976e+08,8.195827e+08,8.811673e+08,9.658319e+08,1.095368e+09,1.244319e+09,1.247751e+09
YEM,,,,,,,,,,,...,5.284072e+09,4.357504e+09,1.442757e+09,8.774422e+08,9.205589e+08,2.983208e+09,1.653643e+09,9.696130e+08,1.688000e+09,1.250826e+09
ZAF,91000000.0,95010000.0,147500000.0,143500000.0,130510000.0,174510000.0,194020000.0,196020000.0,228160000.0,283160000.0,...,4.486368e+10,4.426742e+10,4.161951e+10,4.256559e+10,4.549929e+10,4.647827e+10,4.891973e+10,4.738736e+10,5.026215e+10,5.324753e+10
ZMB,,,,,,196040000.0,204840000.0,174550000.0,193540000.0,362940000.0,...,2.683813e+09,3.078362e+09,2.967627e+09,2.352719e+09,2.082084e+09,1.569230e+09,1.448624e+09,1.203448e+09,2.753875e+09,2.967565e+09
