In [1]:
# This JupyterLab notebook extracts the World Bank data (all series) for each selected country into its own CSV file
import pandas as pd
from pathlib import Path

In [2]:
# Load the complete World Bank dataset from its CSV file into a DataFrame
wb_data_path = Path('../data/WB_DATA.csv')
wb_data_df = pd.read_csv(wb_data_path)

In [3]:
# Sanity check: preview the first five rows to confirm the CSV was read correctly
wb_data_df.head(n=5)

Unnamed: 0,series_id,country_code,country_name,year,value
0,AG.LND.AGRI.ZS,ECA,Europe & Central Asia (excluding high income),1981,28.436899
1,TM.VAL.AGRI.ZS.UN,LKA,Sri Lanka,1985,1.879218
2,TM.VAL.AGRI.ZS.UN,LKA,Sri Lanka,1986,1.621166
3,TM.VAL.AGRI.ZS.UN,LKA,Sri Lanka,1987,1.856747
4,TM.VAL.AGRI.ZS.UN,LKA,Sri Lanka,1990,1.803669


In [4]:
# Build a lookup table of the distinct (country_code, country_name) pairs so the
# codes needed for the per-country extraction can be looked up by name.
column_names = ['country_code','country_name']
wb_data_unique = wb_data_df[column_names].drop_duplicates()
# Show every row of the lookup table, indexed by country_code and ordered by
# country_name. option_context lifts the row limit for this display only and
# restores the previous setting on exit, so the change cannot leak into other
# cells if the notebook is run partially or out of order.
with pd.option_context('display.max_rows', None):
    # Inside a with-block the last expression is not auto-rendered, so display it explicitly.
    display(wb_data_unique.set_index('country_code').sort_values(by='country_name'))

Unnamed: 0_level_0,country_name
country_code,Unnamed: 1_level_1
AFG,Afghanistan
AFE,Africa Eastern and Southern
AFW,Africa Western and Central
ALB,Albania
DZA,Algeria
ASM,American Samoa
AND,Andorra
AGO,Angola
ATG,Antigua and Barbuda
ARB,Arab League states


In [5]:
# From here on, cap DataFrame display at a maximum of 20 rows
pd.options.display.max_rows = 20

In [6]:
# Country codes selected for analysis.
# Append more codes here and the notebook will write an additional file for each
# one, provided that file does not already exist.
countries_list = [
    'USA',
    'NGA',
    'CHN',
    'SAU',
    'DEU',
    'BRA',
    'AUS',
]

In [7]:
# For each country code in countries_list, extract that country's rows from the
# World Bank DataFrame and write them to ../data/WB_DATA_<country_code>.csv.
# Files that already exist are left untouched and reported as skipped.
for country in countries_list:
    wb_data_df_country = wb_data_df[wb_data_df['country_code'] == country]
    file_name = f'WB_DATA_{country}.csv'
    try:
        # mode='x' requests exclusive creation: the write fails with
        # FileExistsError instead of overwriting an existing file.
        wb_data_df_country.to_csv(Path(f'../data/{file_name}'),index=False,mode='x')
    except FileExistsError:
        # Only an existing file is treated as a skip. Any other failure (missing
        # directory, permissions, ...) propagates rather than being misreported
        # as "already exists".
        display(f"Skipping creating the file {file_name} because it already exists")
    else:
        display(f"the file {file_name} has been created")

'the file WB_DATA_USA.csv has been created'

'the file WB_DATA_NGA.csv has been created'

'the file WB_DATA_CHN.csv has been created'

'the file WB_DATA_SAU.csv has been created'

'the file WB_DATA_DEU.csv has been created'

'the file WB_DATA_BRA.csv has been created'

'the file WB_DATA_AUS.csv has been created'