# Retrieve World Bank Trade Data

Data comes from the World Bank WITS API: <https://wits.worldbank.org/witsapiintro.aspx>

Prereqs:
* Run `pip install pandaSDMX` to install the parser used to read SDMX data from a request

In [1]:
from pandasdmx import Request
import json
from pprint import pprint as pp
import pandas as pd
import numpy as np

In [2]:
# Create the request client for the 'WBG_WITS' source and ask it for its
# dataflows (no resource id given).
wits = Request('WBG_WITS')
flow_response = wits.dataflow()

# Preview the first rows of the returned dataflow table.
available_flows = flow_response.write().dataflow
available_flows.head()

ConnectionError: HTTPConnectionPool(host='wits.worldbank.org', port=80): Max retries exceeded with url: /API/V1/SDMX/V21/rest/dataflow/WBG_WITS/latest (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0xef457c30>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))

In [None]:
# Request the single dataflow this notebook works with.
TRADE_FLOW_ID = 'DF_WITS_TradeStats_Trade'
trade_stats = wits.dataflow(TRADE_FLOW_ID)

In [None]:
# Resolve the structure object of the trade-stats dataflow; it is handed to
# the data request later on as its `dsd` argument.
trade_flow = trade_stats.dataflow.DF_WITS_TradeStats_Trade
trade_data_structure = trade_flow.structure()
trade_data_structure

In [None]:
# Show the dimensions declared by the data structure as a list.
structure_dimensions = trade_data_structure.dimensions
structure_dimensions.aslist()

Only interested in trade with the US

In [None]:
# Rows of the code list filed under 'REPORTER'.
reporters = trade_stats.write().codelist.loc['REPORTER']

# Keep only the United States; the list-of-labels form keeps the result a
# table rather than a single row.
reporter_codes = ['USA']
select_reporter = reporters.loc[reporter_codes]
select_reporter

Looking for specific trade partners: Mexico, Canada, EU, China, Russia, World. We limit the definition of the EU to just the 11 countries that adopted the Euro as their currency in 1999. I tried to use the partner code 'EUN', but there is no data with that partner code.

In [None]:
# Rows of the code list filed under 'PARTNER'.
partners = trade_stats.write().codelist.loc['PARTNER']

# Uncomment to print every partner code next to its name:
#for index, row in partners.iterrows():
#    print(f"'{index}':\t\t'{row['name']}'")

# Individual countries we are interested in
partner_list = ['MEX', 'CAN', 'CHN', 'RUS']

# The countries that adopted the Euro as their currency in 1999
eu_list = [
    'AUT', 'BEL', 'FIN', 'FRA', 'DEU', 'IRL',
    'ITA', 'LUX', 'NLD', 'PRT', 'ESP',
]

# The world total is kept in its own one-element list
world_list = ['WLD']

# Look up all wanted codes in one go: countries, then EU members, then world.
wanted_partner_codes = [*partner_list, *eu_list, *world_list]
select_partners = partners.loc[wanted_partner_codes]
select_partners

Only product we are interested in is "all products" i.e. 'Total'

In [None]:
# Rows of the code list filed under 'PRODUCTCODE'.
products = trade_stats.write().codelist.loc['PRODUCTCODE']

# Uncomment to print every product code next to its name:
#for index, row in products.iterrows():
#    print(f"'{index}':\t\t'{row['name']}'")

# 'Total' is the only product code used in this notebook.
product_codes = ['Total']
select_product = products.loc[product_codes]
select_product

Only looking for the import and export values. All values for these two indicators are in thousands of USD.

In [None]:
# Rows of the code list filed under 'INDICATOR'.
indicators = trade_stats.write().codelist.loc['INDICATOR']

# Uncomment to print every indicator code next to its name:
#for index, row in indicators.iterrows():
#    print(f"'{index}':\t\t'{row['name']}'")

# The two indicator codes kept here are later renamed to 'Imports' and
# 'Exports' respectively.
indicator_codes = ['MPRT-TRD-VL', 'XPRT-TRD-VL']
select_indicators = indicators.loc[indicator_codes]
select_indicators

Only time frequency is 'annual'.

In [None]:
# Preview the first rows of the code list filed under 'FREQ'.
freq_codes = trade_stats.write().codelist.loc['FREQ']
freq_codes.head()

In [None]:
# Single-valued parts of the query key.
freq = 'A'
reporter = select_reporter.index.tolist()[0]
product = select_product.index.tolist()[0]

# Multi-valued parts of the query key.
partners_list = select_partners.index.tolist()
indicator_list = select_indicators.index.tolist()

# Assemble the key and the period window separately so the request call
# itself stays short.
series_key = {
    'FREQ': freq,
    'REPORTER': reporter,
    'PARTNER': partners_list,
    'PRODUCTCODE': product,
    'INDICATOR': indicator_list,
}
period_window = {'startPeriod': '2000', 'endPeriod': '2017'}

response = wits.data(
    resource_id='DF_WITS_TradeStats_Trade',
    key=series_key,
    params=period_window,
    dsd=trade_data_structure,
)

In [None]:
# Turn every series of the response into a DataFrame and give the two
# indicator codes readable names.
trade_df = response.write((s for s in response.data.series))
trade_df.rename(columns={'MPRT-TRD-VL': 'Imports', 'XPRT-TRD-VL': 'Exports'}, inplace=True)


def _trade_column(partner, flow):
    """Return the five-element column key for one partner and one flow."""
    return ('A', 'USA', partner, 'Total', flow)


trade_flows = ('Imports', 'Exports')

# Replace the individual Euro-member columns with a single synthetic 'EU'
# column per flow (row-wise sum of the members).
for flow in trade_flows:
    member_columns = [_trade_column(cty, flow) for cty in eu_list]
    trade_df[_trade_column('EU', flow)] = trade_df[member_columns].sum(axis=1)
    trade_df.drop(member_columns, axis=1, inplace=True)

known_cty_list = partner_list + ['EU']

# 'Other' is whatever remains of the world total once the named partners and
# the EU aggregate have been subtracted.
for flow in trade_flows:
    known_columns = [_trade_column(cty, flow) for cty in known_cty_list]
    known_total = trade_df[known_columns].sum(axis=1)
    trade_df[_trade_column('Other', flow)] = trade_df[_trade_column('WLD', flow)] - known_total

trade_df.head()

In [None]:
# Render the table as CSV text first, then persist that text.
# NOTE(review): this serialises trade_df, not trade2_df, despite the
# "trade2" naming — confirm that is intended.
trade2_csv = trade_df.to_csv()

with open('trade2_data_file.csv', 'w') as csv_file:
    csv_file.write(trade2_csv)

In [None]:
# Reshape the wide trade table (one column per partner and flow) into a long
# table with one row per (year, partner) and columns Year/Partner/Imports/Exports.
new_partner_list = ["CAN","CHN","MEX","RUS","WLD","EU","Other"]
print(trade_df.iloc[0])

# Collect plain dicts and build the frame once at the end: DataFrame.append
# returns a new frame instead of mutating in place (and no longer exists in
# pandas 2.0), so appending inside the loop would leave trade2_df empty.
long_rows = []
for period, yearly_values in trade_df.iterrows():
    # Read the year from the row's own label so every row contributes its own
    # values, rather than re-reading the first row for each year.
    # Assumes each index label starts with the four-digit year (e.g. '2000',
    # Period('2000'), Timestamp('2000-01-01')) — TODO confirm against the
    # index produced by response.write().
    year = int(str(period)[:4])
    for partner in new_partner_list:
        long_rows.append({
            'Year': year,
            'Partner': partner,
            'Imports': yearly_values[('A', 'USA', partner, 'Total', 'Imports')],
            'Exports': yearly_values[('A', 'USA', partner, 'Total', 'Exports')],
        })

# Passing `columns` fixes the column order and keeps the header even when
# trade_df has no rows.
trade2_df = pd.DataFrame(long_rows, columns=['Year','Partner','Imports','Exports'])
trade2_df.head()

In [None]:
# Flatten the tuple column names to "<partner>_<flow>" strings.
#
# DataFrame.rename was tried first, but it did not work for columns whose
# names are tuples:
#
#new_column_names = { x: f"{x[2]}_{x[4]}" for x in list(trade_df) }
#trade_df.rename(columns=new_column_names, inplace=True)
#
# so the columns attribute is overwritten directly instead, which is a little
# more dangerous because it relies purely on column position.
flat_names = []
for _freq, _reporter, partner_code, _product, flow_name in trade_df.columns:
    flat_names.append(f"{partner_code}_{flow_name}")
trade_df.columns = flat_names
trade_df.head()

In [None]:
# Flattened column names per partner, one list per flow.
partner_import_cols = [f"{s}_Imports" for s in new_partner_list]
partner_export_cols = [f"{s}_Exports" for s in new_partner_list]

# Add a "<partner>_Diff" column: imports minus exports for that partner.
for code in new_partner_list:
    imports = trade_df[f"{code}_Imports"]
    exports = trade_df[f"{code}_Exports"]
    trade_df[f"{code}_Diff"] = imports - exports

trade_df.head()

In [None]:
# Add a "<partner>_Scale" column: the import/export difference expressed as a
# fraction of that partner's exports.
for code in new_partner_list:
    difference = trade_df[f"{code}_Diff"]
    exports = trade_df[f"{code}_Exports"]
    trade_df[f"{code}_Scale"] = difference / exports

trade_df.head()

Now let's save the data as CSV so that we do not have to repeatedly make API calls to retrieve the data.

In [None]:
# Render the final table (including the derived columns) as CSV text, then
# persist that text.
trade_csv = trade_df.to_csv()

with open('trade_data_file.csv', 'w') as csv_file:
    csv_file.write(trade_csv)