In [1]:
import pandas as pd
import pycountry as pc
import pycountry_convert as pcc

In [2]:
def get_alphacode(c):
    return pc.countries.search_fuzzy(c)[0].alpha_3

def get_continent(c):
    alpha2 = pc.countries.get(alpha_3=c).alpha_2
    try:
        result = pcc.country_alpha2_to_continent_code(alpha2)
    except:
        result = 'NaN'
    return result

In [3]:
total = pd.read_csv("../data/major-trading-partners.csv")

In [4]:
total = total[total['Year']==2018]

In [5]:
total.shape

(1272, 9)

In [6]:
total = total[
    (~total['Major trading partner 1 (% of exports)'].isin(['Areas nes',
                                                            'Bunkers', 
                                                            'Undisclosed', 
                                                            'Free Zones',
                                                            'Areas, nes',
                                                            'Asia nes',
                                                            'Europe nes']) &
     (~total['Major trading partner'].isin(['Other non-specified areas'])))
].replace(
    ['Dem. Rep. of the Congo', 'Rep. of Korea'],
    ['Congo, The Democratic Republic of the','Korea, Republic of']
)

The following cell takes a while to run

In [7]:
total['country_code'] = [get_alphacode(c) for c in total['Major trading partner']]
total['partner_code'] = [get_alphacode(c) for c in total['Major trading partner 1 (% of exports)']]

In [8]:
only_imports = ['Major trading partner 1 (% of imports)', 
                'Major trading partner 2 (% of imports)', 
                'Major trading partner 3 (% of imports)']
idx2 = ['partner_code' , 'country_code', 'Value']

imports = total[total['Series'].isin(only_imports)][idx2].copy() 

In [9]:
only_exports = ['Major trading partner 1 (% of exports)', 
                'Major trading partner 2 (% of exports)', 
                'Major trading partner 3 (% of exports)']
idx1 = ['country_code', 'partner_code' , 'Value']

exports = total[total['Series'].isin(only_exports)][idx1].copy()  

In [10]:
imports.rename(columns={
    'partner_code': 'Source',
    'country_code': 'Target',
    'Value': 'Weight'
}, inplace=True)

exports.rename(columns={
    'country_code': 'Source',
    'partner_code': 'Target',
    'Value': 'Weight'
}, inplace=True)

In [11]:
exports_nodelist = pd.DataFrame(
    set(list(exports['Source']) + list(exports['Target'])),
    columns=['Node']
)

imports_nodelist = pd.DataFrame(
    set(list(imports['Source']) + list(imports['Target'])),
    columns=['Node']
)

In [12]:
exports_nodelist['continent'] = [get_continent(c) for c in exports_nodelist['Node']]
imports_nodelist['continent'] = [get_continent(c) for c in imports_nodelist['Node']]

In [13]:
exports_nodelist.head()

Unnamed: 0,Node,continent
0,VGB,
1,BTN,AS
2,NIC,
3,RWA,AF
4,TTO,


In [14]:
exports_nodelist[exports_nodelist['continent']=='NaN']

Unnamed: 0,Node,continent
172,TLS,


In [15]:
exports_nodelist.iloc[51,1] = 'AS'
#Same thing is true for imports
imports_nodelist.iloc[51,1] = 'AS'

In [16]:
exports_nodelist.to_csv('../data/exports_nodelist.csv', index=False)
exports.to_csv('../data/exports_edgelist.csv', index=False)

imports_nodelist.to_csv('../data/imports_nodelist.csv', index=False)
imports.to_csv('../data/imports_edgelist.csv', index=False)