In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import networkx as nx
import matplotlib.pyplot as plt
from warnings import filterwarnings
filterwarnings('ignore')


In [2]:
def get_data(
        csvs_root='../../csvs/',
        data_root='../../../../data/trade/BACI/'):
    # RCA BASE TABLE
    rca_n = pd.read_csv(
        csvs_root + 'RCA_n.csv',
        dtype={'product': object, 'flow':int})

    # COMMODITY CODE
    commodity_code = pd.read_csv(
        csvs_root + 'commodity_code.csv',
        encoding='latin-1',
        dtype={'Code': object, 'isLeaf':int, 'Level':int})
    # BACI uses SITC rev 3(S3)
    commodity_code = \
        commodity_code[commodity_code['Classification']=='S3']
    # drop last column
    commodity_code = commodity_code.iloc[:, :-1]

    # COUNTRY NAMES
    country_df = pd.read_csv(data_root + 'countries.csv')
    countries = country_df.Name.values
    countries = np.append(countries, ["United States of America", "China, Hong Kong SAR", "China, Taiwan Province of", "Türkiye", "Iran (Islamic Republic of)", "Czechia", "Switzerland, Liechtenstein", "China, Macao SAR", "Korea, Dem. People's Rep. of", "Venezuela (Bolivarian Rep. of)", "Côte d'Ivoire", "Congo, Dem. Rep. of the", "Lao People's Dem. Rep.", "Bolivia (Plurinational State of)", "North Macedonia", "Curaçao", "State of Palestine", "Cabo Verde", "Eswatini", "British Virgin Islands", "Micronesia (Federated States of)", "Wallis and Futuna Islands", "Holy See",])
    return rca_n, commodity_code, countries

# 2016 data

In [3]:
# load data 
csvs_root = '../../csvs/'
data_root = '../../../../data/trade/BACI/'
rca_n, commodity_code, countries = get_data(csvs_root, data_root)

# merge
commodity_code.rename(columns={'Code': 'product'}, inplace=True)
rca_n = rca_n.merge(commodity_code, on='product', how='left')

# exports only

In [4]:
# only exports 
rca_n = rca_n[rca_n.flow==2].copy()

# only countries
rca_n_countries = rca_n[rca_n.economy_label.isin(countries)].copy()

# drop columns that aren't included in commodity codes
rca_n_countries.dropna(subset=['Classification'], inplace=True)

### there seems to be data missing from level1 to 2&3
- however, TOTAL and the sum of sum_kusd at each level is the same
- so for now we will just work on level3    

In [5]:
rca_n_countries.groupby(['Level'])['sum_kusd'].sum().reset_index()

level3_filter = rca_n_countries.Level==3
rca_n_countries = rca_n_countries[level3_filter].copy()

## calc RCA
for the following, apply below steps
- national ratio
- international ratio

**steps:**
1. aggregate 
2. rename
3. merge
4. calc ratio

In [6]:
# product/sum(product) for each country

sum_per_n = rca_n_countries.groupby(['economy_label'])\
    .agg({'sum_kusd': 'sum'})\
        .reset_index()[['economy_label', 'sum_kusd']]
sum_per_n = sum_per_n.rename({'sum_kusd': 'sum_per_n'}, axis=1)[['economy_label', 'sum_per_n']]

rca_n_countries = pd.merge(rca_n_countries, sum_per_n, on='economy_label', how='left')
rca_n_countries['n_ratio'] = rca_n_countries.apply(lambda x: x['sum_kusd']/x['sum_per_n'], axis=1)


# product/sum(product) across countries

sum_per_p = rca_n_countries.groupby(['product'])\
    .agg({'sum_kusd':'sum'})\
        .reset_index()[['product', 'sum_kusd']]
sum_per_p = sum_per_p.rename({'sum_kusd': 'sum_per_p'}, axis=1)[['product', 'sum_per_p']] # renaming for merging

rca_n_countries = pd.merge(rca_n_countries, sum_per_p, on='product', how='left')

# total of exports
sum_p = rca_n_countries['sum_kusd'].sum()
rca_n_countries['p_ratio'] = rca_n_countries.apply(lambda x: x['sum_per_p']/sum_p, axis=1)

rca_n_countries.sort_values(by=['economy_label','product'], inplace=True)
rca_n_countries.reset_index(drop=True, inplace=True)
rca_n_countries.to_csv('tmp.csv', index=False)

### calculate difference between calculated sum of products vs. actual sum of products for china 

In [7]:
economy_name = 'United States of America'
economy_name = 'China'
rca_n_countries_economy_filter = (rca_n_countries.economy_label==economy_name).values
rca_n_economy_filter = (rca_n.economy_label==economy_name).values
calculated_sum_per_n = rca_n_countries[rca_n_countries_economy_filter]['sum_per_n'].unique()
assert len(calculated_sum_per_n) == 1
actual_sum_per_n = rca_n[(rca_n['Level']==0).values & rca_n_economy_filter]['sum_kusd'].values[0]
difference = calculated_sum_per_n[0] - actual_sum_per_n
print(economy_name)
print("(calculated - actual)/actual: ", round(difference/actual_sum_per_n, 3)*100
, "%;\n", " "*29,round(difference/1e6), "million dollars")

China
(calculated - actual)/actual:  -0.2 %;
                               -102 million dollars


In [8]:
china_filter = (rca_n_countries['economy_label'] == 'China').values
usa_filter = (rca_n_countries['economy_label'] == 'United States of America').values
ukraine_filter = (rca_n_countries['economy_label'] == 'Ukraine').values
russia_filter = (rca_n_countries['economy_label'] == 'Russian Federation').values
rca_filter = (rca_n_countries['n_ratio'] > rca_n_countries['p_ratio']).values

china_rca = rca_n_countries[china_filter & rca_filter]
usa_rca = rca_n_countries[usa_filter & rca_filter]

china_rca_top5 = china_rca.sort_values(by=['n_ratio'], ascending=False).head(5)
usa_rca_top5 = usa_rca.sort_values(by=['n_ratio'], ascending=False).head(5)

china_rca_top5_names = china_rca_top5['product'].values
usa_rca_top5_names = usa_rca_top5['product'].values

# get products to make product network from 
top5_union = np.concatenate((china_rca_top5_names, usa_rca_top5_names))
top5_union = np.unique(top5_union)

In [9]:
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=china_rca_top5['product'],
        y=china_rca_top5['sum_kusd'],
        name='China'))
fig.add_trace(
    go.Bar(
        x=usa_rca_top5['product'],
        y=usa_rca_top5['sum_kusd'],
        name='USA'))
fig.show()

## compute centrality

### load data

In [10]:
csvs_root='../../csvs/'
for_pn = pd.read_csv(
    csvs_root + 'for_product_network.csv',
    dtype={'product': object, 'flow':int})

# only working with countries
partner_country_filter = (for_pn['partner_label'].isin(countries)).values
economy_country_filter = (for_pn['economy_label'].isin(countries)).values
for_pn_countries = for_pn[\
    partner_country_filter & economy_country_filter].copy()

# merge with commodity code
for_pn_countries = \
    for_pn_countries.merge(commodity_code, on='product', how='left')

## work on china's biggest export
(telecommunication equipment, n.e.s; & parts...)

In [11]:
product_df = for_pn_countries[for_pn_countries['product']=='764']\
    [[
        'economy_label',
        'partner_label',
        'flow',
        'kusd'
    ]]
export_product_df = product_df[product_df['flow']==2]

### truncate data
- most of the transactions are not that big

In [44]:
px.histogram(product_df.kusd, log_y=True).show()


In [52]:
# kusd_arr = product_df.kusd.values
# kusd_sum = kusd_arr.sum()
# percentile = 5
# # sort kusd_arr in ascending order
# kusd_arr.sort()
# tmp_sum = 0
# for val in kusd_arr:
#     tmp_sum += val
#     if tmp_sum/kusd_sum > percentile/100:
#         kusd_trunc = val
#         break
# kusd_trunc

product_df = product_df[product_df['kusd'] > 10000].copy()

In [53]:
export_product_df.groupby(['economy_label'])\
    .agg({'kusd': 'sum', 'partner_label':'count'} )\
        .reset_index()\
            .sort_values(by=['kusd'], ascending=False)\
                .head(5)

Unnamed: 0,economy_label,kusd,partner_label
42,China,232362400.0,207
43,"China, Hong Kong SAR",91391670.0,191
207,United States of America,43632180.0,211
212,Viet Nam,38613270.0,119
107,"Korea, Republic of",32172630.0,173


### create network

In [54]:
G = nx.DiGraph()
export_nodes = list(product_df.economy_label.unique())
import_nodes = list(product_df.partner_label.unique())
G.add_nodes_from(export_nodes, node_type='export')
G.add_nodes_from(import_nodes, node_type='import')

for index, row in product_df.iterrows():
    exporter = row['economy_label']
    importer = row['partner_label']
    kusd = row['kusd']
    G.add_edge(exporter, importer, weight=kusd)

### degree centrality of china

In [55]:
# compute degree centrality of China
dc = nx.out_degree_centrality(G)
df_dc = \
    pd.DataFrame.from_dict(dc, orient='index', columns=['out_degree_centrality'])
df_dc = df_dc.reset_index().rename({'index': 'economy_label'}, axis=1)
df_dc = df_dc.sort_values(by=['out_degree_centrality'], ascending=False)
china_dc = \
    df_dc[df_dc['economy_label']=='China']['out_degree_centrality'].values[0]
print("china's out_degree centrality:", china_dc)

ec = nx.eigenvector_centrality(G)
df_ec = \
    pd.DataFrame.from_dict(ec, orient='index', columns=['eigenvector_centrality'])
df_ec = df_ec.reset_index().rename({'index': 'economy_label'}, axis=1)
df_ec = df_ec.sort_values(by=['eigenvector_centrality'], ascending=False)
china_ec = \
    df_ec[df_ec['economy_label']=='China']['eigenvector_centrality'].values[0]
print("china's eigenvector centrality:", china_ec)


china's out_degree centrality: 0.7403846153846154
china's eigenvector centrality: 0.35349946906428237


In [56]:
df_dc.head(5)

Unnamed: 0,economy_label,out_degree_centrality
17,China,0.740385
33,"China, Hong Kong SAR",0.596154
94,United States of America,0.490385
80,Viet Nam,0.394231
30,Germany,0.355769


In [57]:
df_ec.head(5)

Unnamed: 0,economy_label,eigenvector_centrality
17,China,0.353499
80,Viet Nam,0.268654
94,United States of America,0.259562
62,Netherlands,0.234891
30,Germany,0.224901


In [58]:
# create edges
pos = nx.spring_layout(G, k=1)
# pos = nx.kamada_kawai_layout(G)
# pos = nx.nx_agraph.graphviz_layout(G, prog='dot')

edge_x = []
edge_y = []
for edge in G.edges():
    x0 = pos[edge[0]][0]
    y0 = pos[edge[0]][1]
    x1 = pos[edge[1]][0]
    y1 = pos[edge[1]][1]

    edge_x.append(x0)
    edge_x.append(x1)
    edge_x.append(None)
    edge_y.append(y0)
    edge_y.append(y1)
    edge_y.append(None)

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

# create nodes
node_x = []
node_y = []
for node in G.nodes():
    x, y = pos[node][0], pos[node][1]
    node_x.append(x)
    node_y.append(y)

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        reversescale=True,
        color=[],
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line_width=2))
    
# color nodes
node_adjacencies = []
node_text = []
for node, adjacencies in enumerate(G.adjacency()):
    node_adjacencies.append(len(adjacencies[1]))
    # node_text.append('# of connections: '+str(len(adjacencies[1])))
    node_text.append(adjacencies[0])

node_trace.marker.color = node_adjacencies
node_trace.text = node_text

# create network graph
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    title='Network Graph of Product 764',
                    titlefont_size=16,
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20,l=5,r=5,t=40),
                    annotations=[dict(
                        text="",
                        showarrow=False,
                        xref="paper", yref="paper",
                        x=0.005, y=-0.002 ) ],
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), 
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                    )
fig.show()


In [32]:
for node, adjacency in enumerate(G.adjacency()):
    

[('Afghanistan',
  {'Antigua and Barbuda': {'weight': 0.1400000006},
   'Australia': {'weight': 6051.5249023},
   'Austria': {'weight': 0.0219999999},
   'Bangladesh': {'weight': 0.1379999965},
   'Belgium': {'weight': 166.95199585},
   'Bosnia and Herzegovina': {'weight': 0.6169999838},
   'Brunei Darussalam': {'weight': 0.061999999},
   'Bulgaria': {'weight': 2.1860001087},
   'Canada': {'weight': 8.2089996338},
   'Sri Lanka': {'weight': 19.523000717},
   'Chile': {'weight': 2.0280001163},
   'China': {'weight': 520.20098877},
   'China, Taiwan Province of': {'weight': 58.958999634},
   'Croatia': {'weight': 0.0489999987},
   'Czechia': {'weight': 7.2249999046},
   'Denmark': {'weight': 100.25099945},
   'Finland': {'weight': 0.6679999828},
   'France': {'weight': 43.803001404},
   'Germany': {'weight': 12.770999908},
   'Greece': {'weight': 3.7990000248},
   'China, Hong Kong SAR': {'weight': 20.979000092},
   'Hungary': {'weight': 30.646999359},
   'India': {'weight': 7.7199997902