# Network Analysis of International Trade Shows

In [1]:
import pandas as pd
from unidecode import unidecode
import string

# NOTE(review): machine-specific absolute path; point it at the local data folder before running.
path = 'C:/Users/2093/Desktop/Data Center/03. Data/10. trade_shows/'

# Exhibitor data: keep only the eight spreadsheet columns that are renamed below.
# NOTE(review): `parse_cols` is the spelling used by the pandas release this notebook was
# written for; later releases deprecated it in favour of `usecols`.
exhr = pd.read_excel(path + 'Exhibitor_List_0117.xlsx',
                     parse_cols='B,D:H,O,S')
exhr.columns = ['show', 'start_date', 'end_date', 'exhr', 'exhr_ctry', 'exhr_ctry_code', 'category',
                'show_id']

# The original call passed `parse_date=[...]`, which is not a read_excel keyword (the real one
# is `parse_dates`).  Convert explicitly so the `.dt` accessor below is guaranteed to work.
for date_col in ('start_date', 'end_date'):
    exhr[date_col] = pd.to_datetime(exhr[date_col])

# NOTE(review): astype(str) turns missing names into the literal string 'nan'; confirm the
# sheet has no blank exhibitor names, otherwise they will all look like one company.
exhr['exhr'] = exhr['exhr'].astype(str)
exhr['show_id'] = exhr['show_id'].astype(int)

# Clean exhibitor country: lower-case, trim, drop inner spaces, then map known variants
# (exact whole-value matches only) onto a single spelling.
country_aliases = {
    'cacada': 'canada',
    'ivorycoast': "coted'ivoire",
    'southkorea': 'korea',
    'russianfederation': 'russia',
    'serbiaandmontenegro': 'serbia'
}
exhr['exhr_ctry'] = (exhr['exhr_ctry'].str.lower().str.strip()
                     .str.replace(' ', '').replace(country_aliases))

# Translation table that deletes every ASCII punctuation character; built once, not per row.
_punct_table = str.maketrans('', '', string.punctuation)


def _normalise_company_name(name):
    """Return a lower-case, ASCII-only, punctuation-free version of `name`.

    Runs of whitespace are collapsed to a single space and any whitespace left at the
    edges after punctuation removal is trimmed.  Done in plain Python so the result does
    not depend on the pandas version's default for regex handling in `Series.str.replace`.
    """
    ascii_name = unidecode(name.lower().strip())
    return ' '.join(ascii_name.translate(_punct_table).split())


exhr['exhr_conv'] = exhr['exhr'].apply(_normalise_company_name)

# Most frequent tokens in the normalised names (all years).  A bare expression in the middle
# of a cell is not displayed, so keep the result in a name and inspect it in its own cell.
exhr_token_counts = pd.Series(' '.join(exhr['exhr_conv']).split(' ')).value_counts().head(20)

# Restrict to shows starting in 2016; copy so later column assignments do not hit a view.
exhr = exhr[exhr['start_date'].dt.year == 2016].copy()

In [2]:
# All trade shows in 2016 with dates, ordered by start date
shows_16 = exhr[['show_id', 'start_date']].drop_duplicates().sort_values(
    'start_date').reset_index(drop=True)

# One (show_id, start_date) pair per show.  shows_16 is sorted by date, so keeping the first
# row per show_id keeps its earliest start date; this also avoids the ambiguous array
# comparison the original hit when a show_id had more than one start date.
first_start = shows_16.drop_duplicates('show_id')
show_starts = list(zip(first_start['show_id'], first_start['start_date']))

# Build the directed edge list: former -> latter whenever `former` starts strictly earlier.
pairs = [(former, latter)
         for former, former_start in show_starts
         for latter, latter_start in show_starts
         if former_start < latter_start]
# Passing `columns=` keeps this valid even when there are no pairs at all.
adj = pd.DataFrame(pairs, columns=['former', 'latter'])

# Edge weight = number of exhibitors present at BOTH shows.  The original counted rows per
# exhibitor (`count() >= 2`), so an exhibitor listed twice in a single show was wrongly
# treated as shared; intersecting per-show sets of names counts each exhibitor once.
# NOTE(review): matching is on the raw 'exhr' name, as in the original, not on 'exhr_conv'.
exhibitors_by_show = {show_id: set(names)
                      for show_id, names in exhr.groupby('show_id')['exhr']}
adj['weight'] = [len(exhibitors_by_show[former] & exhibitors_by_show[latter])
                 for former, latter in pairs]

# Keep only the edges whose weight is at least the mean edge weight
adj_i = adj[adj['weight'] >= adj['weight'].mean()]

In [3]:
import networkx as nx

# networkx renamed `from_pandas_dataframe` to `from_pandas_edgelist` (same keyword arguments);
# use whichever the installed version provides so this cell runs on both.
_edges_to_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe

# Directed graph: one edge former -> latter per row of adj_i, carrying its 'weight'.
D = _edges_to_graph(adj_i, source='former', target='latter', edge_attr='weight',
                    create_using=nx.DiGraph())

In [4]:
# Trade show data, indexed by the first selected spreadsheet column (renamed show_id below).
# NOTE(review): `parse_cols` is the spelling used by the pandas release this notebook was
# written for; later releases deprecated it in favour of `usecols`.
shows = pd.read_excel(path + 'Jetro_Expo_List0119.xls', index_col=0,
                      parse_cols='A,B,D:F,H,P,Q')
shows.index.name = 'show_id'
shows.columns = ['show', 'start_date', 'end_date', 'show_ctry', 'show_ctry_code', 'industry', 'n_com']

# The original call passed `parse_date=[...]`, which is not a read_excel keyword (the real one
# is `parse_dates`).  Convert explicitly so the `.dt` accessor below is guaranteed to work.
for date_col in ('start_date', 'end_date'):
    shows[date_col] = pd.to_datetime(shows[date_col])

# Flag shows whose country code is 'TW'
shows['istw'] = shows['show_ctry_code'] == 'TW'
shows_16 = shows[shows['start_date'].dt.year == 2016]

# Build trade show metadata: copy show attributes onto each graph node, and drop nodes that
# have no matching 2016 row in the show list.
# Iterate over a snapshot: removing nodes while looping over the live node view raises
# "dictionary changed size during iteration" on networkx 2.x.
for n, d in list(D.nodes(data=True)):
    if n in shows_16.index:
        row = shows_16.loc[n]
        # `d` is the node's own attribute dict, so updating it updates the graph in place.
        d.update({
            'name': row['show'],
            'start': row['start_date'],
            'end': row['end_date'],
            'ctry': row['show_ctry'],
            'istw': row['istw'],
            'n_com': row['n_com']
        })
    else:
        D.remove_node(n)
        print('Removed node {}.'.format(n))

Removed node 1122.
Removed node 2294.


In [5]:
import nxviz as nv
import matplotlib.pyplot as plt

# Visual encoding shared by the arc and circos plots: edge width from the 'weight' edge
# attribute, node colour from 'istw', node size from 'n_com'.
node_edge_style = {'edge_width': 'weight', 'node_color': 'istw', 'node_size': 'n_com'}


def _draw_and_save(plot, filename):
    """Draw an nxviz plot, save the current matplotlib figure to `filename`, return the plot."""
    plot.draw()
    plt.savefig(filename, dpi=300)
    return plot


# Each plot is constructed, drawn and saved before the next one is created.
m = _draw_and_save(nv.MatrixPlot(D), 'matrix_plot.jpg')
a = _draw_and_save(nv.ArcPlot(D, **node_edge_style), 'arc_plot.jpg')
c = _draw_and_save(nv.CircosPlot(D, **node_edge_style), 'circos_plot.jpg')

# Disabled experiment, kept for reference: label the nodes flagged 'istw' on the circos plot.
# It would have to run after c.draw() and before the figure is saved.
# labels = {}
# labels.update({n: d['name'] for n, d in D.nodes(data=True) if d['istw']})
# pos = nx.nx_pydot.graphviz_layout(D, prog='circo')
# nx.draw_networkx_labels(D, pos, labels, font_size=16)