In [2]:
import pandas as pd
import numpy as np
import networkx as nx 
import seaborn as sns
import matplotlib.pyplot as plt
import math

# Para grafos
from pyvis import network as net # Visualizaciones
import networkx as nx # Métricas basadas en grafos

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
df=pd.read_csv('df.csv')
#df.head(2)

In [4]:
df_centrality = pd.DataFrame({'id':list(set(list(df.reporter.unique())+list(df.partner.unique())))})
print('hay',df_centrality.shape[0],'paises')

hay 226 paises


### Centralidad de cada país en exportaciones de cada tipo:

In [33]:
for section in list(df.section.unique()):
    dffilt = df[df.section==section]
    section = section.lower().replace(' ','_')
    G = nx.from_pandas_edgelist(df = dffilt, 
                                source = 'reporter',
                                target = 'partner',
                                edge_attr = True, 
                                create_using=nx.DiGraph()
                                )
    # Betweeness:
    #betweenness = nx.betweenness_centrality(G, weight='export_value_usd')
    #eigenvector = nx.eigenvector_centrality(G)
    #degree = nx.degree_centrality(G)
    oudegree = nx.out_degree_centrality(G)
    nx.set_node_attributes(G, oudegree, section)
    df_metrics = pd.DataFrame.from_dict(dict(G.nodes(data=True)), orient='index').reset_index()
    df_metrics.rename({'index': 'id'}, axis=1, inplace=True)
    df_centrality = df_centrality.merge(df_metrics, on='id').reset_index(drop=True)

In [5]:
#df_centrality.head(2)

In [35]:
from kneed import KneeLocator
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

In [36]:
features = df_centrality.drop('id', axis=1).values
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

In [None]:
kmeans_kwargs = {
    "init": "random",
    "n_init": 10,
    "max_iter": 300,
    "random_state": 42,
}

# A list holds the SSE values for each k
sse = []
for k in range(1, 11):
    kmeans = KMeans(n_clusters=k, **kmeans_kwargs)
    kmeans.fit(scaled_features)
    sse.append(kmeans.inertia_)
    
plt.plot(range(1, 11), sse)
plt.xticks(range(1, 11))
plt.xlabel("Number of Clusters")
plt.ylabel("SSE")
plt.show()

In [38]:
kl = KneeLocator(range(1, 11), sse, curve="convex", direction="decreasing")
kl.elbow

3

In [39]:
kmeans = KMeans(
    init="random",
    n_clusters=3,
    n_init=10,
    max_iter=300,
    random_state=42
)

kmeans.fit(scaled_features)

KMeans(algorithm='auto', copy_x=True, init='random', max_iter=300, n_clusters=3,
       n_init=10, n_jobs=None, precompute_distances='auto', random_state=42,
       tol=0.0001, verbose=0)

In [40]:
df_centrality['cluster']=kmeans.labels_

In [41]:
import geopandas

#json_paises=geopandas.read_file('world.geojson').rename({'NAME':'pais', 'ISO_A3':'country_iso'}, axis=1)
#json_paises['country_iso']=[i.lower() for i in json_paises['country_iso']]
#json_paises = json_paises[['pais','country_iso','geometry']]
#json_paises.head(2)

In [42]:
json_paises=geopandas.read_file('countries.geojson').rename({'ADMIN':'id', 'ISO_A3':'country_iso'}, axis=1)
json_paises['country_iso']=[i.lower() for i in json_paises['country_iso']]

#json_paises

In [None]:
paises = pd.read_csv('paises.csv').rename({'country_name_english':'id'},axis=1)
df_centrality = df_centrality.merge(paises[['country_iso','id']], on='id', how='left')
df_centrality.loc[df_centrality.id=='Romania','country_iso']='rou'
#df_centrality.head(2)

In [None]:
json_paises=json_paises.merge(df_centrality[['country_iso','cluster']], on='country_iso', how='left')
json_paises.head(2)

In [45]:
json_paises.to_file("json_paises.json", driver="GeoJSON")

In [None]:
import geopandas
import folium
json_paises=geopandas.read_file('json_paises.json')

colors = {'0':'#7fc97f', '1':'#beaed4', '2':'#fdc086'}

colors = {'0':'#143959', '1':'#9FD984', '2':'#F26E50'}

colors = {'0':'#FB8455', '1':'#18AE95', '2':'#5F96ED'}



m = folium.Map(location=[0, 0],
               zoom_start=2,
               max_bounds=True,
               zoom_control=False,
               scrollWheelZoom=False,
               dragging=False, 
              #tile='', 
              #tile='', 
             # tile=''
              )

folium.TileLayer('cartodbpositron').add_to(m)
#folium.TileLayer('Esri.WorldGrayCanvas').add_to(m)
#folium.TileLayer('BasemapAT.highdpi').add_to(m)
#folium.TileLayer('stamentoner').add_to(m)


folium.GeoJson(json_paises[json_paises.cluster==0].copy(), 
               name='Paises',
               style_function=lambda x: {
                                    'fillColor': colors.get(str(0)),
                                    'fillOpacity': 0.8,
                                    "weight": 1,
                                    'color' : 'white'
                                                    },
              tooltip = folium.features.GeoJsonTooltip(fields=['id'])).add_to(m)

folium.GeoJson(json_paises[json_paises.cluster==1].copy(), 
               name='Paises',
               style_function=lambda x: {
                                    'fillColor': colors.get(str(1)),
                                    'fillOpacity': 0.8,
                                    "weight": 1,
                                    'color' : 'white'
                                                    },
              tooltip = folium.features.GeoJsonTooltip(fields=['id'])).add_to(m)

folium.GeoJson(json_paises[json_paises.cluster==2].copy(), 
               name='Paises',
               style_function=lambda x: {
                                    'fillColor': colors.get(str(2)),
                                    'fillOpacity': 0.8,
                                    "weight": 1,
                                    'color' : 'white'
                                                    },
              tooltip = folium.features.GeoJsonTooltip(fields=['id'])).add_to(m)


m

In [22]:
m.save(outfile= "assets/mapa1.html")

In [47]:
json_paises[json_paises['id'].str.contains('Sudan')]

Unnamed: 0,id,country_iso,cluster,geometry
196,Sudan,sdn,0.0,"MULTIPOLYGON (((37.26450 20.74999, 37.25441 20..."
197,South Sudan,ssd,,"POLYGON ((33.96912 9.83834, 33.90489 9.71070, ..."


In [None]:
import plotly.express as px

fig = px.choropleth(
    
    #df_centrality, 
                    geojson=json_paises, 
                    locations=json_paises['id'], 
                    color=json_paises['cluster'])
                    #color_continuous_scale="Viridis",
                    #range_color=(0, 2),
                           #scope="world",
                           #labels={'cluster':'unemployment rate'}
                          
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [93]:
G = nx.from_pandas_edgelist(df = df, 
                            source = 'reporter',
                            target = 'partner',
                            edge_attr = True, 
                            #create_using=nx.DiGraph()
                            )

In [None]:
import community as community_louvain
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx

#first compute the best partition
partition = community_louvain.best_partition(G)
partition

In [None]:

# draw the graph
pos = nx.spring_layout(G)
# color the nodes according to their partition
cmap = cm.get_cmap('viridis', max(partition.values()) + 1)
nx.draw_networkx_nodes(G, pos, partition.keys(), node_size=40,
                       cmap=cmap, node_color=list(partition.values()))
nx.draw_networkx_edges(G, pos, alpha=0.5)
plt.show()

In [None]:

#corr_mat  = dfcorr.drop('pais',axis=1).corr()
corr_mat = dfcorr[['unspecified', 'chemical_products', 'plastics_and_rubbers',
       'textiles', 'precious_metals', 'machines', 'instruments',
       'vegetable_products', 'foodstuffs', 'animal_hides', 'wood_products',
       'paper_goods', 'footwear_and_headwear', 'transportation', 'weapons',
       'metals', 'miscellaneous', 'animal_and_vegetable_bi-products',
       'mineral_products', 'arts_and_antiques', 'stone_and_glass',
       'animal_products']].corr()

plt.figure(figsize=(20,15))
sns.set(font_scale = 2)

ax=sns.heatmap(corr_mat,annot=True, linewidths=.5)
ax.set_ylim(0 ,22)
plt.show()

In [None]:
import plotly.graph_objects as go

def df_to_plotly(df):
    return {'z': df.values.tolist(),
            'x': df.columns.tolist(),
            'y': df.index.tolist()}

fig = go.Figure(data=go.Heatmap(df_to_plotly(corr_mat), colorscale='Mint'))
fig.show()


In [259]:
df = df[df.section=='Chemical Products'].copy()

In [None]:
vertices=df.groupby(['reporter','continent_reporter'], as_index=False).export_value_usd.sum()
vertices.columns = ['pais','continente','export_value_usd']
vertices['export_value_usd'] = np.where(vertices.export_value_usd==0, 10, vertices.export_value_usd)
vertices['size'] = pd.qcut(vertices['export_value_usd'], 4, labels=[10,100,1000,2000])
vertices['color'] = np.select([vertices.continente == 'Asia', 
                               vertices.continente == 'Africa', 
                               vertices.continente == 'Americas',
                               vertices.continente == 'Europe',
                               vertices.continente == 'Oceania'],
                               ['#7fc97f','#beaed4','#fdc086','#ffff99','#386cb0'], 
                               default='other')

#vertices.head(2)

In [263]:
df=df.sort_values('export_value_usd',ascending=False).head(300).reset_index(drop=True).copy()

In [275]:
df=pd.read_csv('df.csv')
G = nx.from_pandas_edgelist(df = df[df.section=='Chemical Products'], 
                            source = 'reporter',
                            target = 'partner',
                            edge_attr = True, 
                            create_using=nx.DiGraph()
                           )


In [276]:
edge_weight=[edge[2]['export_value_usd'] for edge in G.edges(data=True)]

In [277]:
attr_dict_grupo = vertices.set_index('pais')['color'].to_dict()
nx.set_node_attributes(G, 
                       values = attr_dict_grupo,
                       name = 'color')

attr_dict_grupo = vertices.set_index('pais')['size'].to_dict()
nx.set_node_attributes(G, 
                       values = attr_dict_grupo,
                       name = 'size')

In [278]:
node_size=[node[1]['size'] for node in G.nodes(data=True)]

In [279]:
node_colors=[node[1]['color'] for node in G.nodes(data=True)]


In [None]:
plt.figure(figsize=(20,10))

nx.draw(G, 
        with_labels=True, 
        node_color=node_colors,
        node_size=node_size, 
        weight=edge_weight,
        edge_color='darkgray')

plt.show()

In [None]:
S=nx.ego_graph(G, 
               'Argentina', 
               radius=1, # Defino qué radio de vecinos busco (1: vecinos conectados por un enlace, 2 son los vecinos de sus vecinos, etc.)
               center=True, 
               undirected=True, 
               distance=None)

betweenness = nx.betweenness_centrality(S, weight='export_value_usd')
eigenvector = nx.eigenvector_centrality(S)
nx.set_node_attributes(S, betweenness, 'betweenness')
nx.set_node_attributes(S, eigenvector, 'eigenvector')


node_colors = [node[1]['betweenness'] for node in S.nodes(data=True)]
node_colors = [node[1]['eigenvector'] for node in S.nodes(data=True)]

plt.figure(figsize=(20,10))

#node_colors=[node[1]['color'] for node in S.nodes(data=True)]
node_size=[node[1]['size'] for node in S.nodes(data=True)]

cant_nodos = S.number_of_nodes()
pos = nx.spring_layout(S, 
                       k = 1/math.sqrt(cant_nodos)*5,
                       seed = 42)

nx.draw(S, 
        pos=pos,
        with_labels=True, 
       node_color=node_colors,
       node_size=node_size, 
       edge_color='grey', 
       arrowsize=20)

plt.show()

In [None]:
df_metrics=pd.DataFrame.from_dict(dict(S.nodes(data=True)), orient='index').reset_index()
df_metrics.rename({'index': 'id'}, axis=1, inplace=True)
df_metrics=df_metrics.round(3)
df_metrics.sort_values('betweenness',ascending=False).head(5)

In [None]:
df_metrics=pd.DataFrame.from_dict(dict(S.nodes(data=True)), orient='index').reset_index()
df_metrics.rename({'index': 'id'}, axis=1, inplace=True)
df_metrics=df_metrics.round(3)
df_metrics.sort_values('betweenness',ascending=False).head(5)

In [86]:
df = pd.read_csv('df.csv').drop('Unnamed: 0', axis=1)
df = df[df.reporter!=df.partner].copy()
dffilt = df[df.section=='Chemical Products'].copy()

vertices=dffilt.groupby(['reporter','continent_reporter'], as_index=False).export_value_usd.sum()
vertices.columns = ['pais','continente','export_value_usd']
vertices['export_value_usd'] = np.where(vertices.export_value_usd==0, 10, vertices.export_value_usd)
vertices['size'] = pd.qcut(vertices['export_value_usd'], 4, labels=[10,100,1000,2000])
vertices['color'] = np.select([vertices.continente == 'Asia', 
                               vertices.continente == 'Africa', 
                               vertices.continente == 'Americas',
                               vertices.continente == 'Europe',
                               vertices.continente == 'Oceania'],
                               ['#7fc97f','#beaed4','#fdc086','#ffff99','#386cb0'], 
                               default='other')

dffilt = dffilt.sort_values('export_value_usd',ascending=False).head(300).reset_index(drop=True).copy()
dffilt.shape

(300, 6)

In [89]:
len(network.edges)

300

In [None]:
network.show("grafo_grupo0.html")

In [None]:
df = pd.read_csv('df.csv')
df = df[df.reporter!=df.partner].copy()
dffilt = df[df.section=='Chemical Products'].copy()

vertices=dffilt.groupby(['reporter','continent_reporter'], as_index=False).export_value_usd.sum()
vertices.columns = ['pais','continente','export_value_usd']
vertices['export_value_usd'] = np.where(vertices.export_value_usd==0, 10, vertices.export_value_usd)
vertices['size'] = pd.qcut(vertices['export_value_usd'], 4, labels=[10,100,1000,2000])
vertices['color'] = np.select([vertices.continente == 'Asia', 
                               vertices.continente == 'Africa', 
                               vertices.continente == 'Americas',
                               vertices.continente == 'Europe',
                               vertices.continente == 'Oceania'],
                               ['#7fc97f','#beaed4','#fdc086','#ffff99','#386cb0'], 
                               default='other')

dffilt = dffilt.sort_values('export_value_usd',ascending=False).head(300).reset_index(drop=True).copy()

network = Network(
    height="2000px", 
    width="100%", 
    bgcolor="white", 
    font_color="black",
    notebook=True, 
    directed=True
)

edge_data = zip(dffilt['reporter'], 
                dffilt['partner'], 
                dffilt['export_value_usd'])

for e in edge_data:
    src = e[0]
    dst = e[1]
    w = e[2]

    network.add_node(src, src, title=src)
    network.add_node(dst, dst, title=dst)
    network.add_edge(src, dst, value=w)

# add neighbor data to node hover data
for node in network.nodes:
    if node['id'] in list(vertices['pais']):
        node['value']=int(vertices.loc[vertices.pais==node['id'],'export_value_usd'].values[0])
        node['title']=node['title']+"<br>Export: "+str(vertices.loc[vertices.pais==node['id'],'export_value_usd'].values[0])
        node['color']=str(vertices.loc[vertices.pais==node['id'],'color'].values[0])


network.show_buttons()
 

network.show("grafo_grupo0.html")

In [6]:
%%writefile app.py

# ======= Libraries =======

import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input,Output,State
#import dash_bootstrap_components as dbc
import pandas as pd
from pyvis.network import Network
import visdcc
import numpy as np
from math import log, floor

#import re
#import dash_table
#import networkx as nx 
#import plotly.express as px
#import plotly.graph_objs as go

def human_format(number):
    if number!=0:
        units = ['', 'K', 'M', 'G', 'T', 'P']
        k = 1000.0
        magnitude = int(floor(log(number, k)))
        return '%.1f%s' % (number / k**magnitude, units[magnitude])
    else:
        return 0

df = pd.read_csv('df.csv')
df = df[df.reporter!=df.partner].copy()

sections = pd.DataFrame({'section': df['section'].unique()}).sort_values('section')

network = Network( 
    bgcolor="white", 
    font_color="black",
    notebook=True, 
    directed=True
)


config_layout={
   'height': '1000px', 'width': '100%',
   "nodes": {
       "borderWidth": 0,
       "borderWidthSelected": 7,
       "fixed": {"x": False, "y": False},
       "font": {"size": 13, "strokeWidth": 1, 'color':'black'},
       "shape": "dot",
       'shapeProperties': {
           'interpolation': False
       }
   },
   "edges": {
       "color": {"inherit": True},
       "smooth": {"type": "horizontal", "forceDirection": "none", "roundness": 0},
       "arrows": {"to": {"enabled": True}},
       "arrowStrikethrough": False,
       "selectionWidth": 5

   },
   "interaction": {"dragNodes": True,
                   "hideEdgesOnDrag": False,
                   "multiselect": False,
                   "navigationButtons": True},
   "physics": {
       "enabled": True,
       "stabilization":True,
       "barnesHut": {"gravitationalConstant": -40000, "springLength": 100,
                     "springConstant": 0.01},
       "minVelocity": 0.75,
       "maxVelocity": 50
}}




# ======= APP =======
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
server = app.server

app.layout = html.Div([
    html.H1('Red de exportaciones'),
    html.H4('Exportaciones por tipo exportación (2018, USD). Por categoría seleccionada se muestran las exportaciones principales'),
    html.Label(['Fuente: ', html.A('Open Trade Statistics', href='https://tradestatistics.io/')]),
    html.Div([
        html.Div([
            dcc.Dropdown(id ='input_section',
                         options=[{'label': i, 'value': i} for i in sections.section.unique()],
                         multi=False,
                         value='Chemical Products',
                         clearable=False),
            
        ], className='four columns'),
        html.Div([
             visdcc.Network(id='network',
                   data={'nodes': network.nodes,'edges': network.edges},
                   options=config_layout)
        ], className='twelve columns')
    ]),
    html.Div([
        html.Div([
            html.H3('Clustering - Kmeans'),
            html.P('Segmentación en base a la centralidad de los países en cada rama de comercio (out degree centrality)')
        ], className='four columns'),
        html.Div([
           html.Iframe(src='assets/mapa1.html', 
                       style={'border': 'none', 'width': '100%', 'height': 600,'white-space':' pre-wrap'})   
        ], className='eight columns')
    ])
])
   
    
       
    
    #html.Div([
    #        html.Div([dbc.CardBody([
    #            html.H5("Máximo exportador", style={'color': 'grey'}),
    #            html.Div(id='output_maxexpo', style={'font':'30px', 'color':'black'})
    #        ])
    #        ], style={"border": "1px white solid",
    #                  'height': '14rem',
    #                  'background-color': '#f8f9fa'}),
    #        html.Div([dbc.CardBody([
    #            html.H5("Máximo importador", style={'color': 'grey'}),
    #            html.Div(id='output_maximpo', style={'font':'30px', 'color':'black'})
    #        ])
    #        ], style={"border": "1px white solid",
    #                  'height': '14rem',
    #                  'background-color': '#f8f9fa'})
    #], style={'column-count': '2',
    #          'margin-top': '1rem',
    #          'font-size': '0.625rem'}),
    



@app.callback(
    Output('network', 'data'),
    [Input('input_section', 'value')]
)
def update_output(value):
    
    dffilt = df[df.section==value].copy()
    
    vertices=dffilt.groupby(['reporter','continent_reporter'], as_index=False).export_value_usd.sum().copy()
    vertices.columns = ['pais','continente','export_value_usd']
    vertices2=dffilt.loc[~dffilt.partner.isin(vertices.pais.unique()),['partner','continent_partner']]
    vertices2['export_value_usd']=0
    vertices2.columns = ['pais','continente','export_value_usd']
    vertices = pd.concat([vertices,vertices2],axis=0)
    vertices['color'] = np.select([vertices.continente == 'Asia', 
                                   vertices.continente == 'Africa', 
                                   vertices.continente == 'Americas',
                                   vertices.continente == 'Europe',
                                   vertices.continente == 'Oceania'],
                                   ['#FB8455','#18AE95','#62D5F0','#5F96ED','#E36BF4'], 
                                   default='other')
    vertices['label']=[human_format(i) for i in vertices.export_value_usd]
    
    dffilt = dffilt.sort_values('export_value_usd',ascending=False).head(300).reset_index(drop=True).copy()

    network = Network(bgcolor="white",font_color="black",notebook=False,directed=True)
    edge_data = zip(dffilt['reporter'], dffilt['partner'], dffilt['export_value_usd'])

    for e in edge_data:
        src = e[0]
        dst = e[1]
        w = e[2]
        network.add_node(src, src, title=src)
        network.add_node(dst, dst, title=dst)
        network.add_edge(src, dst, value=w)

    for node in network.nodes:
        if node['id'] in list(vertices['pais']):
            node['value']=int(vertices.loc[vertices.pais==node['id'],'export_value_usd'].values[0])
            node['title']=node['title']+"<br>Export: "+str(vertices.loc[vertices.pais==node['id'],'label'].values[0])
            node['color']=str(vertices.loc[vertices.pais==node['id'],'color'].values[0])

    data ={'nodes': network.nodes,
           'edges': network.edges}
    
    return data



if __name__ == '__main__':
    app.run_server()


Overwriting app.py


In [159]:
print(dash.__version__)

1.14.0
