In [1]:
#Load modules

import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd

from numpy.random import choice


In [2]:

def select_nodes_by_kw(graph, keyword_list = []):
    """
    This function returns a list of all nodes whose name contains some of the keywords.
    
    graph: networkx graph object
    
    """
    result_list = []
    for kw in keyword_list:
        result_list += [label for label in graph.nodes() if kw in label]
    return result_list

def second_neighbors(graph, nodes, second_order = True):
    """
    graph: networkx graph
    nodes: list of node names
    second_order: If 'False' search stops at direct (1st order) neighborhood. 
    Returns subgraph with up to second neighbors of node, and edges between them.                                                                              
    """
    neighbor_list = [nodes]

    for i in range(len(nodes)):
        neighbor_list += [graph.neighbors(nodes[i])]
        
        if second_order == True:
            for n in graph.neighbors(nodes[i]):
                    neighbor_list += [graph.neighbors(n)]


    return list(set([item for sublist in neighbor_list for item in sublist]))

def wrap_by_word(s, n):
    '''returns a string where \\n is inserted between every n words'''
    a = s.split()
    ret = ''
    for i in range(0, len(a), n):
        ret += ' '.join(a[i:i+n]) + ' \n '

    return ret


In [3]:
# Columns of the datasets
# Geo_cat = ['CHI ID COM']#'BRA ID MIR'
# HS_cat = ['HS ID 6']
# ind_cat = ['CIIU ID 4']

Geo_cat = ['BRA ID MIR'] #'BRA ID MIR'
HS_cat = ['HS ID 4']
ind_cat = ['CNAE ID C', 'CBO ID F']


var = 'Total Yearly Wages' # 'output_USD'
var_str = '_w'
var_lab_str = 'Wages in '# 'Output of ' #'Wages in '

In [4]:
#Choose a configuration for further aggregation.

# ctry_str = Geo_cat[0][:3]
# geo_str = Geo_cat[0][-3:]

# confs = [('4', '2', '2'),
#          ('4', '4', '2')]

# confs = [('0', 'G', '4', '4'),
#          ('GG', 'D', '4', '4'),
#          ('SGP', 'S', '4', '4'),
#          ('SGP', 'S', '4', '4')]

# for conf in confs:
# #     (CIIU_lev, HS_Exp, HS_Imp) = conf
#     (CBO_lev, CNAE_lev, HS_Exp, HS_Imp) = conf

#     col_name = ctry_str+'_'+geo_str+'_'+'_'.join([c for c in conf])+var_str
#     print col_name

In [3]:
# Path where structure reference tables are:
hard_drive_path = './../../../../../media/miglesia/Elements/Matias_Iglesias_Backup/miglesia/Documents/compute-corr-structures/'
path = 'data/disagg_struct_refs/formatted/'


GEO_chi = pd.read_csv('data/disagg_struct_refs/formatted/GEO_chi.csv')
GEO_bra = pd.read_csv('data/disagg_struct_refs/formatted/GEO_bra.csv')

GEO_bra['BRA label EST'] = GEO_bra['BRA ID EST']
GEO_bra['BRA label MER'] = GEO_bra['BRA ID MER']
GEO_bra['BRA label MIR'] = GEO_bra['BRA ID MIR']

In [4]:
import itertools
from collections import OrderedDict
from numpy import array

geo_levs = ['CHI_REG', 'CHI_PRO', 'CHI_COM']
geo_levs = ['CHI_COM']
J = len(geo_levs)

# This dictionary summarises the possible configurations for the experiment.
# 'name' points to the variable names, eg. ['Wages', 'Exports', 'Imports']
# 'dataset' tells us the name of the dataset that each of the variables above belong to.
# 'levels' tells the disaggregation levels we would like each variable to be analised in.
# 'agg_str' is the name of the dataset used for reference of the disaggregation, for each of the variables
var_dict = {'name': ['Wages', 'Exports', 'Imports'], 
       'dataset': ['prd', 'trd', 'trd'], 
       'levels': [['CI1', 'CI2','CI4'], ['HS1', 'HS2','HS4'], ['HS1', 'HS2']],
           'agg_str' : ['CIIU', 'HS', 'HS']}

var_dict = {'name': ['Wages', 'Output', 'Exports', 'Imports'], 
       'dataset': ['prd','prd', 'trd', 'trd'], 
       'levels': [['CI1', 'CI2', 'CI4'],['CI1', 'CI2', 'CI4'], ['HS1', 'HS2', 'HS4'], ['HS1', 'HS2', 'HS4']],
#            'agg_ref' : [CIIU, CIIU, HS, HS],
           'agg_str' : ['CIIU','CIIU', 'HS', 'HS']}

# The brazil case may be more special cause aggregation of the production dataset is over two directions
# var_dict = {'name': ['Wages', 'Output', 'Exports', 'Imports'], 
#        'dataset': ['prd','prd', 'trd', 'trd'], 
#        'levels': [['CN1', 'CN2', 'CN4'],['CN1', 'CN2', 'CN4'], ['HS1', 'HS2', 'HS4'], ['HS1', 'HS2', 'HS4']],
# #            'agg_ref' : [CIIU, CIIU, HS, HS],
#            'agg_str' : ['CNAE','CNAE', 'HS', 'HS']}


# all possible combination of levels. They could be filtered afterwards...
confs = list(itertools.product(*var_dict['levels']))

#select(sample) every three
# confs = array(confs)[[i for i in range(len(confs)) if i%6 == 0]]
K = len(confs)


In [5]:
import seaborn as sns
import math

import matplotlib.pyplot as plt

from bokeh.io import show, output_file, save
from bokeh.plotting import figure
from bokeh.models import GraphRenderer, StaticLayoutProvider, Circle, MultiLine, NodesOnly, NodesAndLinkedEdges, EdgesAndLinkedNodes
from bokeh.models.graphs import from_networkx
from bokeh.models.tools import HoverTool

from numpy import power

# Maximum degree that a network can have.
max_d = 10

# String that identifies the right 'weight' column in the edges dataset. 
# also used as prefix in the configuration name
geo = 'CHI_COM'


# Column of name and id of geographical units.
places_id = 'CHI ID COM'#'BRA ID MIR' #'CHI ID COM'
places_label = 'CHI label COM'#'BRA label MIR' #'CHI label COM'
GEO_ref = GEO_chi #GEO_bra #GEO_chi

ctry_str = 'CHI'

# path = 'data/new/'
# path = './../../../../../media/miglesia/Elements/corr_st_data/'

l = len(geo_levs)

k = .1

rainbow_colors = {1:'#9400D3', #Violet
2:'#4B0082', #Indigo
3:'#0000FF', #Blue
4:'#00FF00', #Green
5:'#D3D3D3', #Gray
6:'#FFFF00', #Yellow
7:'#FF7F00', #Orange
8:'#FF0000'} #Red


In [6]:
# clean preexisting plots
import os
import glob

In [256]:

# edges
# node_ref = nodes.reset_index()[['node_name', 'index']]

# data = edges.merge(node_ref, left_on='label_x', right_on='node_name').merge(node_ref, left_on='label_y', right_on='node_name').set_index(['index_x', 'index_y'])[['label_x', 'label_y', 'weight_CHI_COM']]

# data.T[(3, 7)]

# edges

[2.0896113395400002,
 2.3256419374999999,
 2.07623593298,
 2.0903851265799998,
 2.12580747184,
 2.4383481314600002,
 2.5075071770599999,
 2.4248756561800002,
 2.1521325774700002,
 2.0565401468799998,
 1.5451483430099999,
 1.8707851292099997,
 1.8970316466299999,
 1.9373474239099997,
 1.5618840858,
 1.9903734270400002,
 1.89398700469,
 1.6802679712399999,
 1.6698411225000001,
 1.56216350223,
 1.7035757956899999,
 2.0633641756899999,
 1.5981669943999999,
 1.5318085305200002,
 1.6296418306799998,
 2.02906016314,
 1.4269523240999999,
 1.8899969599700002,
 1.51533753224,
 1.64920525989,
 1.7347313573800001,
 1.3771996493599998,
 1.9561226114300001,
 1.74973076472,
 1.66165508944,
 1.5217739561700001,
 1.7167729337900002,
 2.0431228456599997,
 2.0507231637299999,
 2.3890617723700003,
 2.1831443577099998,
 1.4872434637799998,
 1.8925356577200001,
 1.98079736866,
 1.58390356873,
 1.7440031554799997,
 1.27299374764,
 1.9194540982799997,
 1.8407395222599998,
 1.7592029186900002,
 1.8146751372499

In [11]:
files = glob.glob('./Figures/6_neighbhoods/*')
path = 'data/processed/'

for f in files:
    os.remove(f)
    
    
for j in range(len(confs)):
            
    conf = confs[j]


#     (CIIU_lev, HS_Exp, HS_Imp) = conf
#     (CBO_lev, CNAE_lev, HS_Exp, HS_Imp) = conf

#     col_name = ctry_str+'_'+geo_str+'_'+'_'.join([c for c in conf])+var_str
    conf_name = geo+'_'+'_'.join([var_dict['name'][i][0]+conf[i][2::2] for i in range(len(var_dict['name']))])
    print conf_name

    try:
        print ''

        nodes = pd.read_csv(hard_drive_path+path+conf_name+'_nodes.csv', index_col = 0).sort_index().reset_index()
        edges = pd.read_csv(hard_drive_path+path+conf_name+'_edges.csv', index_col = 0)[['label_x', 'label_y', 'weight_CHI_COM']]

            ### Max degree = max_d. Parameter used to trim the network.
        df1 = edges.groupby('label_x').apply(lambda x: x.nlargest(max_d, 'weight_'+geo)).reset_index(drop = True)
        df2 = edges.groupby('label_y').apply(lambda x: x.nlargest(max_d, 'weight_'+geo)).reset_index(drop = True)
        edges = df1.merge(df2, on = ['label_x', 'label_y', 'weight_'+geo]).reset_index(drop = True)

    #     Add info of districts with most volume (CHI)
    ###
        # From concat data get node value totals

        df = pd.read_csv(hard_drive_path+path+conf_name+'_cc_data.csv', index_col = 'node_name')
        df['node_value_USD'] = df.groupby('node_name')[['value_USD']].sum()

        df = df.reset_index()

        # Rank and percentage of comunes in the Econ category
        df['rk'] = df.groupby('node_name')['value_USD'].rank(method = 'first', ascending = False)
        df['pct'] = df['value_USD'].div(df['node_value_USD']).multiply(100).dropna().round(1)

        # Get top three
        df = df.loc[df.rk <= 3].merge(GEO_ref[[places_id, places_label]].drop_duplicates())
        df['info_'] = df[places_label] + ' ('+df.pct.astype(str)+'%)'
        df = df.sort_values(by = 'node_name')
        df = pd.DataFrame(df.set_index(['node_name', 'rk'])[['info_']].unstack())
        df.columns = df.columns.droplevel(0)
        df['info'] = df[1.0] +', '+ df[2.0]+', '+df[3.0]

        #Add column of 'info' message to nodes dataframe
        nodes = nodes.reset_index().set_index('node_name')
        nodes['info'] = df[['info']]
        nodes = nodes.reset_index().set_index('index')

    #     ###

        #color
        from matplotlib import colors
        
        nodes = add_theme_attribute_column(nodes) #add gross thematic attributes, function is written below
        
        nodes['color_code'] = nodes.att_B.rank(method = 'dense') #or att_B
        
        nodes['color'] = pd.DataFrame(nodes['color_code'].map(rainbow_colors))
        
        print nodes[['att_B','color_code', 'color']].drop_duplicates().sort_values(by = 'color_code')

        
#         w = plt.cm.get_cmap('rainbow', len(nodes.color_code.unique()))

#         colors_hex = []
#         for i in range(w.N):
#             rgb = w(i)[:3] # will return rgba, we take only first 3 so we get rgb
#             colors_hex += [colors.rgb2hex(rgb)]

#         colors_hex = [w.encode('UTF8') for w in colors_hex]
#         code_hex = pd.DataFrame([nodes.color_code.unique(), colors_hex], index = ['color_code', 'color']).T
#         nodes = nodes.reset_index().merge(code_hex, on = 'color_code').set_index('index')

        #size

        value = nodes['node_value_USD']
        # nodes['node_size'] = np.sqrt((value/value.median()).values)
        nodes['node_size'] = power((value/value.median()).values, 1/2.)
        nodes['node_size'] = 10*nodes['node_size']/nodes['node_size'].median()

    #     nodes = nodes.merge(code_hex)

        ###

        G = nx.from_pandas_dataframe(edges, 'label_x', 'label_y', 'weight_'+geo)

        ### Subgraph - neighborhood
        # Random choice of some nodes...
        nodes_choice = choice(G.nodes(), 10)

        for node in nodes_choice:
            
            ix = nodes.loc[nodes.node_name == node].index[0]

            G_s = G.subgraph(second_neighbors(G, [node], second_order=True))  
            nodes_s = nodes.loc[nodes.node_name.isin(G_s.nodes())]
            edges_s = edges.loc[(edges.label_x.isin(G_s.nodes())) & (edges.label_y.isin(G_s.nodes()))]


            # nx.write_gexf(G,'networks/'+col_name+'.gexf')

        #         print 'nodes: '+str(len(G.nodes()))
        #         print 'edges: '+str(len(G.edges()))

            # Subset of nodes
    #         nodes_s = nodes.set_index('node_name').loc[G_s.nodes()].reset_index()

            #Perform Graph Drawing
        #     pos = nx.spring_layout(G, k=k)
        #     nx.draw(G, pos, ax = ax, node_size = 5*nodes.node_size.values, alpha=.6, node_color= list(nodes.color.values), edgelist = [])

            #     Bokeh drawing
            ####

            N = len(nodes_s)
            node_indices = list(range(N))
            nodes_s['node_name_'] = [wrap_by_word(s, 5) for s in nodes_s['node_name'].values]

            subgraph_frac = nodes_s['node_value_USD'].sum()/nodes['node_value_USD'].sum()
            perc_message = 'subgraph '+ str(int((100*subgraph_frac).round()))+'% of tot'

            plot = figure(title=conf_name + perc_message, x_range=(-1.1,1.1), y_range=(-1.1,1.1))

            #####

            graph = from_networkx(G_s, nx.spring_layout, center=(0,0), k = k)

            graph.node_renderer.data_source.add(list(nodes_s['color'].values), 'color')
            graph.node_renderer.data_source.add(list(nodes_s['node_name_'].values), 'node_name_')
            graph.node_renderer.data_source.add(list(nodes_s['node_value_USD'].values), 'node_value_USD')
            graph.node_renderer.data_source.add(list(nodes_s['info'].values), 'info')
            graph.node_renderer.data_source.add(list(nodes_s['node_size'].values), 'node_size')

            graph.node_renderer.glyph = Circle(size='node_size', fill_color='color')
            graph.node_renderer.selection_glyph = Circle(size=15, fill_color='#808000')
            graph.node_renderer.hover_glyph = Circle(size=15, fill_color='#808000')

            graph.edge_renderer.data_source.add(
                list(pd.DataFrame(graph.edge_renderer.data_source.data).merge(edges, how = 'left', left_on = ['end', 'start'], right_on = ['label_x', 'label_y']).fillna(0)['weight_CHI_COM'].values)
                , 'weight')
            graph.edge_renderer.glyph = MultiLine(line_color="#CCCCCC", line_alpha=0.5)
            graph.edge_renderer.glyph.line_width = {'field': 'weight'}
            graph.edge_renderer.selection_glyph = MultiLine(line_color='#808000')
            graph.edge_renderer.hover_glyph = MultiLine(line_color='#808000')
            graph.selection_policy = NodesAndLinkedEdges()
            graph.selection_policy = EdgesAndLinkedNodes()

            plot.renderers.append(graph)

            hover = HoverTool(tooltips=dict(main_place="@info", value="@node_value_USD", activity="@node_name_"))
            # hover = HoverTool(tooltips = [('Info', '@node_name'),('(x,y)', '($x, $y)')])

            plot.add_tools(hover)

            file_name = './Figures/6_neighbhoods/'+conf_name+'_'+str(ix).zfill(3)+'.html'
            with open(file_name, 'w'):
                pass
            output_file(file_name)
    #         print './Figures/6_neighbhoods/'+conf_name+'_'+str(ix).zfill(3)+'.html'
            save(plot)
        
    except IOError:
#         print 'No file (IOError) at '+str(conf_name)
        pass

CHI_COM_W1_O1_E1_I1

78
78
                           att_B  color_code    color
index                                                
51                      Commerce         1.0  #9400D3
55                   Fabrication         2.0  #4B0082
1         Manufacture, machinery         3.0  #0000FF
3               Minerals, metals         4.0  #00FF00
2                         Others         5.0  #D3D3D3
48                 Public sector         6.0  #FFFF00
4                       Textiles         7.0  #FF7F00
0      Vegetables, animals, wood         8.0  #FF0000
CHI_COM_W1_O1_E1_I2

CHI_COM_W1_O1_E1_I4

CHI_COM_W1_O1_E2_I1

CHI_COM_W1_O1_E2_I2

CHI_COM_W1_O1_E2_I4

CHI_COM_W1_O1_E4_I1

CHI_COM_W1_O1_E4_I2

CHI_COM_W1_O1_E4_I4

CHI_COM_W1_O2_E1_I1

127
127
                           att_B  color_code    color
index                                                
118                     Commerce         1.0  #9400D3
122                  Fabrication         2.0  #4B0082
1         Manufactur

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [26]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
#         edges = edges.merge(nodes.reset_index()[['node_name', 'index']], left_on = 'label_x', right_on = 'node_name').merge(
#         nodes.reset_index()[['node_name', 'index']], left_on = 'label_y', right_on = 'node_name')

In [None]:
rainbow_colors = {
    1:'#9400D3', #Violet
2:'#4B0082', #Indigo
3:'#0000FF', #Blue
4:'#00FF00', #Green
5:'#D3D3D3', #Gray
6:'#FFFF00', #Yellow
7:'#FF7F00', #Orange
8:'#FF0000'} #Red

In [64]:
# Bring personalized coloring. Use coarser categories in datasets, especially create special colors for the most famous ones...

# Load info
path = 'data/disagg_struct_refs/formatted/'

CIIU = pd.read_csv(path+'CIIU_full.csv')
# CBO = pd.read_csv(path+'CBO_full.csv').astype(str)
# CNAE = pd.read_csv(path+'CNAE_full.csv').astype(str)
HS = pd.read_csv(path+'hs_full.csv').astype(str)
HS[[col for col in HS.columns if 'HS ID' in col]] = HS[[col for col in HS.columns if 'HS ID' in col]].astype('int')


nodes = nodes.sort_index()

HS_cats_dict = {1: 'Vegetables, animals, wood', 2: 'Vegetables, animals, wood', 3: 'Vegetables, animals, wood', 
     4: 'Vegetables, animals, wood', 9: 'Vegetables, animals, wood', 10: 'Vegetables, animals, wood', 
     5: 'Minerals, metals', 14: 'Minerals, metals', 15: 'Minerals, metals',  
     11: 'Textiles', 12: 'Textiles',
     16: 'Manufacture, machinery', 17: 'Manufacture, machinery', 18: 'Manufacture, machinery', 19: 'Manufacture, machinery', 20: 'Manufacture, machinery'}
CIIU_cats_dict = {'A': 'Vegetables, animals, wood', 'B': 'Minerals, metals', 'C': 'Fabrication', 'G': 'Commerce', 'O': 'Public sector'}

datasets = [{'ref': HS,'ID':'HS ID HS1', 'dict': HS_cats_dict},
               {'ref': CIIU, 'ID':'CIIU ID CI1', 'dict': CIIU_cats_dict }]

def add_theme_attribute_column(nodes):

    node_attribute_datasets = []

    for ds in datasets:
        df = nodes[[col for col in nodes.columns if ds['ID'][:-3] in col]]

        if len(set(df.columns)) > 1: #two variables in this dataset are on diff levels

            df_x = df.merge(ds['ref'].drop_duplicates(subset = df.columns[0]), 
                            on  = df.columns[0], how = 'left', suffixes=('_x', ''))
            df_x['att_B_x'] = df_x[ds['ID']].map(ds['dict'])

            df_y = df.merge(ds['ref'].drop_duplicates(subset = df.columns[1]), 
                            on  = df.columns[1], how = 'left', suffixes=('_x', ''))
            df_y['att_B_y'] = df_y[ds['ID']].map(ds['dict'])

            # Column with values of thematic labels, from entries of 'HS' ('CIIU') dataset
#             df_ds = df_x[['att_B_x']].fillna(df_y[['att_B_y']])
            df_ds = pd.DataFrame(where(df_x.att_B_x.isnull(), df_y.att_B_y, df_x.att_B_x), columns = ['att_B'])

        else:
            df_ = df.merge(ds['ref'].drop_duplicates(subset = df.columns[0])
                           , how = 'left')
            df_ds = pd.DataFrame(df_[ds['ID']].map(ds['dict']))

        df_ds.columns = ['att_B']

        node_attribute_datasets += [df_ds]
#         print len(df_ds)

    att_B = node_attribute_datasets[0].fillna(node_attribute_datasets[1])

    nodes['att_B'] = att_B.fillna('Others')
    
    return nodes


### Trying to add some info to the nodes...

In [None]:


k = .05
max_d = 10

for conf in confs:
    (CIIU_lev, HS_Exp, HS_Imp) = conf
    col_name = ctry_str+'_'+geo_str+'_'+'_'.join([c for c in conf])+var_str
    print col_name
    
    for i in range(10):


        nodes = pd.read_csv('data/node_refs/'+col_name+'_nodes_meta.csv', index_col = 0)
        print len(nodes)
        edges = pd.read_csv('data/node_refs/'+col_name+'_edges.csv', index_col = 0)

        ### Max degree = max_d
        df1 = edges.groupby('label_x').apply(lambda x: x.nlargest(max_d, 'weight_COM')).reset_index(drop = True)
        df2 = edges.groupby('label_y').apply(lambda x: x.nlargest(max_d, 'weight_COM')).reset_index(drop = True)
        edges = df1.merge(df2, on = ['label_x', 'label_y', 'weight_COM']).reset_index(drop = True)

        G = nx.from_pandas_dataframe(edges, 'label_x', 'label_y', 'weight_'+geo_str)


        ### Subgraph - neighborhood
    #         nodes_sel = select_nodes_by_kw(G, ['Wine','grape', 'Grape','bebidas fermentadas'])
    #     nodes_sel = [ 'Output ofPesca maritima ']
    #     nodes_sel = select_nodes_by_kw(G, ['Silvicultura'])
        nodes_sel = choice(G.nodes(), 1)

        G = G.subgraph(second_neighbors(G, nodes_sel, second_order=True))  
        nodes = nodes.set_index('node_name').loc[G.nodes()].reset_index()
        edges = edges.loc[(edges.label_x.isin(G.nodes())) & (edges.label_y.isin(G.nodes()))]

        # nx.write_gexf(G,'networks/'+col_name+'.gexf')

    #     Networkx drawing
        #Perform Graph Drawing
        plt.figure(figsize=(8, 8))
        pos = nx.spring_layout(G, k=0.15)

        nx.draw(G, pos, node_size = 10 * nodes.node_size.values, alpha=.6, node_color= list(nodes.color.values), edgelist = [])
        nx.draw_networkx_edges(G, pos, edge_color='.7', alpha=.3)
        #     plt.savefig('../DataViva/Figures/'+'_'+kw+'_'+name)
        plt.show()

        ####

    #     Bokeh drawing
        ####

        N = len(nodes)
        node_indices = list(range(N))
        # sizes = np.linspace(10, 20, N)
        # node_sizes = np.sqrt(nodes['node_size'].values)

        plot = figure(title="Networkx Integration Demonstration", x_range=(-1.1,1.1), y_range=(-1.1,1.1))

        #####

        graph = from_networkx(G, nx.spring_layout, center=(0,0), k = k)

        graph.node_renderer.data_source.add(list(nodes['color'].values), 'color')
        graph.node_renderer.data_source.add(list(nodes['node_name'].values), 'node_name')
        graph.node_renderer.data_source.add(list(2*nodes['node_size'].values), 'node_size')

        graph.node_renderer.glyph = Circle(size='node_size', fill_color='color')
        graph.node_renderer.selection_glyph = Circle(size=15, fill_color='#808000')
        graph.node_renderer.hover_glyph = Circle(size=15, fill_color='#808000')

        graph.edge_renderer.glyph = MultiLine(line_color="#CCCCCC", line_alpha=0.2, line_width=1)
        graph.edge_renderer.selection_glyph = MultiLine(line_color='#808000', line_width=5)
        graph.edge_renderer.hover_glyph = MultiLine(line_color='#808000', line_width=5)
        graph.selection_policy = NodesAndLinkedEdges()

        plot.renderers.append(graph)

        hover = HoverTool(plot=plot, tooltips=dict(act="@node_name", value="@node_size"))
        # hover = HoverTool(tooltips = [('Info', '@node_name'),('(x,y)', '($x, $y)')])

        plot.add_tools(hover)

#         output_file("networkx_graph.html")
        # output_notebook()
        show(plot)

        nodes = nodes.sort_values(by = 'node_size', ascending = False)
        sns.barplot(x='node_size',y='node_name',palette=nodes['color'], data=nodes, edgecolor = 'None')


In [None]:
conf