Imports and other setup

In [1]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from visJS2jupyter import visJS_module
%matplotlib inline
# Notebook-wide pandas display settings (row/column limits and output width).
for option_name, option_value in (
    ('display.max_rows', 200),
    ('display.max_columns', 10),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

Set up paths, "global" variables, etc.

In [2]:
# Verbose progress messages are printed only when this flag is set.
debug = False

# Row limit handed to the edge reader: None reads every row.
# For a quick test run, use a small number instead, e.g. my_nrows = 200
my_nrows = None

# Input CSV files (paths are relative to this notebook).
orgs_path = '../outputs/org_names.csv'
edges_path = '../outputs/edges.csv'

# Directory for anything this notebook writes out.
output_path = './'

Read in human readable names, create a usable dict of them

In [3]:
# Column dtypes are pinned up front rather than left to pandas' inference.
org_dtypes = {
    'ORGANIZATIONAL_UNIT': int,
    'SCHOOL_NAME': object,
    'ORG_DISPLAY_NAME': object,
}
df_org = pd.read_csv(orgs_path, dtype=org_dtypes)

# Index by organizational unit and keep only the display-name column.
df_org_names = df_org.set_index('ORGANIZATIONAL_UNIT')[['ORG_DISPLAY_NAME']]

# Plain dict lookup: ORGANIZATIONAL_UNIT -> ORG_DISPLAY_NAME.
org_names = df_org_names['ORG_DISPLAY_NAME'].to_dict()

Read in edges from csv

In [4]:
# Optional progress message. The parenthesised single-argument print form runs
# unchanged under both Python 2 and Python 3 (the bare print statement is a
# syntax error on Python 3).
if debug:
    if my_nrows is not None:
        print("Reading first %d rows of %s" % (my_nrows, edges_path))
    else:
        print("Reading all rows of %s" % (edges_path))

# dtypes are specified manually because, when reading many rows, pandas
# otherwise tries to load all of them before inferring the dtype and emits a
# warning. nrows=None reads the whole file.
df_edges = pd.read_csv(
    edges_path,
    dtype={
        'Date': object,
        'n1': int,
        'n2': int,
    },
    nrows=my_nrows,
)

Build graph G explicitly, edge by edge, so that each repeated (n1, n2) pair adds to the weight of the existing edge instead of being collapsed into a single unweighted edge.

In [5]:
# Undirected graph in which an edge's 'weight' counts how many times that
# (n1, n2) pair occurs in df_edges (each occurrence adds default_weight).
G = nx.Graph()
default_weight = 1.0
#default_weight = 1.0 / float(len(df_edges.index))

# Walk the two endpoint columns directly instead of using iterrows(): iterrows
# builds a full Series object for every row, which is needlessly slow on a long
# edge list. tolist() yields plain Python ints in the original row order.
for n1, n2 in zip(df_edges['n1'].tolist(), df_edges['n2'].tolist()):
    if G.has_edge(n1, n2):
        # Pair seen before: accumulate onto the existing edge.
        G[n1][n2]['weight'] += default_weight
    else:
        G.add_edge(n1, n2, weight=default_weight)

Compute node positions for drawing the graph. TODO: also compute clusters.

In [6]:
# spring_layout starts from random initial positions. With no seed argument it
# draws them from NumPy's global random state, so seed that state first to get
# the same node positions on every Restart & Run All.
layout_seed = 42
np.random.seed(layout_seed)
spring_pos = nx.spring_layout(G)

In [7]:
#print nx.info(G)
#plt.axis("off")
#nx.draw_networkx(G, pos = spring_pos, with_labels = False, node_size = 30)

Draw interactive graph!

In [59]:
# Materialise the nodes once so that the node records and the index map below
# are built from the same fixed ordering.
nodes = list(G.nodes())

# One record per node. Layout coordinates are multiplied by 1000 before being
# handed to the renderer. The tooltip title falls back to the node id when the
# id is missing from org_names, so one unnamed organization cannot abort the
# whole cell with a KeyError.
nodes_dict = [{"id": n,
               "title": org_names.get(n, str(n)),
               "x": spring_pos[n][0]*1000,
               "y": spring_pos[n][1]*1000,
               "color": "black"
              } for n in nodes]

# Node id -> position in nodes_dict; edges refer to their endpoints by this index.
node_map = {n: i for i, n in enumerate(nodes)}

In [60]:
edges_list = list(G.edges())

# Per-edge colour looked up from the 'weight' attribute (Blues colormap, 'log'
# transform), as returned by visJS2jupyter keyed by the edge tuple.
edge_to_color = visJS_module.return_edge_to_color(
    G,
    field_to_map='weight',
    cmap=plt.cm.Blues,
    alpha=1,
    color_vals_transform='log',
)

# One record per edge; source/target are indices into nodes_dict via node_map.
edges_dict = []
for edge in edges_list:
    src, dst = edge
    edges_dict.append({
        "source": node_map[src],
        "target": node_map[dst],
        "title": "A",
        "edge_title_field": "B",
        "edge_label_field": "C",
        "color": edge_to_color[edge],
    })

In [67]:
# Rendering options collected in one place; the values are passed through to
# visJS2jupyter unchanged.
network_options = dict(
    node_size_multiplier=8,
    node_font_size=0,  # presumably hides node labels -- TODO confirm
    edge_width=12,
    tooltip_delay=0,
    graph_width=700,
    graph_height=1000,
    graph_id=0,
    config_enabled=False,
)

# Kept as the cell's final expression so the notebook displays the returned object.
visJS_module.visjs_network(nodes_dict, edges_dict, **network_options)