In [1]:
import os

import matplotlib.pyplot as plt
import networkx as nx

from bokeh.io import show, output_notebook
from bokeh.models import Plot, Range1d, MultiLine, Circle, HoverTool, TapTool, BoxSelectTool
from bokeh.models.graphs import from_networkx, NodesAndLinkedEdges, EdgesAndLinkedNodes
from bokeh.palettes import Spectral4

%matplotlib inline

In [2]:
def get_edge_data(G, key):
    """Collect one attribute from every edge of ``G``.

    Parameters
    ----------
    G
        Graph-like object whose ``edges(data=True)`` yields
        ``(u, v, attrs)`` triples.
    key
        Name of the edge attribute to look up in each ``attrs`` mapping.

    Returns
    -------
    list
        ``attrs[key]`` for each edge, in the order ``G.edges(data=True)``
        yields them.

    Raises
    ------
    KeyError
        If any edge's attribute mapping does not contain ``key``.
    """
    return [attrs[key] for _src, _dst, attrs in G.edges(data=True)]

In [3]:
# Directory that is scanned for the CSV files.
DIR_LOC = "data"

# Undirected graph; swap in nx.DiGraph() if edge direction should be kept.
G = nx.Graph()

In [4]:
# Names of the CSV files directly inside DIR_LOC, in sorted order.
fnames = sorted(
    entry for entry in os.listdir(DIR_LOC) if entry.endswith(".csv")
)
fnames

['yelp_business.csv',
 'yelp_business_attributes.csv',
 'yelp_business_hours.csv',
 'yelp_checkin.csv',
 'yelp_review.csv',
 'yelp_tip.csv',
 'yelp_user.csv']

In [5]:
# One graph node per CSV file name collected in `fnames`.
G.add_nodes_from(fnames)

In [6]:
# Schema edges, grouped by the column the two tables share.
#
# The foreign key is attached in the same call that creates the edge, so an
# edge can never exist without a 'foreign_key' attribute (a missing one would
# make get_edge_data(G, 'foreign_key') raise KeyError). Pair order and
# endpoint order match the original add_edge() sequence, so edge iteration
# order is unchanged.
SCHEMA_EDGES = {
    "business_id": [
        ('yelp_business.csv', 'yelp_business_attributes.csv'),
        ('yelp_business.csv', 'yelp_business_hours.csv'),
        ('yelp_business_attributes.csv', 'yelp_business_hours.csv'),
        ('yelp_business.csv', 'yelp_checkin.csv'),
        ('yelp_business_hours.csv', 'yelp_checkin.csv'),
        ('yelp_business_attributes.csv', 'yelp_checkin.csv'),
        ('yelp_review.csv', 'yelp_business.csv'),
        ('yelp_review.csv', 'yelp_business_hours.csv'),
        ('yelp_review.csv', 'yelp_business_attributes.csv'),
        ('yelp_review.csv', 'yelp_checkin.csv'),
    ],
    "user_id": [
        ('yelp_review.csv', 'yelp_tip.csv'),
        ('yelp_review.csv', 'yelp_user.csv'),
    ],
}

# add_edges_from applies the keyword attribute to every edge in the batch.
for foreign_key, table_pairs in SCHEMA_EDGES.items():
    G.add_edges_from(table_pairs, foreign_key=foreign_key)


In [11]:
# Hover tooltip rows as (label, "@column") pairs; both columns are added to
# the edge data source further down.
TOOL_TIPS = [
    ("Edge", "@edgename"),
    ("Foreign Key", "@attr"),
]

# Square canvas whose ranges are slightly wider than the scale=1 layout
# centred on the origin.
plot = Plot(
    plot_width=600,
    plot_height=600,
    x_range=Range1d(-1.1, 1.1),
    y_range=Range1d(-1.1, 1.1),
)
plot.title.text = "Yelp Data Schema"
plot.add_tools(HoverTool(tooltips=TOOL_TIPS), TapTool(), BoxSelectTool())

# Build the bokeh graph renderer from G with nodes placed on a circle.
graph_renderer = from_networkx(G, nx.circular_layout, scale=1, center=(0, 0))

# Nodes: same size in every state, colour changes on hover / selection.
node_renderer = graph_renderer.node_renderer
node_renderer.glyph = Circle(size=15, fill_color=Spectral4[0])
node_renderer.selection_glyph = Circle(size=15, fill_color=Spectral4[2])
node_renderer.hover_glyph = Circle(size=15, fill_color=Spectral4[1])

# Edges: grey by default, coloured on hover / selection.
edge_renderer = graph_renderer.edge_renderer
edge_renderer.glyph = MultiLine(line_color="#CCCCCC", line_alpha=0.8, line_width=5)
edge_renderer.selection_glyph = MultiLine(line_color=Spectral4[2], line_width=5)
edge_renderer.hover_glyph = MultiLine(line_color=Spectral4[1], line_width=5)

# Columns referenced by TOOL_TIPS; both are derived from G's edge iteration.
edge_renderer.data_source.data["edgename"] = list(G.edges())
edge_renderer.data_source.data["attr"] = get_edge_data(G, "foreign_key")

# Selection is node-driven, inspection (hover) is edge-driven.
graph_renderer.selection_policy = NodesAndLinkedEdges()
graph_renderer.inspection_policy = EdgesAndLinkedNodes()

plot.renderers.append(graph_renderer)

# Render inline in the notebook.
output_notebook()
show(plot)