In [59]:
"""
The dataset represents the relationships between YouTube channels and YouTube keywords.

Useful link (Bokeh user guide on network graphs):
https://docs.bokeh.org/en/latest/docs/user_guide/graph.html
"""
pass

In [None]:
!pip install bokeh
!pip install pandas

In [74]:
import math
import pickle as pk
import pandas as pd
from bokeh.plotting import figure, show, output_notebook, reset_output
from bokeh.models import GraphRenderer, ColumnDataSource, StaticLayoutProvider, Circle, MultiLine

In [70]:
# Read the pickled network description from disk.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
with open('large_network.pkl', mode='rb') as pickle_file:
    network_data = pk.load(pickle_file)

# Show which top-level sections the loaded object provides
network_data.keys()

dict_keys(['nodes', 'edges'])

In [66]:
# Create the nodes dataframe.
# Each entry of network_data['nodes'] is indexed as (node id, ..., attribute dict):
# the first element becomes 'node_index' and the last element's keys become columns.
nodes_df = pd.DataFrame(
    [{'node_index': node[0], **node[-1]} for node in network_data['nodes']]
)

# Keep only the columns needed for plotting; .copy() makes the subset an independent
# frame so that adding a column below does not trigger SettingWithCopyWarning.
nodes_df = nodes_df[['node_index', 'node_type', 'x', 'y']].copy()

# Colour channels blue and every other node type red.
# Iterate the column directly: Series.iteritems() was removed in pandas 2.0.
nodes_df['color'] = [
    'blue' if node_type == 'channel' else 'red'
    for node_type in nodes_df['node_type']
]
nodes_df.head()

Unnamed: 0,node_index,node_type,x,y,color
0,UCJuKx9AAK-72rAuSgPvJiog,channel,-792.227266,-680.471755,blue
1,UCzND61Dr1ubqRp2nBFwBQPA,channel,366.866994,-480.048406,blue
2,UCQfBKkui_2gFYLx10Oty41A,channel,-530.282103,-563.151544,blue
3,UCGpxwO2XF6sHSaRSnuzlRnQ,channel,-991.428737,482.687666,blue
4,UCMDMVopEnWq3YUpDEZYbxOw,channel,397.349146,-1034.532153,blue


In [67]:
# Build the edges dataframe (bokeh expects the column names to be 'start' and 'end')
edge_records = []
for edge in network_data['edges']:
    edge_records.append({'start': edge[0], 'end': edge[1]})

edges_df = pd.DataFrame(edge_records)
edges_df.head()

Unnamed: 0,start,end
0,UCtZ-NR6mtwRGRR48AtrYLjg,vuelta al mundo en moto
1,UCtZ-NR6mtwRGRR48AtrYLjg,viajar en moto
2,UCtZ-NR6mtwRGRR48AtrYLjg,viajes
3,UCtZ-NR6mtwRGRR48AtrYLjg,viajes aventura
4,UCtZ-NR6mtwRGRR48AtrYLjg,motos


In [52]:
# Reset bokeh's output configuration before the plotting cell sets it up again.
# NOTE(review): presumably only needed when re-running cells interactively —
# confirm whether this is still required on a fresh top-to-bottom run.
reset_output()

In [79]:
# Work out the extent of the node coordinates along each axis
x_coords, y_coords = nodes_df['x'], nodes_df['y']
min_x, max_x = x_coords.min(), x_coords.max()
min_y, max_y = y_coords.min(), y_coords.max()

# Push every bound outwards by `scale` times that bound's own magnitude,
# so the outermost nodes are not drawn right on the edge of the figure
scale = 0.1
x_range = [
    min_x - scale * math.fabs(min_x),
    max_x + scale * math.fabs(max_x),
]
y_range = [
    min_y - scale * math.fabs(min_y),
    max_y + scale * math.fabs(max_y),
]

In [81]:
# Create the main figure
figure_params = {
    'title': "YouTube Channel Keywords Network",
    'tools': '',
    'toolbar_location': None,
    'x_range': x_range,
    'y_range': y_range,
    # `plot_width` / `plot_height` were removed in Bokeh 3.0;
    # `width` / `height` are the replacement names.
    'width': 1000,
    'height': 1000,
}

fig = figure(**figure_params)

# Hide the axes
fig.xaxis.visible = False
fig.yaxis.visible = False

# Hide the grid
fig.xgrid.visible = False
fig.ygrid.visible = False

# Create the network graph renderer
graph = GraphRenderer()

# Set up the nodes renderer: 'index' identifies each node and 'color' is the
# per-node column that the glyph's fill_color refers to by name.
graph.node_renderer.data_source.data = {
    'index': nodes_df['node_index'].to_list(),
    'color': nodes_df['color'].to_list(),
}
graph.node_renderer.glyph = Circle(radius=10, fill_color='color')

# Set up the nodes layout: map every node id to its (x, y) position.
# Built column-wise with zip instead of the much slower row-wise iterrows().
node_positions = zip(nodes_df['x'].to_list(), nodes_df['y'].to_list())
graph_layout = dict(zip(nodes_df['node_index'].to_list(), node_positions))
graph.layout_provider = StaticLayoutProvider(graph_layout=graph_layout)

# Set up the edges renderer; columns are passed as plain lists, matching the
# way the node columns are supplied above.
graph.edge_renderer.data_source.data = {
    'start': edges_df['start'].to_list(),
    'end': edges_df['end'].to_list(),
}
graph.edge_renderer.glyph = MultiLine(line_color="#cccccc", line_alpha=1, line_width=1)

# Add the network graph to the main figure
fig.renderers.append(graph)

# Render the figure inline in the notebook
output_notebook()
show(fig)