In [1]:
"""
The dataset represents the relationships between YouTube channels and YouTube keywords

Useful link
https://docs.bokeh.org/en/latest/docs/user_guide/graph.html
"""
pass

In [None]:
# Install the third-party packages this notebook imports.
# The %pip magic (unlike a `!pip` shell call) installs into the environment of
# the running kernel, so the imports below see the packages.
# NOTE(review): versions are not pinned; consider pinning for reproducibility.
%pip install bokeh pandas

In [3]:
import math
import itertools
import pickle as pk
import pandas as pd
from bokeh.plotting import figure, show, output_notebook, reset_output, output_file
from bokeh.models import GraphRenderer, ColumnDataSource, StaticLayoutProvider, Circle, \
    MultiLine, HoverTool, TapTool, BoxSelectTool
from bokeh.models.graphs import NodesAndLinkedEdges, EdgesAndLinkedNodes

In [4]:
# Read the pickled network description from disk.
# NOTE(review): unpickling can execute arbitrary code; only load
# 'large_network.pkl' when it comes from a trusted source.
with open('large_network.pkl', 'rb') as pkl_file:
    network_data = pk.load(pkl_file)

# Display the top-level keys of the loaded object.
network_data.keys()

dict_keys(['nodes', 'edges'])

In [5]:
# Flatten each node entry into one record: the first element is stored under
# 'node_index' and the last element (a mapping of attributes) is merged in.
nodes = [
    {'node_index': entry[0], **entry[-1]}
    for entry in network_data['nodes']
]

# Coordinates of every node, regardless of its type.
nodes_df = pd.DataFrame(nodes).loc[:, ['x', 'y']]

In [6]:
# Split the nodes into groups keyed by 'node_type' (keywords and channels).
#
# itertools.groupby only merges *consecutive* items sharing a key, so with an
# unsorted node list a later run of a type would overwrite the earlier one.
# Accumulating into a dict keeps every node whatever the input order is.
nodes_by_type = {}
for node in nodes:
    nodes_by_type.setdefault(node['node_type'], []).append(node)

# The following cells look the groups up through `nodes`, so the name is
# rebound from the flat list to the per-type mapping.
# NOTE: because of this rebinding the cell is not idempotent; re-run the
# previous cell before running this one again.
nodes = nodes_by_type

In [7]:
# Create the channels DF, indexed by node id, with the marker colour used
# when the channels are drawn.
channels_df = (
    pd.DataFrame(nodes['channel'])
    .set_index('node_index')
    .assign(fill_color='royalblue')
)

# Keep only the first 10 characters of the publication timestamp
# (presumably the YYYY-MM-DD date part -- confirm against the data).
# The .str accessor passes missing values through instead of raising.
channels_df['channel_published_at'] = channels_df['channel_published_at'].str[:10]

In [8]:
# Build the keywords frame: one row per keyword node, indexed by node id,
# with the marker colour used when the keywords are drawn.
keywords_df = (
    pd.DataFrame(nodes['keyword'])
    .set_index('node_index')
    .assign(fill_color='red')
)

In [9]:
# Create the edges dataframe (the column names should be 'start' and 'end'
# according to bokeh). Naming the columns explicitly keeps them present even
# when the edge list is empty.
edges_df = pd.DataFrame(
    [{'start': edge[0], 'end': edge[1]} for edge in network_data['edges']],
    columns=['start', 'end'],
)

# Each edge is drawn as a two-point segment, so 'x' and 'y' hold
# [start coordinate, end coordinate] pairs. Every edge is expected to go from
# a channel ('start') to a keyword ('end'); an id missing from the matching
# frame raises KeyError.
# Assumes node ids are unique within each frame -- TODO confirm.
# One label-based lookup per axis replaces a row-by-row iterrows() scan.
edge_starts = edges_df['start'].tolist()
edge_ends = edges_df['end'].tolist()
for axis in ('x', 'y'):
    start_coords = channels_df.loc[edge_starts, axis].tolist()
    end_coords = keywords_df.loc[edge_ends, axis].tolist()
    edges_df[axis] = [[start, end] for start, end in zip(start_coords, end_coords)]

In [10]:
# Clear Bokeh's current output settings (e.g. file/notebook targets left over
# from earlier runs) before the output mode for the figure is chosen.
reset_output()

In [11]:
# Find the axes ranges: the extent of the node coordinates plus a margin.
min_x = nodes_df['x'].min()
max_x = nodes_df['x'].max()
min_y = nodes_df['y'].min()
max_y = nodes_df['y'].max()

# Margin added on each side, as a fraction of half the data extent.
range_scale = 0.1


def _padded_range(low, high, scale):
    """Return ``[low - margin, high + margin]`` for the interval ``[low, high]``.

    The margin is ``scale`` times half the extent of the interval. Sizing it
    from the extent (rather than from the absolute value of each bound) keeps
    the margin meaningful when a bound is 0 or when the data lies far from
    the origin.
    """
    margin = scale * (high - low) / 2
    if margin == 0:
        # All values identical: fall back to a margin based on the magnitude
        # of the value (or on `scale` alone when the value is 0) so the range
        # does not collapse to a single point.
        margin = scale * (abs(low) or 1.0)
    return [low - margin, high + margin]


x_range = _padded_range(min_x, max_x, range_scale)
y_range = _padded_range(min_y, max_y, range_scale)

In [12]:
# Create the main figure
figure_params = {
    'title': "YouTube Channels Keywords Network",
    'tools': '',  # no default toolbar tools; hover tools are added below
    'x_range': x_range,
    'y_range': y_range,
    # `plot_width`/`plot_height` were removed in Bokeh 3.0;
    # `width`/`height` are the supported names.
    'width': 800,
    'height': 800,
}

fig = figure(**figure_params)

# Hide the axes
fig.xaxis.visible = False
fig.yaxis.visible = False

# Hide the grid
fig.xgrid.visible = False
fig.ygrid.visible = False

# Edges rendering: one thin grey two-point line per edge, drawn first so the
# node markers are painted on top of it.
edges_ds = ColumnDataSource(edges_df)
fig.multi_line('x', 'y', source=edges_ds, line_color='#cccccc',
               line_alpha=0.8, line_width=0.5)

# Channels rendering
# `scatter` (default marker: circle) is used because `circle(size=...)` is
# deprecated in recent Bokeh releases.
channels_ds = ColumnDataSource(channels_df)
channels = fig.scatter('x', 'y', source=channels_ds, fill_color='fill_color',
                       line_color=None, legend_label='Channel', size=7.5)

# Hover card for channels; each @field is filled from the matching column of
# the channels data source.
channels_hover = HoverTool(renderers=[channels])
channels_hover.tooltips = """
<div>
    <h3>@channel_title</h3>
    <div><p>  </p></div>
    <div><img src="@channel_thumbnail_default" alt=""></div>
    <div># Subscribers: @channel_subscriber_count</div>
    <div># Views: @channel_view_count</div>
    <div>Published at: @channel_published_at</div>
</div>
"""
fig.add_tools(channels_hover)


# Keywords rendering
keywords_ds = ColumnDataSource(keywords_df)
keywords = fig.scatter('x', 'y', source=keywords_ds, fill_color='fill_color',
                       line_color=None, legend_label='Keyword', size=5)

# The keyword text is the frame's index, exposed to the data source as the
# 'node_index' column.
keywords_hover = HoverTool(renderers=[keywords], tooltips=[('Keyword', "@node_index")])
fig.add_tools(keywords_hover)

# Write the plot to a standalone HTML file and open it.
# (Call output_notebook() instead to render inline in the notebook.)
output_file('channels_keywords_network.html')
show(fig)