# 04_graph

In [None]:
pip install networkx

## read news SPO from DataPlatform

In [None]:
from dsmlibrary.datanode import DataNode
from tqdm.auto import tqdm

In [None]:
# Client object used below to look up and read files (get_file_id / read_df).
datanode = DataNode()

In [None]:
# Directory id passed to datanode.get_file_id when locating "spo.parquet".
dir_process_id = 65

In [None]:
# Resolve the id of "spo.parquet" inside the processing directory, then load
# that file as a DataFrame.
df = datanode.read_df(
    datanode.get_file_id(
        directory_id=dir_process_id,
        name="spo.parquet",
    )
)
df

## prepare graph data

In [None]:
# One (tags, Topic) pair per DataFrame row; these pairs are later used as the
# edge list of the graph. A missing key yields None for that position.
graph_data = [
    (row.get('tags'), row.get('Topic'))
    for row in df.to_dict('records')
]
graph_data

In [None]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

from bokeh.io import output_notebook, show, save
from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine
from bokeh.plotting import figure
from bokeh.plotting import from_networkx

In [None]:
# Tell Bokeh to render show(...) output inline in the notebook.
output_notebook()

In [None]:
# Lookup of every distinct value in the 'tags' column, each mapped to the
# constant 'name'. Only key membership is used later (for node colouring).
news_titles = dict.fromkeys(df['tags'].unique(), 'name')
news_titles

In [None]:
# Build an undirected graph, treating each (tags, Topic) pair as an edge.
G = nx.Graph(graph_data)
G

In [None]:
# Colour per node: 'skyblue' for nodes that appear as a 'tags' value,
# 'yellow' for every other node.
node_colors = {
    node: ('skyblue' if node in news_titles else 'yellow')
    for node in G
}
node_colors

## visualize raw data

In [None]:
# Raw network view: all nodes share one fixed size and are coloured through
# the 'node_colors' node attribute.
HOVER_TOOLTIPS = [("Character", "@index")]
title = "News and NameEntities"

# Store the colours on the graph so the glyph can reference them by field name.
nx.set_node_attributes(G, node_colors, 'node_colors')

# Axis ranges (+/-10.1) sit just outside the layout scale (10) used below.
plot = figure(
    title=title,
    tooltips=HOVER_TOOLTIPS,
    tools="pan,wheel_zoom,save,reset,hover",
    active_scroll='wheel_zoom',
    x_range=Range1d(-10.1, 10.1),
    y_range=Range1d(-10.1, 10.1),
    frame_width=1200,
    frame_height=800,
)

# Lay the graph out with Kamada-Kawai, style edges and nodes, then display it.
network_graph = from_networkx(G, nx.kamada_kawai_layout, scale=10, center=(0, 0))
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)
network_graph.node_renderer.glyph = Circle(size=15, fill_color='node_colors')
plot.renderers.append(network_graph)
show(plot)

## Compute betweenness_centrality graph

In [None]:
def cal_size(val, k=10, k2=1000):
    """Scale a score into a node size, never going below ``k``.

    Args:
        val: Raw score to scale.
        k: First scale factor; also the minimum size returned.
        k2: Second scale factor.

    Returns:
        ``val * k * k2`` when that product is strictly greater than ``k``,
        otherwise ``k`` itself.
    """
    scaled = val * k * k2
    # k is listed first so that ties (and incomparable values) resolve to k.
    return max(k, scaled)

In [None]:
# Betweenness centrality per node, converted to a glyph size with cal_size.
graph_values = nx.betweenness_centrality(G)
node_size = {
    node: cal_size(score, k=10, k2=1000)
    for node, score in graph_values.items()
}

In [None]:
# Betweenness view: node size comes from the 'node_size' attribute and node
# colour from the 'node_colors' attribute.
HOVER_TOOLTIPS = [("Character", "@index")]
title = "betweenness_centrality"

# Store sizes and colours on the graph so the glyph can reference them by name.
nx.set_node_attributes(G, node_size, 'node_size')
nx.set_node_attributes(G, node_colors, 'node_colors')

# Axis ranges (+/-10.1) sit just outside the layout scale (10) used below.
plot = figure(
    title=title,
    tooltips=HOVER_TOOLTIPS,
    tools="pan,wheel_zoom,save,reset,hover",
    active_scroll='wheel_zoom',
    x_range=Range1d(-10.1, 10.1),
    y_range=Range1d(-10.1, 10.1),
    frame_width=1200,
    frame_height=800,
)

# Lay the graph out with Kamada-Kawai, style edges and nodes, then display it.
network_graph = from_networkx(G, nx.kamada_kawai_layout, scale=10, center=(0, 0))
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)
network_graph.node_renderer.glyph = Circle(size='node_size', fill_color='node_colors')
plot.renderers.append(network_graph)
show(plot)

## compute eigenvector_centrality

In [None]:
# Eigenvector centrality per node (up to 600 power iterations), converted to a
# glyph size with cal_size.
graph_values = nx.eigenvector_centrality(G, max_iter=600)
node_size = {
    node: cal_size(score, k=10, k2=15)
    for node, score in graph_values.items()
}

In [None]:
# Eigenvector view: node size comes from the 'node_size' attribute and node
# colour from the 'node_colors' attribute.
# The title names the metric being plotted (it was the placeholder "xxx").
title = "eigenvector_centrality"
HOVER_TOOLTIPS = [("Character", "@index")]

# Store sizes and colours on the graph so the glyph can reference them by name.
nx.set_node_attributes(G, node_size, 'node_size')
nx.set_node_attributes(G, node_colors, 'node_colors')

# Axis ranges (+/-10.1) sit just outside the layout scale (10) used below.
plot = figure(
    title=title,
    tooltips=HOVER_TOOLTIPS,
    tools="pan,wheel_zoom,save,reset,hover",
    active_scroll='wheel_zoom',
    x_range=Range1d(-10.1, 10.1),
    y_range=Range1d(-10.1, 10.1),
    frame_width=1200,
    frame_height=800,
)

# Lay the graph out with Kamada-Kawai, style nodes and edges, then display it.
network_graph = from_networkx(G, nx.kamada_kawai_layout, scale=10, center=(0, 0))
network_graph.node_renderer.glyph = Circle(fill_color='node_colors', size='node_size')
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)
plot.renderers.append(network_graph)
show(plot)