In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

import math
from bokeh.models import (BoxSelectTool, Circle, HoverTool, MultiLine, WheelZoomTool, PanTool,
                          NodesAndLinkedEdges, Plot, Range1d, TapTool, StaticLayoutProvider)
from bokeh.palettes import Spectral4, Blues8
from bokeh.plotting import from_networkx, show
from bokeh.io import output_notebook
from bokeh.plotting import figure
import numpy as np

## 1. Concepts Relations Network

In [2]:
# Read the processed concept edges (the source, target, weight, source_name
# and target_name columns are used by the cells below)
df_edges = pd.read_csv('concepts_edges.csv')
# Preview the last three rows
df_edges.tail(3)

Unnamed: 0,source,target,weight,source_name,target_name
60,45,46,1,Use case,Economics
61,45,47,1,Use case,Test case
62,46,47,1,Economics,Test case


In [3]:
# Extract all the concepts: gather the distinct (id, name) pairs seen on the
# source side and on the target side of the edges, under common column names
endpoint_frames = []
for id_col, name_col in (('source', 'source_name'), ('target', 'target_name')):
    pairs = df_edges[[id_col, name_col]].drop_duplicates()
    endpoint_frames.append(pairs.rename(columns={id_col: "index", name_col: 'name'}))
df, df2 = endpoint_frames

# Merge both sides, keep one row per concept and order the rows by concept id
concepts_df = pd.concat(endpoint_frames, ignore_index=True)
concepts_df = concepts_df.drop_duplicates()
concepts_df = concepts_df.sort_values(by='index')
concept_keys = concepts_df['name'].tolist()


In [4]:
# Create the graph; weight, source_name and target_name become edge attributes
G = nx.from_pandas_edgelist(df_edges, 'source', 'target', ['weight', 'source_name', 'target_name'])

# Drop self-loops before iterating. Calling G.remove_edge() inside a loop over
# G.edges() mutates the adjacency dicts that are being traversed and raises a
# RuntimeError, so the self-loops are materialised into a list and removed in
# a single call instead.
G.remove_edges_from(list(nx.selfloop_edges(G)))

# Position of every node in G's iteration order; a dict lookup replaces the
# repeated linear nodes_order.index() scans
nodes_order = list(G.nodes())
node_position = {node: position for position, node in enumerate(nodes_order)}

# For every remaining edge, record the positions of its two end nodes
starts = []
ends = []
for u, v in G.edges():
    starts.append(node_position[u])
    ends.append(node_position[v])


In [5]:
# Direct Bokeh output to the notebook
output_notebook()

N = G.number_of_nodes()
node_indices = list(range(N))

# Square canvas whose ranges leave a small margin around the unit circle
plot = Plot(x_range=Range1d(-1.1, 1.1), y_range=Range1d(-1.1, 1.1),
            width=600, height=600)
plot.title.text = "Concepts Relations Network"
plot.title.text_font_size = '20pt'

# Interactive Tools
interactive_tools = [HoverTool(tooltips='@name'), TapTool(), BoxSelectTool(), WheelZoomTool(), PanTool()]
plot.add_tools(*interactive_tools)

graph_renderer = from_networkx(G, nx.circular_layout, scale=1, center=(0, 0))

# Node Renderer: the 'name' column feeds the '@name' hover tooltip
# NOTE(review): concept_keys is indexed by node id here, which relies on the
# ids being valid positions into concept_keys — confirm against the data
node_names = []
for node in G.nodes():
    node_names.append(concept_keys[node])
graph_renderer.node_renderer.data_source.data['name'] = node_names

# Same circle for every state; only the fill colour changes
node_states = (('glyph', Spectral4[0]),
               ('selection_glyph', Spectral4[2]),
               ('hover_glyph', Spectral4[1]))
for glyph_attr, fill in node_states:
    setattr(graph_renderer.node_renderer, glyph_attr, Circle(size=15, fill_color=fill))

# Edge end points, expressed as the node positions computed earlier
graph_renderer.edge_renderer.data_source.data['start'] = starts
graph_renderer.edge_renderer.data_source.data['end'] = ends

# Create a static layout: N evenly spaced points on the unit circle, keyed by
# 0..N-1; it replaces the layout provider set up by from_networkx above
circ = [i*2*math.pi/N for i in node_indices]
x = list(map(math.cos, circ))
y = list(map(math.sin, circ))
graph_layout = {index: point for index, point in zip(node_indices, zip(x, y))}
graph_renderer.layout_provider = StaticLayoutProvider(graph_layout=graph_layout)

# Draw quadratic bezier paths
def bezier(start, end, control, steps):
    """Sample one coordinate of a quadratic Bezier curve.

    start, end and control are the coordinate values of the two end points
    and of the control point; steps is an iterable of curve parameters s.
    Returns a list with one value per s:
    (1-s)^2 * start + 2*(1-s)*s * control + s^2 * end.
    """
    samples = []
    for s in steps:
        r = 1 - s
        samples.append(r**2*start + 2*r*s*control + s**2*end)
    return samples

# Edge Renderer: line width follows the edge weight, but is never below 2
edge_widths = []
for a, b in G.edges():
    edge_widths.append(max(2, G.get_edge_data(a, b)['weight']))
graph_renderer.edge_renderer.data_source.data["line_width"] = edge_widths

# One MultiLine per state; each reads its width from the 'line_width' column
graph_renderer.edge_renderer.glyph = MultiLine(
    line_color=Spectral4[0], line_alpha=0.6, line_width={'field': 'line_width'})
graph_renderer.edge_renderer.selection_glyph = MultiLine(
    line_color=Spectral4[2], line_width={'field': 'line_width'})
graph_renderer.edge_renderer.hover_glyph = MultiLine(
    line_color=Spectral4[1], line_width={'field': 'line_width'})

# Calculate edge curves, sampled at s = 0.00, 0.01, ..., 0.99
xs, ys = [], []
steps = [i/100. for i in range(100)]
for u, v in G.edges():
    u_pos = nodes_order.index(u)
    v_pos = nodes_order.index(v)
    sx, sy = graph_layout[u_pos]
    ex, ey = graph_layout[v_pos]
    # Control point: sum of the end points divided by (position distance + 1.5),
    # so the further apart two nodes are in node order, the closer it is to (0, 0)
    divisor = abs(u_pos - v_pos) + 1.5
    cx = (sx+ex)/divisor
    cy = (sy+ey)/divisor
    xs.append(bezier(sx, ex, cx, steps))
    ys.append(bezier(sy, ey, cy, steps))

graph_renderer.edge_renderer.data_source.data['xs'] = xs
graph_renderer.edge_renderer.data_source.data['ys'] = ys

# Selection Policy: selecting or hovering a node also affects its linked edges
graph_renderer.selection_policy = NodesAndLinkedEdges()
graph_renderer.inspection_policy = NodesAndLinkedEdges()

plot.renderers.append(graph_renderer)
show(plot)

## 2. Concepts Relations Heatmap

In [6]:
# Create a matrix with the symmetric relations: each (source_name, target_name,
# weight) row is paired with the same row with the two names swapped
dfe1 = df_edges.drop(columns=['source', 'target'])
swapped_names = {"target_name": "source_name", "source_name": 'target_name'}
dfe2 = dfe1.rename(columns=swapped_names)

concepts_df = pd.concat([dfe1, dfe2], ignore_index=True)
concepts_df = concepts_df.drop_duplicates()

In [7]:
# Read the total per concept; these totals fill the diagonal of the matrix
df_concepts = pd.read_csv('concepts.csv')
# Preview the first three rows
df_concepts.head(3)

Unnamed: 0,Concepts,Count
0,Deep learning,16
1,Artificial intelligence,15
2,Machine learning,16


In [8]:
# Add the diagonal to the matrix: every concept is paired with itself and its
# count becomes the weight of that cell
df_concepts = (
    df_concepts
    .rename(columns={'Concepts': 'source_name', 'Count': 'weight'})
    .assign(target_name=lambda frame: frame['source_name'])
)
concepts_df = pd.concat([concepts_df, df_concepts], ignore_index=True)
concepts_df = concepts_df.drop_duplicates()
concepts_df.tail()

Unnamed: 0,weight,source_name,target_name
168,1,Many-valued logic,Many-valued logic
169,1,Utility,Utility
170,1,Use case,Use case
171,1,Economics,Economics
172,1,Test case,Test case


In [9]:
from bokeh.transform import linear_cmap
from bokeh.models import BasicTicker, PrintfTickFormatter
import numpy as np

# Config: toolbar tools and the Blues palette in reversed order
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
colors = list(reversed(Blues8))

# Axis categories: sources ascending on x, targets descending on y
sources = sorted(set(concepts_df.source_name))
targets = sorted(set(concepts_df.target_name), reverse=True)

# Create Figure
p = figure(x_range=sources, y_range=targets,
           width=700, height=600, x_axis_location="above",
           tools=TOOLS, toolbar_location='right',
           tooltips=[('Concepts', '@source_name{safe} <br> @target_name'), ('Count', '@weight')])

p.title.text = "Concepts Relations Heatmap"
p.title.text_font_size = '20pt'

# Strip grid lines, axis lines and ticks; keep small, rotated labels
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "7px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi / 3

# Create heatmap as Rect matrix: one unit cell per (source_name, target_name)
# row, coloured by weight between the smallest and largest weight
weight_colors = linear_cmap("weight", colors,
                            low=concepts_df.weight.min(),
                            high=concepts_df.weight.max())
hm = p.rect(x="source_name", y="target_name", width=1, height=1, source=concepts_df,
            fill_color=weight_colors, line_color=None)

# Colour bar built from the heatmap renderer, placed to the right of the plot
color_bar = hm.construct_color_bar(
    major_label_text_font_size="7px",
    ticker=BasicTicker(desired_num_ticks=len(colors)),
    label_standoff=6,
    border_line_color=None,
    padding=5
)
p.add_layout(color_bar, 'right')
show(p)


## 3. Categories breakdown by number of appearances

In [10]:
# Read the processed categories (the Count and Level_1..Level_3 columns are
# used by the treemap below)
df_cat = pd.read_csv('categories_her.csv')


In [11]:
import plotly
import plotly.express as px
import numpy as np
import ipywidgets as widgets

# Function to update the Treemap
def update(sliderVal):
    """Redraw the categories treemap for the selected Count range.

    sliderVal is a (low, high) pair; only rows of df_cat whose Count lies in
    that range, both ends included, are drawn. The colour range always spans
    0 to the largest Count of the whole df_cat, not of the filtered rows.
    """
    low, high = sliderVal
    in_range = df_cat['Count'].between(low, high)
    hierarchy = [px.Constant("Categories"), 'Level_1', 'Level_2', 'Level_3']
    fig = px.treemap(
        df_cat[in_range],
        path=hierarchy,
        values='Count',
        color='Count',
        color_continuous_scale='Blues',
        range_color=[0, max(df_cat['Count'])],
        title="Categories breakdown by number of appearances",
    )
    fig.show()

# Range Slider: spans 0 .. largest Count and starts with the full range selected
count_upper = df_cat['Count'].max()
rangeSlider = widgets.IntRangeSlider(
    value=[0, count_upper],
    min=0,
    max=count_upper,
    step=1,
    description='Count:',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)
# Re-run update() with the slider's (low, high) pair whenever it changes
widgets.interact(update, sliderVal=rangeSlider)

interactive(children=(IntRangeSlider(value=(0, 9), description='Count:', max=9), Output()), _dom_classes=('wid…

<function __main__.update(sliderVal)>

## 4. WordCloud per Sentence Sentiment

In [12]:
# Read the processed sentence sentiment (the sentence and sentiment columns
# are used by the word cloud below)
df_sentences_sentiment = pd.read_csv('sentece_sentiment.csv')
df_sentences_sentiment.head()

Unnamed: 0,sentence,sentiment
0,Deep learning has yielded some remarkable resu...,0.959867
1,But one of the shortcomings (arguably) in curr...,0.810648
2,What is the significance of this current lack ...,0.0
3,What are the ethical ramifications of this issue?,0.0
4,How might this affect the long-run viability o...,0.0


In [13]:
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import ipywidgets as widgets
# Words passed to WordCloud as stopwords: a set built from wordcloud's STOPWORDS
stopwords = set(STOPWORDS)

# Function to update the Word Cloud
def update_word(values):
    """Draw a word cloud from the sentences whose sentiment class is selected.

    Parameters
    ----------
    values : iterable of str
        Any combination of 'Positive' (sentiment > 0), 'Neutral'
        (sentiment == 0) and 'Negative' (sentiment < 0). Other labels are
        ignored.

    Reads the module-level ``df_sentences_sentiment`` frame and ``stopwords``.
    Shows a matplotlib figure, or prints a notice and returns when the
    selection contains no text to plot.
    """
    sentiment = df_sentences_sentiment['sentiment']
    class_masks = {
        'Positive': sentiment > 0,
        'Neutral': sentiment == 0,
        'Negative': sentiment < 0,
    }
    # Start from an explicit all-False mask. The former seed
    # `sentiment != sentiment` is True for NaN, so sentences without a
    # sentiment score were included whatever the selection was.
    mask = pd.Series(False, index=sentiment.index)
    for v in values:
        if v in class_masks:
            mask = mask | class_masks[v]

    # Missing sentences are skipped: str.join() cannot handle NaN
    sentences = df_sentences_sentiment.loc[mask, 'sentence'].dropna()
    text = " ".join(str(sentence) for sentence in sentences)
    if not text.strip():
        # WordCloud.generate() raises ValueError when it is given no words,
        # e.g. when every option is deselected or no sentence matches
        print("No sentences match the selected sentiment; nothing to plot.")
        return

    wordcloud = WordCloud(background_color="white", stopwords=stopwords, width=1600, height=800).generate(text)
    plt.figure(figsize=(20, 10))
    plt.title('WordCloud per Sentence Sentiment', fontsize=50)
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()

# Multiple Select Box listing the three sentiment classes, all selected at start
sentiment_options = ['Positive', 'Neutral', 'Negative']
cbox = widgets.SelectMultiple(
    options=sentiment_options,
    value=list(sentiment_options),
    description='Sentiment',
    disabled=False
)
# Re-run update_word() with the selected labels whenever the selection changes
widgets.interact(update_word, values=cbox)

interactive(children=(SelectMultiple(description='Sentiment', index=(0, 1, 2), options=('Positive', 'Neutral',…

<function __main__.update_word(values)>

## 5. Keywords Count Bar Chart

In [14]:
# Read the processed keywords (the keywords and count columns are used by the
# bar chart below)
df_keywords = pd.read_csv('keywords.csv')
df_keywords.head()

Unnamed: 0,keywords,count
0,adversarial testing,2.0
1,based systems,2.0
2,bayesian networks,2.0
3,better understanding,2.0
4,decision trees,2.0


In [15]:
from bokeh.io import output_file, show
from bokeh.models import CustomJS, Slider
from bokeh.plotting import ColumnDataSource, figure, show
from bokeh.layouts import column, row

# Direct Bokeh output to the notebook
output_notebook()

# Two data sources with the same content: `source` feeds the bars and is
# rewritten by the slider callback, `original_source` is what the callback
# reads from
source, original_source = (
    ColumnDataSource(data=dict(keywords=df_keywords['keywords'], counts=df_keywords['count']))
    for _ in range(2)
)

# Categorical x axis with one factor per keyword; no toolbar
p = figure(title="Keywords counts", x_range=df_keywords['keywords'],
           width=900, height=500,
           min_border_left=50, min_border_right=50,
           tools="", toolbar_location=None)
p.title.text_font_size = '20pt'

# Interactive tools
hover = HoverTool(tooltips='Keywords @keywords <br> Count: @counts')
p.add_tools(hover, TapTool())

# Create the bar chart
bar = p.vbar(x='keywords', top='counts', source=source, width=0.9)
p.xaxis.major_label_orientation = math.pi/4

# Slider from 0 to the largest count, starting at the largest count
top_count = max(df_keywords['count'])
c_slider = Slider(start=0, end=top_count, value=top_count, step=1, title="Counts")

# Javascript to update bar chart: every column of `source` is rebuilt from
# `original_source`, keeping only the rows whose count is <= the slider value
filter_js = """
    var data = source.data;
    var original_data = original_source.data;
    const A = amp.value;
    const keyw = original_data['keywords']
    const counts = original_data['counts']
    for (var key in original_data) {
        data[key] = [];
        for (let i = 0; i < keyw.length; i++) {
            if(counts[i] <= A){
                data[key].push(original_data[key][i]);
            }
        }
    }
    source.change.emit();
    target_obj.change.emit();
"""
callback = CustomJS(
    args={
        'source': source,
        'original_source': original_source,
        'amp': c_slider,
        'target_obj': bar,
    },
    code=filter_js,
)
# Run the callback every time the slider value changes
c_slider.js_on_change('value', callback)

# No vertical grid lines; bars start at zero
p.xgrid.grid_line_color = None
p.y_range.start = 0

# Chart on top, slider underneath
layout = column(p, c_slider)

show(layout)