# Interactive Innovation Mapping with Python

Bring some life to your innovation mapping notebook analysis with interactive data viz! 🕹

---

This tutorial covers a few examples of interactive data visualisation with Python that can be used to create rich analysis notebooks or as inline prototypes for web visualisations.

In this tutorial, we will build on Bokeh and make use of HoloViews, GeoViews and Datashader. There are, however, many other options for interactive data visualisation with Python, including Altair, Plotly, Dash, and even Matplotlib, so try them out too!

## Preamble

In [None]:
%load_ext autoreload
%autoreload 2
# install im_tutorial package
# !pip install git+https://github.com/nestauk/im_tutorials.git

In [None]:
# useful Python tools
import itertools
import collections

# matplotlib for static plots
import matplotlib.pyplot as plt
# networkx for networks
import networkx as nx
# numpy for mathematical functions
import numpy as np
# pandas for handling tabular data
import pandas as pd
# seaborn for pretty statistical plots
import seaborn as sns

# Use the fully-qualified option name: the bare 'max_columns' pattern is
# ambiguous in recent pandas releases (it also matches a Styler option)
# and raises an OptionError there.
pd.set_option('display.max_columns', 99)

# basic bokeh imports for an interactive scatter plot or line chart
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Circle, Line

# NB: If using Google Colab, this function must be run at 
# the end of any cell that you want to display a bokeh plot.
# If using Jupyter, then this line need only appear once at
# the start of the notebook.
output_notebook()

## Import Data

In [None]:
# The im_tutorials datasets module can be used to easily load datasets.
from im_tutorials.data.gtr import gtr_table, gtr_link_table, gtr_table_list
from im_tutorials.data.sdg import sdg_web_articles

## SDG Classifier

In [None]:
df_sdg = sdg_web_articles()

In [None]:
sdg_definitions = {
     1: '1. No Poverty',
     2: '2. Zero Hunger',
     3: '3. Good Health & Well-being',
     4: '4. Quality Education',
     5: '5. Gender Equality',
     6: '6. Clean Water & Sanitation',
     7: '7. Affordable & Clean Energy',
     8: '8. Decent Work & Economic Growth',
     9: '9. Industry, Innovation & Infrastructure',
     10: '10.  Reduced Inequalities',
     11: '11.  Sustainable Cities & Communities',
     12: '12.  Responsible Consumption & Production',
     13: '13.  Climate Action',
     14: '14.  Life Below Water',
     15: '15.  Life on Land',
     16: '16.  Peace, Justice & Strong Institutions',
     17: '17.  Partnerships for the Goals'
}

In [None]:
df_sdg.head()

In [None]:
def remove_goal(l, goal=17):
    """Return a new list with every occurrence of ``goal`` dropped from ``l``.

    Args:
        l: Iterable of goal identifiers.
        goal: Identifier to filter out (defaults to 17).

    Returns:
        A list with the remaining elements of ``l`` in their original order;
        ``l`` itself is not modified.
    """
    kept = []
    for candidate in l:
        if candidate != goal:
            kept.append(candidate)
    return kept

df_sdg['sdg_goals'] = df_sdg['sdg_goals'].apply(remove_goal)

In [None]:
df_sdg['n_goals'] = [len(x) for x in df_sdg['sdg_goals']]

fig, ax = plt.subplots()
df_sdg['n_goals'].value_counts().plot.bar(ax=ax)
ax.set_title('Number SDGs per Article')
ax.set_xlabel('N Goals')
ax.set_ylabel('Frequency');

In [None]:
df_sdg = df_sdg[(df_sdg['n_goals'] > 0) & (df_sdg['n_goals'] < 4)]

In [None]:
# Flatten the per-article goal lists into a single sequence, translate goal
# numbers to their labels and count how often each goal occurs.
# `itertools` is imported at the top of the notebook, whereas the bare
# `chain` name is only imported in a later cell.
all_goals = list(itertools.chain.from_iterable(df_sdg['sdg_goals']))
sdg_counts = pd.Series(all_goals).map(sdg_definitions).value_counts()

fig, ax = plt.subplots()
sdg_counts.plot.barh(ax=ax)
ax.set_title('Frequency of Goals')
ax.set_xlabel('Frequency')
ax.set_ylabel('Goal');

In [None]:
fig, ax = plt.subplots()
ax.hist(df_sdg['text'].str.len(), bins=100)
ax.set_title('Text Length')
ax.set_xlabel('N Characters')
ax.set_ylabel('Frequency');

In [None]:
df_sdg = df_sdg[df_sdg['text'].str.len() > 140]
df_sdg = df_sdg.drop_duplicates('text')
df_sdg = df_sdg.drop('index', axis=1)
df_sdg = df_sdg.reset_index()

In [None]:
df_sdg.shape

### Text Preprocessing

In [None]:
from im_tutorials.features.text_preprocessing import *

In [None]:
# Tokenise every article and flatten one level of nesting in the result of
# tokenize_document, giving a flat token list per document.
# `itertools` (imported at the top of the notebook) is used because the bare
# `chain` name is only imported explicitly in a later cell.
tokenized = [list(itertools.chain.from_iterable(tokenize_document(document)))
             for document in df_sdg['text'].values]

In [None]:
from gensim.models.phrases import Phraser, Phrases

In [None]:
phrases = Phrases(tokenized, min_count=10)
phraser = Phraser(phrases)
bigrammed = [phraser[d] for d in tokenized]

In [None]:
print(len(phraser.phrasegrams))
phraser.phrasegrams

In [None]:
from collections import Counter
from itertools import chain

term_counts = Counter(chain(*bigrammed))
term_counts.most_common(30)

In [None]:
stop_words = ['development', 'sdg', 'new', 'global', 'also', 'including', 'support', 'international',
             'report', 'implementation', 'national', 'said', 'agenda', 'meeting', 'regional']

In [None]:
bigrammed = [' '.join([t for t in d if t not in stop_words]) for d in bigrammed]

In [None]:
bigrammed[0]

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.manifold import TSNE, Isomap

In [None]:
# TfidfVectorizer's first parameter is `input` (a mode string such as
# 'content'), not the corpus, so the documents are only passed to
# fit_transform.
tfidf = TfidfVectorizer(min_df=10, max_df=0.5)
tfidf_vecs = tfidf.fit_transform(bigrammed)
# Reduce the sparse TF-IDF matrix to 30 dense components.
svd = TruncatedSVD(n_components=30)
svd_vecs = svd.fit_transform(tfidf_vecs)

In [None]:
tsne = TSNE(n_components=2)
tsne_vecs = tsne.fit_transform(svd_vecs)

In [None]:
from bokeh.models import HoverTool
from bokeh.palettes import Category20_16

In [None]:
df_sdg.index

In [None]:
single_goals = (df_sdg[df_sdg['n_goals'] == 1]).index.values
tsne_vecs_single = tsne_vecs[single_goals]
goal_labels_single = [g[0] for g in df_sdg['sdg_goals'][single_goals]]
titles_single = df_sdg['title'][single_goals].values

In [None]:
colors = [Category20_16[g-1] for g in goal_labels_single]

cds = ColumnDataSource(data={
    'tsne_0': tsne_vecs[:, 0][single_goals],
    'tsne_1': tsne_vecs[:, 1][single_goals],
    'color': colors,
    'goal': [sdg_definitions[g] for g in goal_labels_single],
    'title': titles_single,
    'id': single_goals
})

p = figure(width=900, title='TSNE Plot of Single SDG Article Vectors')

hover = HoverTool(tooltips=[('Goal', '@goal'), ('Title', '@title'), ('ID', '@id')])

p.circle(source=cds, x='tsne_0', y='tsne_1', color='color', line_width=0, legend='goal', radius=0.4, alpha=0.9)
p.add_tools(hover)

show(p)
# output_notebook()


## GtR Projects

In [None]:
gtr_projects_df = gtr_table('projects')
gtr_funds_df = gtr_table('funds')
gtr_funds_link_table = gtr_link_table('funds')

- Join funding table to link table to get project ids. Groupby project to get start and end date, sum of funding.
- Group leads and collaborators and create network
- Join with project descriptions and make collaboration network with SDGs

In [None]:
gtr_funds_df = gtr_funds_df.merge(gtr_funds_link_table, left_on='id', right_on='id')
gtr_funds_df = gtr_funds_df.drop_duplicates(['project_id', 'amount'])

In [None]:
gtr_funds_df.head()

In [None]:
print('Earliest start date:', gtr_funds_df['start'].min())
print('Earliest end date:', gtr_funds_df['end'].min())
print('\n')
print('Latest start date:', gtr_funds_df['start'].max())
print('Latest end date:', gtr_funds_df['end'].max())

In [None]:
gtr_funds_df['start'].dt.year.value_counts()[:15]

In [None]:
gtr_funds_df['end'].dt.year.value_counts()

In [None]:
min_start_year = 2006
max_start_year = 2019
max_end_year = 2030

# Keep funds that start within [min_start_year, max_start_year] and end no
# later than max_end_year. The start year was previously compared against
# max_end_year, which left max_start_year unused.
start_year = gtr_funds_df['start'].dt.year
end_year = gtr_funds_df['end'].dt.year
gtr_funds_df = gtr_funds_df[
    (start_year >= min_start_year)
    & (start_year <= max_start_year)
    & (end_year <= max_end_year)
]

In [None]:
# duration = gtr_funds_df['end'] - gtr_funds_df['start']

# fig, ax = plt.subplots(ncols=2, figsize=(10,4))
# ax[0].hist(duration.dt.days / 365.25, bins=100)
# ax[1].scatter(duration.dt.days / 365.25, np.log10(gtr_funds_df['amount']), alpha=0.1)

In [None]:
# Attach fund records to every project (left join keeps projects without funds).
gtr_projects_funds_df = gtr_projects_df.merge(
    gtr_funds_df, left_on='id', right_on='project_id', how='left')

# Keep one row per project. The explicit copy makes the result an independent
# frame, so adding a column below does not raise SettingWithCopyWarning.
# NOTE(review): rows with a missing project_id (projects without funds) all
# count as duplicates of each other here — confirm that is intended.
gtr_project_funds_df = gtr_projects_funds_df.drop_duplicates(
    subset=['project_id']).copy()
# 'start_y' is presumably the fund start date, suffixed by the merge.
gtr_project_funds_df['start_year'] = gtr_project_funds_df['start_y'].dt.year

In [None]:
rolling_window = 3

gtr_project_funds_df['start_year'] = gtr_project_funds_df['start_y'].dt.year

# group funds by start of year
grouper = pd.Grouper(key='start_y', freq='YS')
amount_year_sum = gtr_project_funds_df.groupby([grouper, 'leadFunder'])['amount'].sum()
amount_year_sum = amount_year_sum.loc[
    pd.to_datetime('2006-01-01'):pd.to_datetime('2018-01-01')].unstack()

amount_year_sum_rolling = amount_year_sum.rolling(rolling_window).mean()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(amount_year_sum_rolling, marker='o')
ax.legend(amount_year_sum_rolling.columns)
ax.set_xlabel('Year')
ax.set_ylabel('Total Funding (£)')
ax.set_title('Total Funding Over Time');

In [None]:
from bokeh.palettes import Category10_9
from bokeh.models import PrintfTickFormatter, HoverTool

In [None]:
# create a figure object with a datetime x axis
p = figure(width=550, height=350, x_axis_type='datetime',
          title='Total Awards by Funder over Time')

# the x values are shared by every funder
years = amount_year_sum_rolling.index.values

# loop through funders, select a color, plot line and circles;
# the palette is cycled so that having more funders than palette
# entries cannot raise an IndexError
for c, color in zip(amount_year_sum_rolling.columns,
                    itertools.cycle(Category10_9)):
    p.line(
        x=years,
        y=amount_year_sum_rolling[c],
        legend=c,
        color=color,
        line_width=2,
        alpha=0.7,
        name=c,
        muted_alpha=0.1,
        muted_color=color
    )
    p.circle(
        x=years,
        y=amount_year_sum_rolling[c],
        legend=c,
        color=color,
        name=c,
        muted_alpha=0.1,
        muted_color=color
    )

# build a hover tool that will display funding amount (y value),
# year (x value) and funder (the renderer name)
hover = HoverTool(tooltips=[('Amount', '£@y{( 0.00 a)}'),
                            ('Year', '@x{%F}'),
                            ('Funder', '$name')],
                  line_policy='nearest',
                  formatters={'x': 'datetime'}
                 )
p.add_tools(hover)

# add labels and formatting
p.xaxis.axis_label = 'Year'
p.yaxis.axis_label = 'Total Funding'
p.yaxis[0].formatter = PrintfTickFormatter(format="£%.1e")
# add interactive legend: clicking an entry mutes its glyphs
p.legend.click_policy = "mute"
p.legend.location = 'top_left'
p.legend.label_text_font_size = '6pt'

show(p)

### Bokeh Scatter (Circle)

In [None]:
duration = (gtr_funds_df['end'] - gtr_funds_df['start']).dt.days / 365.25
amount = gtr_funds_df['amount']

p = figure(width=550, height=350, y_axis_type="log")
p.grid.visible = False

p.circle(x=duration, y=amount, size=1, alpha=0.05)

p.xaxis.axis_label = 'Duration (years)'
p.yaxis.axis_label = 'Funding Amount (£)'

p.add_tools(HoverTool(
    tooltips=[("Duration", "$x"), ("Amount", "$y")],
    mode="mouse", point_policy="follow_mouse"
))

show(p)

## Declarative Hexbin

In [None]:
import holoviews as hv
hv.extension('bokeh')

In [None]:
# log10 is only defined for positive amounts, so restrict both series to
# the rows with a positive funding amount (`mask` was previously undefined).
mask = amount > 0
df = pd.DataFrame({'Project Duration (years)': duration[mask],
                   'Funding Amount (log10 £)': np.log10(amount[mask])})

# Hexagonal binning of the two columns, with a log-scaled colour axis.
hx = hv.HexTiles(df)
hx.opts(width=550, height=350, logz=True, yformatter='£10^%d',
        tools=['hover'], hover_color='pink', hover_alpha=0.7,
        title='Funding Amount by Project Duration')

To see the plotting options for a specific HoloViews chart type, you can interrogate the corresponding `opts` entry with the `?` notation.

```python
from holoviews import opts
opts.HexTiles?
```

## Maps

In [None]:
gtr_org_locs_df = gtr_table('organisations_locations')
gtr_org_locs_df.head()

In [None]:
# Keep only organisations that have both coordinates. The previous version
# referenced an undefined `df_org_locs` frame. The copy gives an independent
# frame, so later cells can add columns to it without a
# SettingWithCopyWarning.
gtr_org_locs_df = gtr_org_locs_df.dropna(
    subset=['latitude', 'longitude']).copy()

### Folium

In [None]:
from im_tutorials.data.gis import country_basic_info
country_df = country_basic_info()

In [None]:
import folium
from folium.plugins import MarkerCluster, FastMarkerCluster

In [None]:
lat_c, lng_c = country_df.set_index('alpha3Code').loc['GBR']['latlng']

cluster_map = folium.Map(location=[lat_c, lng_c], zoom_start=5, width=550, height=550)
cluster_map.add_child(FastMarkerCluster(data=gtr_org_locs_df[['latitude', 'longitude']].values.tolist()))

**Tasks**

1. Change the marker icon.
2. Remove the organisations at duplicate addresses.

## Datashader

Does one job. Does it well.

In [None]:
import datashader as ds, datashader.transfer_functions as tf, numpy as np
from datashader import spatial
from functools import partial
from datashader.utils import export_image
from datashader.colors import colormap_select, Greys9

In [None]:
from holoviews.element import tiles
from holoviews.operation.datashader import datashade

In [None]:
from datashader.utils import lnglat_to_meters as webm

In [None]:
gtr_org_locs_df['easting'], gtr_org_locs_df['northing'] = webm(
    gtr_org_locs_df['longitude'], gtr_org_locs_df['latitude'])

In [None]:
from holoviews.element import tiles
from holoviews.operation.datashader import datashade
from holoviews.streams import RangeXY
from colorcet import kbc

In [None]:
gtr_org_locs_singles = gtr_org_locs_df.drop_duplicates(subset=['latitude', 'longitude'])

In [None]:
cmap = kbc

In [None]:
import geoviews as gv
import cartopy.crs as crs

In [None]:
url = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'

map_tiles = gv.WMTS(url, crs=crs.GOOGLE_MERCATOR)

In [None]:
from holoviews.operation.datashader import datashade, dynspread

In [None]:
width=600
height=600

opts = dict(width=width, height=height, x_sampling=1, y_sampling=1, cmap=cmap, dynamic=False)
tile_opts  = dict(width=width, height=height, xaxis=None, yaxis=None, bgcolor='white', show_grid=False)


def make_view(x_range, y_range, **kwargs):
    """Build the datashaded organisation layer on top of the map tiles.

    Args:
        x_range: Horizontal extent forwarded to ``datashade``.
        y_range: Vertical extent forwarded to ``datashade``.
        **kwargs: Accepted but not used.

    Returns:
        The spread, datashaded points overlaid with the semi-transparent
        tile layer.
    """
    org_points = hv.Points(gtr_org_locs_singles, ['easting', 'northing'])
    shaded = datashade(org_points, x_range=x_range, y_range=y_range, **opts)
    spread = dynspread(shaded, shape='circle', threshold=.1)
    background = map_tiles.options(alpha=0.5, **tile_opts)
    return spread * background

In [None]:
dmap = hv.DynamicMap(make_view, streams=[RangeXY()])
plot = hv.renderer('bokeh').instance(mode='server').get_plot(dmap)
dmap

## Visualising Text

In [None]:
from sklearn.decomposition import TruncatedSVD
from sklearn.manifold import TSNE
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Replace missing abstracts in the merged frame by assignment; a chained
# `fillna(..., inplace=True)` on a selected column is not guaranteed to
# write back to the parent frame.
gtr_projects_funds_df['abstractText'] = (
    gtr_projects_funds_df['abstractText'].fillna(''))
gtr_projects_funds_df['techAbstractText'] = (
    gtr_projects_funds_df['techAbstractText'].fillna(''))

# Build the combined text from the project table's own columns. Taking them
# from the merged frame relies on index alignment between two different
# frames, and the left merge does not keep the project table's index.
# NOTE(review): assumes both abstract columns come from gtr_projects_df — confirm.
gtr_projects_df['full_text'] = (gtr_projects_df['abstractText'].fillna('')
                                + ' '
                                + gtr_projects_df['techAbstractText'].fillna(''))

In [None]:
lens = gtr_projects_df['full_text'].str.len()
hist = np.histogram(lens, bins=2000)
df_hist = pd.DataFrame(hist).T
df_hist[0] = df_hist[0].cumsum() / df_hist[0].cumsum().max() * 100
df_hist.columns = ['Freq', 'Length']

In [None]:
hover = HoverTool(
    tooltips=[
        ('Length', '@{Length}'), 
        ('Percentile', '@{Freq} %')], 
    mode='vline')
c = hv.Curve(df_hist, 'Length', 'Freq')
c.opts(tools=[hover])

In [None]:
gtr_text_df = gtr_projects_df[gtr_projects_df['full_text'].str.len() > 280]

In [None]:
gtr_topic_link = gtr_link_table('topic')
gtr_topics_df = gtr_table('topic')

In [None]:
sorted(gtr_topics_df['text'].values)

In [None]:
gtr_topic_link['id'].value_counts()

In [None]:
gtr_topics_df.set_index('id').loc['FB535BD0-E265-4C0A-8532-32DCB83A3951']

In [None]:
topic_id = gtr_topics_df[gtr_topics_df['text'].str.startswith('Tools, tech')].iloc[0]['id']
subset_ids = gtr_topic_link[gtr_topic_link['id'] == topic_id]['project_id']
topic_projects_df = gtr_projects_df.set_index('id').loc[subset_ids]

In [None]:
topic_projects_df['leadFunder'].value_counts()

In [None]:
topic_projects_df = gtr_projects_df.sample(frac=0.05)

In [None]:
text_vecs = TfidfVectorizer(min_df=10, max_df=.2).fit_transform(topic_projects_df['full_text'])
svd_vecs = TruncatedSVD(n_components=30).fit_transform(text_vecs)
tsne_vecs = TSNE().fit_transform(svd_vecs)

In [None]:
s = topic_projects_df['leadFunder'].astype('category').cat.codes

In [None]:
from bokeh.palettes import Category10_10

In [None]:
# One colour per lead-funder category code; the modulo keeps the lookup
# inside the palette when there are more codes than colours (a code of -1,
# used for missing values, still maps to the last entry as before).
colors = [Category10_10[g % len(Category10_10)] for g in s]

cds = ColumnDataSource(data={
    'tsne_0': tsne_vecs[:, 0],
    'tsne_1': tsne_vecs[:, 1],
    'color': colors,
    'funder': topic_projects_df['leadFunder'].values,
    'title': topic_projects_df['title'],
})

# The points are project text vectors coloured by lead funder.
p = figure(width=900, title='TSNE Plot of Project Text Vectors by Lead Funder')

hover = HoverTool(tooltips=[('Funder', '@funder'), ('Title', '@title')])

# The legend must reference a column that exists in the data source; there
# is no 'goal' column here, so group the legend by funder.
p.circle(source=cds, x='tsne_0', y='tsne_1', color='color', line_width=0, legend='funder', radius=0.4, alpha=0.9)
p.add_tools(hover)

show(p)

In [None]:
# 'full_text' is created on gtr_projects_df, not on the merged frame.
gtr_projects_df['full_text'].head()

### Organisation Locations

### Extras

In [None]:
def sine_curve(phase, freq):
    """Return an ``hv.Curve`` of ``sin(phase + freq * x)``.

    The curve is sampled at 100 x values, 0.0, 0.1, ..., 9.9.

    Args:
        phase: Offset added inside the sine.
        freq: Multiplier applied to x inside the sine.
    """
    xvals = [0.1 * i for i in range(100)]
    return hv.Curve((xvals, [np.sin(phase + freq * x) for x in xvals]))


# `frequencies` was never defined in the notebook; these are example values.
frequencies = [0.5, 0.75, 1.0, 1.25]
curve_dict = {f: sine_curve(0, f) for f in frequencies}

In [None]:
def make_multi_line_plot():
    """Overlay one ``hv.Path`` per column of ``amount_year_sum``.

    Each path plots the column's values against the frame's index. The
    options are applied to the ``Path`` elements (they were previously
    addressed to ``Histogram``, which the overlay does not contain).

    Returns:
        An ``hv.NdOverlay`` keyed by column name.
    """
    paths = {c: hv.Path((amount_year_sum.index.values, amount_year_sum[c]))
             for c in amount_year_sum.columns}
    return hv.NdOverlay(paths).opts(
        hv.opts.Path(width=1000, alpha=0.8, muted_alpha=0.1))


# Display the overlay as the cell output.
make_multi_line_plot()

In [None]:
ndoverlay = hv.NdOverlay(curve_dict, kdims='frequency')

In [None]:
import datashader as ds
import datashader.transfer_functions as tf
from datashader.layout import random_layout, circular_layout, forceatlas2_layout
from datashader.bundling import connect_edges, hammer_bundle

In [None]:
import networkx as nx

In [None]:
G = nx.karate_club_graph()

In [None]:
def nx_layout(graph):
    """Compute a spring layout for ``graph`` and return it as data frames.

    Returns:
        A ``(nodes, edges)`` tuple: ``nodes`` is indexed by node ``id`` with
        ``x`` and ``y`` columns holding the layout position, and ``edges``
        has ``source`` and ``target`` columns, one row per graph edge.
    """
    positions = nx.spring_layout(graph)
    rows = [[node, *positions[node].tolist()] for node in graph.nodes]
    nodes = pd.DataFrame(rows, columns=['id', 'x', 'y']).set_index('id')

    edges = pd.DataFrame(list(graph.edges), columns=['source', 'target'])
    return nodes, edges

In [None]:
pos_n, pos_e = nx_layout(G)

In [None]:
def nx_plot(graph, name=""):
    """Render ``graph`` three ways: direct edges and two bundled variants.

    Args:
        graph: Graph to lay out and draw.
        name: Title for the direct-edge image; falls back to ``graph.name``
            when empty.

    Returns:
        A list of three images: direct edges, edges bundled with
        ``hammer_bundle``'s default bandwidth, and edges bundled with a
        wider initial bandwidth.
    """
    title = name or graph.name
    print(title, len(graph.edges))
    nodes, edges = nx_layout(graph)

    # Keep the bandwidth in one place so the image label cannot drift away
    # from the value actually passed to hammer_bundle.
    wide_bandwidth = 0.07

    direct = connect_edges(nodes, edges)
    bundled_default = hammer_bundle(nodes, edges)
    bundled_wide = hammer_bundle(nodes, edges, initial_bandwidth=wide_bandwidth)

    # NOTE(review): the first bundled label assumes hammer_bundle's default
    # initial_bandwidth is 0.05 — confirm against the datashader version used.
    return [graphplot(nodes, direct, title),
            graphplot(nodes, bundled_default, "Bundled bw=0.05"),
            graphplot(nodes, bundled_wide, f"Bundled bw={wide_bandwidth:.2f}")]

In [None]:
def edgesplot(edges, name=None, canvas=None):
    """Shade the lines in ``edges`` by count.

    Args:
        edges: Data with ``x`` and ``y`` columns describing the lines.
        name: Name given to the shaded image.
        canvas: Canvas to draw on; a new one built from ``cvsopts`` is used
            when omitted.
    """
    if canvas is None:
        canvas = ds.Canvas(**cvsopts)
    line_counts = canvas.line(edges, 'x', 'y', agg=ds.count())
    return tf.shade(line_counts, name=name)
    
def graphplot(nodes, edges, name="", canvas=None, cat=None):
    """Stack the node image over the edge image for one graph.

    Args:
        nodes: Data with ``x`` and ``y`` columns for the node positions.
        edges: Edge line data passed to ``edgesplot``.
        name: Name of the stacked image; also the prefix of the layer names.
        canvas: Canvas to draw on; when omitted, one is created that spans
            the min/max of the node coordinates.
        cat: Optional category column forwarded to ``nodesplot``.
    """
    if canvas is None:
        x_bounds = (nodes.x.min(), nodes.x.max())
        y_bounds = (nodes.y.min(), nodes.y.max())
        canvas = ds.Canvas(x_range=x_bounds, y_range=y_bounds, **cvsopts)

    # Local names avoid reusing `np`, the alias the notebook gives numpy.
    node_img = nodesplot(nodes, name + " nodes", canvas, cat)
    edge_img = edgesplot(edges, name + " edges", canvas)
    return tf.stack(edge_img, node_img, how="over", name=name)

def nodesplot(nodes, name=None, canvas=None, cat=None):
    """Shade the node points in red and spread them by 3 pixels.

    Args:
        nodes: Data with ``x`` and ``y`` columns for the node positions.
        name: Name given to the resulting image.
        canvas: Canvas to draw on; a new one built from ``cvsopts`` is used
            when omitted.
        cat: Optional category column; when given, points are aggregated
            with ``ds.count_cat(cat)``, otherwise no aggregator is passed.
    """
    if canvas is None:
        canvas = ds.Canvas(**cvsopts)
    if cat is None:
        aggregator = None
    else:
        aggregator = ds.count_cat(cat)
    point_agg = canvas.points(nodes, 'x', 'y', aggregator)
    shaded = tf.shade(point_agg, cmap=["#FF3333"])
    return tf.spread(shaded, px=3, name=name)

In [None]:
cvsopts = dict(plot_height=400, plot_width=400)

In [None]:
from itertools import chain

In [None]:
plots = [nx_plot(g) for g in [G]]

tf.Images(*chain.from_iterable(plots)).cols(3)

In [None]:
# Wrap the yearly funding table in a HoloViews Dataset.
# NOTE(review): this rebinds `ds`, which earlier cells import as the
# datashader module and which edgesplot/graphplot/nodesplot call as
# `ds.Canvas` — consider a different variable name.
ds = hv.Dataset(amount_year_sum)

In [None]:
amount_year_sum = amount_year_sum.reset_index()

In [None]:
scatter = hv.Curve(amount_year_sum, 'start_y', 'EPSRC')

In [None]:
np.arange(NLINES)[np.newaxis, :]

In [None]:
hv.NdOverlay(
    {c: hv.Path((amount_year_sum.index.values, amount_year_sum[c])) for c in amount_year_sum.columns}).opts(
    'Histogram', width=1000, alpha=0.8, muted_alpha=0.1)

In [None]:
hv.Path(aamount_year_sum.columns, amount_year_sum)

In [None]:
(np.arange(N), np.random.rand(N, NLINES) + np.arange(NLINES)[np.newaxis, :])

In [None]:
hv.Path

In [None]:
hv.help(hv.Path)

In [None]:
fig, ax = plt.subplots()
ax.hist(np.log10(gtr_funds_df[gtr_funds_df['amount'] > 0]
                 .groupby('project_id')['amount'].sum()), bins=100);
# ax.set_xscale('log')

### Datashader Map

In [None]:
df_gtr = df_gtr[(df_gtr['rel'] == 'LEAD_ORG') |
               (df_gtr['rel'] == 'COLLAB_ORG') | 
               (df_gtr['rel'] == 'PARTICIPANT_ORG')]

In [None]:
df_gtr.head()

In [None]:
np.sum(df_gtr['id'].value_counts() > 50)

In [None]:
df_gtr['rel'].value_counts()

In [None]:
gtr_df = gtr_df[~pd.isnull(gtr_df['research_topics'])]

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer
from itertools import chain

from sklearn.decomposition import TruncatedSVD
from sklearn.manifold import TSNE

from annoy import AnnoyIndex

In [None]:
topics = sorted(set(chain(*gtr_df['research_topics'])))
mlb = MultiLabelBinarizer(classes=topics)
df_topics = pd.DataFrame(mlb.fit_transform(gtr_df['research_topics']), columns=topics)
df_topics = df_topics.T

In [None]:
svd = TruncatedSVD(n_components=30)
tsne = TSNE(n_components=2)

svd_vecs = svd.fit_transform(df_topics)
tsne_vecs = tsne.fit_transform(svd_vecs)

In [None]:
from gensim.sklearn_api.phrases import PhrasesTransformer

In [None]:
# Index the SVD vectors for approximate nearest-neighbour search.
# The index dimensionality is read from the vectors themselves so it stays
# in step with the number of SVD components.
n_dims = svd_vecs.shape[1]
t = AnnoyIndex(n_dims, 'angular')
for i, vec in enumerate(svd_vecs):
    t.add_item(i, vec)

t.build(500)  # 500 trees

In [None]:
# NB: despite its name, min_dist is the largest distance at which two
# topics are still linked by an edge.
min_dist = 0.9

dists = {}
edges = []
for i in range(df_topics.shape[0]):
    # The edge source is always the queried item itself, rather than whatever
    # the index happens to return first.
    source = df_topics.index[i]
    # Query once and drop the item itself from its own neighbour list.
    closest = [n for n in t.get_nns_by_item(i, 5) if n != i]
    for n in closest:
        dist = t.get_distance(i, n)
        if dist <= min_dist:
            ns = df_topics.index[n]
            # Sort the pair so (a, b) and (b, a) map to the same edge key.
            edge = tuple(sorted([source, ns]))
            edges.append(edge)
            dists[edge] = dist


In [None]:
from collections import Counter
import networkx as nx

In [None]:
# One weighted edge per unique topic pair. `dists` holds exactly the unique
# edges, in first-seen order, together with their distance.
edge_list = [(source, target, {'weight': dist})
             for (source, target), dist in dists.items()]

g = nx.Graph()
g.add_edges_from(edge_list)

# `weight` is not a drawing keyword, so it is no longer passed to nx.draw.
# NOTE(review): the default layout presumably still reads the 'weight'
# edge attribute — confirm against the networkx version in use.
nx.draw(g, node_size=25)

In [None]:
plt.scatter(tsne_vecs[:, 0], tsne_vecs[:, 1])

In [None]:
funders_time_df = gtr_df.groupby(['start_year', 'funder_name'])['project_id'].count().unstack().loc[2006:2016]
cds = ColumnDataSource.from_df(funders_time_df)

In [None]:
funders_time_df.shape

In [None]:
from bokeh.palettes import Category20_11
from bokeh.models import HoverTool

In [None]:
# Work on a private list copy: a later cell pops from `cmap`, which must
# not mutate the shared Bokeh palette object.
cmap = list(Category20_11)

In [None]:
x = cmap.pop()

In [None]:
# One hover tool shared by all lines; its tooltips are filled in per column.
hover = HoverTool(tooltips=[], mode='vline')

p = figure()

for i, c in enumerate(funders_time_df.columns):
    # Name each renderer after its column, as the earlier funder chart does.
    p.line(source=cds, x='start_year', y=c, line_width=2, alpha=0.9, color=Category20_11[i],
          name=str(c))
    # Braces are required for field names that contain spaces or punctuation.
    hover.tooltips.append((f'{c}', f'@{{{c}}}'))
p.add_tools(hover)

show(p)

In [None]:
# Call the factory: assigning `figure` itself would bind the function, not
# a new plot.
p = figure()