In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline
import kmapper as km
from sklearn import datasets,cluster
from sklearn.manifold import Isomap,MDS
import matplotlib.pyplot as plt

from matplotlib import cm, colors
import pandas as pd
import numpy as np
import networkx as nx
from networkx import path_graph, random_layout
from ipywidgets import interact, interactive,GridspecLayout
import ipywidgets as widgets

import random

In [3]:
df=pd.read_csv('.csv')# data passed to mapper
data=df.values#np.array(df)
end_df=pd.read_csv('.csv')# data for painting the network

In [4]:
#Function to wrap mapper and generate the network. For now the projection is by using Isomap, but the idea is to make the specific
#projection and clustering algorithm argguments for this function.

def TDAmapper(n_cubes=10,per_overlap=70, metric='euclidean'):
    global nx_graph
    np.random.seed(4812)
    random.seed(246) #after reading a bit the conclusion is that KeplerMapper uses both np.random and random processes,
    #so to be consistent I set both seeds so if you run mapper with the same parameters several times you get the same graph
    
    mapper = km.KeplerMapper(verbose=0)

    # Fit to and transform the data
    lens = mapper.fit_transform(data, projection=Isomap(n_components=2, n_neighbors=3), 
                                          distance_matrix=metric,scaler=None)

    graph = mapper.map(lens, data, nr_cubes=n_cubes, overlap_perc=per_overlap/100,
                       clusterer=cluster.AgglomerativeClustering(n_clusters=4,
                                                                 linkage="complete",
                                                                 affinity="cosine"))
    nx_graph=km.adapter.to_nx(graph)
    return(nx_graph)

In [5]:
#helper funtion to scale and map the values of the specified variable to color the nodes of the graph
def mapping(variable):
    var_type=end_df[variable].dtype.name
    if var_type=='string'or var_type=='object' or var_type=='category':
        end_df[variable]=end_df[variable].astype('category').cat.codes.replace(-1,np.NaN)
        
    mapping=[end_df.loc[nx_graph.node[i]['membership'], variable].mean() for i in nx_graph.node]
    return(mapping)

In [6]:
#function to plot and color two networks
def plot(seed, node_size, scale_L, scale_R, res,variable_L='mean.map', variable_R='mean.HR'):
        
    mapping_L=mapping(variable_L)
    mapping_R=mapping(variable_R)
    norm_L = colors.Normalize(vmin=min(mapping_L), vmax=max(mapping_L))
    norm_R = colors.Normalize(vmin=min(mapping_R), vmax=max(mapping_R))
    plt.rcParams['figure.dpi']=res
    
    fig, ax = plt.subplots(ncols=2, figsize=(17,7))
    
    np.random.seed(seed)
    nL=nx.draw(nx_graph,node_size=node_size, width=0.05, linewidths=2,ax=ax[0],vmin=scale_L[0],vmax=scale_L[1],
            cmap=plt.cm.RdYlBu_r, node_color=mapping_L)
    np.random.seed(seed)
    nR=nx.draw(nx_graph,node_size=node_size, width=0.05, linewidths=2,ax=ax[1],vmin=scale_R[0],vmax=scale_R[1],
            cmap=plt.cm.RdYlBu_r, node_color=mapping_R)
    
    fig.colorbar(cm.ScalarMappable(norm=norm_L, cmap=plt.cm.RdYlBu_r), ax=ax[0], orientation="horizontal", pad=0.01)
    fig.colorbar(cm.ScalarMappable(norm=norm_R, cmap=plt.cm.RdYlBu_r), ax=ax[1], orientation="horizontal", pad=0.01)

    fig.canvas.draw_idle()

In [8]:
TDAmapper()#Initialize mapper

<networkx.classes.graph.Graph at 0xdf2ea50>

In [9]:
#Creates a widget UI to change mapper parameters in the notebook. I am using this as a fast prototyping of a UI

#widgets for runing Mapper
mapper_grid = GridspecLayout(3, 4, height='150px') #generates the layout for Run Mapper tab
mapper_grid[0,0]=widgets.IntSlider(min=1, max=100, step=1, continuous_update=False, description='N cubes:')
mapper_grid[1,0]=widgets.FloatSlider(min=1, max=300, step=0.1, continuous_update=False, description='% overlap:')
mapper_grid[2,0]=widgets.Dropdown(options=['euclidean'], value='euclidean', description='Metric:') #more metrics available, need implementation
mapper_grid[0,1]=widgets.Dropdown(options=['Isomap', 'PCA', 'MDS'], value='Isomap', description='Lens:') #not implemented yet
mapper_grid[2,3]=widgets.Button(description='Run MAPPER')

#widgets for mapping both graphs
mapping_grid = GridspecLayout(3, 5, height='150px')# generates the layout for plotting tab
mapping_grid[0,0]=widgets.IntSlider(min=0,max=1000, step=10, continuous_update=False, description='Layout seed:') 
mapping_grid[0,1]=widgets.IntSlider(min=10, max=100, step=1, continuous_update=False, description='Node size:')
mapping_grid[0,2]=widgets.Dropdown(options=[('50dpi',50), ('75dpi',75), ('150dpi', 150), ('300dpi', 300)],
                           value=75,continuous_update=False, description='Img dpi:')
mapping_grid[1,0]=widgets.Dropdown(options=end_df.columns,value=end_df.columns[0],description='Mapping var:',disabled=False)
mapping_grid[1,2]=widgets.Dropdown(options=end_df.columns,value=end_df.columns[1],description='Mapping var:',disabled=False)

mapping_L=mapping(mapping_grid[1,0].value)
mapping_R=mapping(mapping_grid[1,2].value)

mapping_grid[2,0]=widgets.FloatRangeSlider(value=[min(mapping_L),max(mapping_L)], 
                                           min=min(mapping_L),
                                           max=max(mapping_L),step=0.1,description='Scale:',
                                   disabled=False,continuous_update=False,orientation='horizontal', 
                                   readout=True,readout_format='.1f')

mapping_grid[2,2]=widgets.FloatRangeSlider(value=[min(mapping_R),max(mapping_R)], 
                                           min=min(mapping_R),
                                           max=max(mapping_R),step=0.1,description='Scale:',
                                   disabled=False,continuous_update=False,orientation='horizontal', 
                                   readout=True,readout_format='.1f')

# re scale the slider L depending on the range of the variable selected to color
def update_scale_L_range(*args):
    mapping_grid[2,0].min = min(mapping(mapping_grid[1,0].value))
    mapping_grid[2,0].max = max(mapping(mapping_grid[1,0].value))
    
mapping_grid[1,0].observe(update_scale_L_range, 'value')

# re scale the slider R depending on the range of the variable selected to color
def update_scale_R_range(*args):
    mapping_grid[2,2].min = min(mapping(mapping_grid[1,2].value))
    mapping_grid[2,2].max = max(mapping(mapping_grid[1,2].value))
    
mapping_grid[1,2].observe(update_scale_R_range, 'value')

tab_nest = widgets.Tab()
tab_nest.children = [mapper_grid, mapping_grid]
tab_nest.set_title(0, 'Run MAPPER')
tab_nest.set_title(1, 'Plotting')

plot_out = widgets.interactive_output(plot, {'seed': mapping_grid[0,0], 'node_size': mapping_grid[0,1], 'scale_L': mapping_grid[2,0],
                                         'scale_R':mapping_grid[2,2], 'variable_L':mapping_grid[1,0], 'variable_R':mapping_grid[1,2], 
                                         'res':mapping_grid[0,2]})

#re run mapper when 'run mapper' is clicked
def run_clicked(b):
    plot_out.clear_output()
    TDAmapper(n_cubes=mapper_grid[0,0].value, per_overlap=mapper_grid[1,0].value)
    out = widgets.interactive_output(plot, {'seed': mapping_grid[0,0], 'node_size': mapping_grid[0,1], 'scale_L': mapping_grid[2,0],
                                         'scale_R':mapping_grid[2,2], 'variable_L':mapping_grid[1,0], 'variable_R':mapping_grid[1,2], 
                                         'res':mapping_grid[0,2]})
    with plot_out:
        display(out)
        
mapper_grid[2,3].on_click(run_clicked)

In [None]:
#generates mapper network based on the widgets default
TDAmapper(n_cubes=mapper_grid[0,0].value, per_overlap=mapper_grid[1,0].value)

In [10]:
#Displays the UI. There are two tabs, the first to control parameters for mapper. Every time a parameter is changed, the run
#mapper button signals to re run mapper. I did not wanted to do a 'live' update of mapper since depending on the parameters
#takes a lot of time and lag build up. The second tab controls the coloring parameters and can be change without the need to run
#mapper every time, what makes the process a bit faster.

display(widgets.VBox([tab_nest, plot_out]))

VBox(children=(Tab(children=(GridspecLayout(children=(IntSlider(value=1, continuous_update=False, description=…