## Plotly plot of the kmapper graph associated to the breast cancer dataset

In [None]:
# %load breast-cancer.py
import sys
try:
    import pandas as pd
except ImportError as e:
    print("pandas is required for this example. Please install with conda or pip  and then try again.")
    sys.exit()

import numpy as np
import kmapper as km
import sklearn
from sklearn import cluster, ensemble
from kmapper import plotlyviz as pl

In [None]:
from plotly.offline import (
    download_plotlyjs,
    init_notebook_mode,
    iplot,
    plot,
)

# Enable inline rendering of Plotly figures in this notebook.
# NOTE(review): per the plotly.offline docs, connected=True loads plotly.js
# from a CDN instead of embedding it, so viewing needs internet access — confirm.
init_notebook_mode(connected=True)

In [None]:
# For data we use the Wisconsin Breast Cancer Dataset
# Via: https://www.kaggle.com/uciml/breast-cancer-wisconsin-data
df = pd.read_csv("data.csv")

# Every column except the record id and the label is used as a feature.
# NOTE(review): this also keeps any all-NaN column the CSV may contain
# (it becomes a constant 0 feature after fillna below) — verify against data.csv.
feature_names = [c for c in df.columns if c not in ["id", "diagnosis"]]

# Encode the label: 1 where diagnosis is "M", 0 for everything else.
df["diagnosis"] = (df["diagnosis"] == "M").astype(int)
X = np.array(df[feature_names].fillna(0)) # quick and dirty imputation
y = np.array(df["diagnosis"])

# We create a custom 1-D lens with Isolation Forest
# (the decision-function score of every row, as a column vector)
model = ensemble.IsolationForest(random_state=1729)
model.fit(X)
lens1 = model.decision_function(X).reshape(-1, 1)

# We create another 1-D lens with L2-norm
mapper = km.KeplerMapper(verbose=0)
lens2 = mapper.fit_transform(X, projection="l2norm")

# Combine both lenses to create a 2-D [Isolation Forest, L^2-Norm] lens
lens = np.c_[lens1, lens2]

# Create the simplicial complex
# NOTE(review): newer kmapper releases appear to configure the cover through
# a `cover=` argument instead of nr_cubes/overlap_perc — confirm against the
# installed version before changing these keywords.
scomplex = mapper.map(lens,
                      X,
                      nr_cubes=15,
                      overlap_perc=0.7,
                      clusterer=cluster.KMeans(n_clusters=2,
                                               random_state=1618033))



In [None]:
# Readable class names for the integer diagnosis codes stored in `y`.
breastc_dict = {
    0: 'benign',
    1: 'malignant',
}

First we visualize the resulting graph via a `color_function` that assigns to each lens point its x-coordinate (shifted so that the minimum is 0), and we map these values to a given Plotly colorscale.

In [None]:
# Color every data point by the first lens coordinate (the Isolation Forest
# score), shifted so that the smallest value becomes 0.
color_function = lens[:, 0] - np.min(lens[:, 0])
kmgraph, meta = mapper.visualize(
    scomplex,
    color_function=color_function,
    path_html=None,
)

Define a Brewer-type colorscale (running from red through yellow to green):

In [None]:
# Plotly colorscale: a list of [stop, color] pairs with eleven evenly spaced
# stops 0.0, 0.1, ..., 1.0, going from dark red to dark green.
pl_brewer = [
    [step / 10.0, hex_color]
    for step, hex_color in enumerate((
        '#a50026',
        '#d73027',
        '#f46d43',
        '#fdae61',
        '#fee08b',
        '#ffffbf',
        '#d9ef8b',
        '#a6d96a',
        '#66bd63',
        '#1a9850',
        '#006837',
    ))
]

In [None]:
# Build the Plotly traces for the mapper graph, colored with the custom
# colorscale (reversed).
plotly_graph_data = pl.plotly_graph(
    kmgraph,
    graph_layout='fr',
    colorscale=pl_brewer,
    reversescale=True,
    factor_size=3,
    edge_linewidth=0.5,
)

# Figure layout: 900x900 canvas on a black background, with the mapper
# metadata passed as the annotation text.
layout = pl.plot_layout(
    title='Topological network representing the  breast cancer  dataset',
    width=900,
    height=900,
    annotation_text=meta,
    bgcolor='rgba(0,0,0, 1)',
)

fig_network = {'data': plotly_graph_data, 'layout': layout}
iplot(fig_network)

For a more informative coloring, let us compute the proportion of malignant members in each node, 
and map these proportions to the colorscale:

In [None]:
# Hover texts of the trace at index 1 of plotly_graph_data (one entry per
# graph node, judging by how the loop below indexes them). They serve as the
# starting point for tooltips that also list per-class member counts.
# Note: this is a reference to the trace's own 'text' value, not a copy.
tooltips=plotly_graph_data[1]['text']

In [None]:
# Recolor every node by the share of malignant members it contains and add
# the per-class member counts to its hover text.
#
# The extended hover texts are collected in a NEW list instead of being
# appended in place to `tooltips`, so re-running this cell on its own does
# not add the counts a second time.
new_color = []
new_tooltips = []
for j, node in enumerate(kmgraph['nodes']):
    # Indices of the node's members -> their 0/1 diagnosis codes -> names.
    member_label_ids = y[scomplex['nodes'][node['name']]]
    member_labels = [breastc_dict[label_id] for label_id in member_label_ids]
    f_type, f_number = np.unique(member_labels, return_counts=True)

    # Look the malignant count up by name rather than by position, so the
    # result does not depend on the sort order of the labels; a node without
    # malignant members gets 0. float() keeps this a true division on Python 2.
    n_members = f_number.sum()
    counts_by_label = dict(zip(f_type, f_number))
    new_color.append(counts_by_label.get('malignant', 0) / float(n_members))

    # One "<br>label: count" line per class present in the node.
    class_lines = ''.join('<br>' + str(label) + ': ' + str(count)
                          for label, count in zip(f_type, f_number))
    new_tooltips.append(tooltips[j] + class_lines)

plotly_graph_data[1].update(text=new_tooltips)
plotly_graph_data[1]['marker'].update(color=new_color)



Offline plot of the graph:

In [None]:
# Assemble the recolored traces with the same layout and render the figure inline.
fign = {'data': plotly_graph_data, 'layout': layout}
iplot(fign)

Another option is to send the graph to the Plotly cloud:

In [None]:
# Optional: publish the figure online instead of rendering it offline.
# NOTE(review): `plotly.plotly` reportedly moved to the separate chart-studio
# package (`chart_studio.plotly`) as of plotly 4, in which case this import
# fails — confirm against the installed plotly version.
import plotly.plotly as py
# To upload, uncomment the two lines below and use your own username / API key
# (do not commit a real key to the notebook):
#py.sign_in('empet', 'my_api_key')
#py.iplot(fign, filename='tda-breastcancer')