In [None]:
!pip install plotly

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import networkx as nx

import kmapper as km
from kmapper import jupyter
import kmapper.evaluate as evaluate

from sklearn.datasets import make_circles, load_digits
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap

from kmapper.plotlyviz import plotlyviz
from kmapper.plotlyviz import *
import plotly.graph_objs as go
import ipywidgets as ipw

## Seminar 17: Mapper

### Concentric circles

In [None]:
# load data: two noisy concentric circles (inner/outer scale factor 0.3).
# A fixed random_state makes the sample identical on every Restart & Run All.
data, labels = make_circles(n_samples=5000, noise=0.03, factor=0.3, random_state=42)

In [None]:
# scatter plot of the raw point cloud
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(data[:, 0], data[:, 1], c="b", s=10, alpha=0.5)
plt.show()

#### 1D cover

In [None]:
# 1D lens: projection of the data onto its first coordinate (column 0)
projector_1d = km.KeplerMapper()
lens_f_1d = projector_1d.fit_transform(data, projection=[0])

In [None]:
# cover parameters handed to km.Cover for the 1D lens: interval count and overlap
n_intervals, overlap = 20, 0.25

In [None]:
# 1) mapper object with verbose output enabled
mapper = km.KeplerMapper(verbose=1)

# 2) cover built from the interval count and overlap chosen above
cover_1d = km.Cover(n_intervals, overlap)

# 3) mapper graph from the 1D lens and the original points
graph = mapper.map(
    lens_f_1d,
    data,
    cover=cover_1d,
)

In [None]:
# Per-node drawing attributes, in the iteration order of graph['nodes']:
#   colour = mean lens value over the indices stored in the node
#   size   = number of indices stored in the node
# The member lists are materialised once; the original rebuilt
# list(graph['nodes'].values()) on every loop pass (quadratic in the node count).
node_members = list(graph['nodes'].values())
colors = [np.mean(lens_f_1d[members]) for members in node_members]
sizes = [len(members) for members in node_members]

In [None]:
# convert the mapper graph to networkx and draw it with a Kamada-Kawai layout;
# node colours and sizes come from the `colors` / `sizes` lists
G = km.adapter.to_networkx(graph)
layout = nx.kamada_kawai_layout(G)
nx.draw(G, pos=layout, node_color=colors, node_size=sizes, cmap="rainbow")
plt.show()

In [None]:
# write the HTML visualisation to disk; binding the return value to `_`
# keeps it out of the cell output
_ = mapper.visualize(
    graph,
    path_html="make_circles_keplermapper1d.html",
)

#### 2D cover

In [None]:
# 2D lens: projection of the data onto its first two coordinates (columns 0 and 1)
projector_2d = km.KeplerMapper()
lens_f_2d = projector_2d.fit_transform(data, projection=[0, 1])

In [None]:
# cover parameters handed to km.Cover for the 2D lens: interval count and overlap
n_intervals, overlap = 12, 0.25

In [None]:
# init mapper (verbose output enabled)
mapper = km.KeplerMapper(verbose=1)

# build the cover for the 2D lens.
# Named cover_2d: the original reused the name `cover_1d` here, which was
# misleading and silently overwrote the cover built for the 1D lens.
cover_2d = km.Cover(n_intervals, overlap)

# build mapper graph from the 2D lens and the original points
graph = mapper.map(lens_f_2d, data, cover=cover_2d)

In [None]:
# Per-node drawing attributes, in the iteration order of graph['nodes']:
#   colour = np.mean over the selected rows of lens_f_2d; with no axis given this
#            averages all selected entries (both lens columns) into one scalar
#   size   = number of indices stored in the node
# The member lists are materialised once; the original rebuilt
# list(graph['nodes'].values()) on every loop pass (quadratic in the node count).
node_members = list(graph['nodes'].values())
colors = [np.mean(lens_f_2d[members]) for members in node_members]
sizes = [len(members) for members in node_members]

In [None]:
# convert the mapper graph to networkx and draw it with a Kamada-Kawai layout;
# node colours and sizes come from the `colors` / `sizes` lists
G = km.adapter.to_networkx(graph)
layout = nx.kamada_kawai_layout(G)
nx.draw(G, pos=layout, node_color=colors, node_size=sizes, cmap="rainbow")
plt.show()

In [None]:
# write the HTML visualisation to disk; binding the return value to `_`
# keeps it out of the cell output
_ = mapper.visualize(
    graph,
    path_html="make_circles_keplermapper2d.html",
)

### Two circles

In [None]:
# select rows 0..9999 of the CSV (the file must contain at least 10000 rows)
idx = np.arange(10000)
data = np.loadtxt("./data/two_cir.csv", delimiter=",")[idx]

# lens function is a projection to the first coordinate
lens_f = data[:, 0]

In [None]:
# scatter plot of the points, coloured by the lens value
fig, ax = plt.subplots(figsize=(12, 5))
ax.scatter(data[:, 0], data[:, 1], c=lens_f, cmap="rainbow", s=10, alpha=0.5)
plt.show()

#### Mapper

In [None]:
# cover parameters handed to km.Cover for the classic Mapper run: interval count and overlap
n_intervals, overlap = 12, 0.01

In [None]:
# 1) mapper object with verbose output enabled
mapper = km.KeplerMapper(verbose=1)

# 2) cover built from the interval count and overlap chosen above
mapper_cover = km.Cover(n_intervals, overlap)

# 3) classic mapper graph from the 1D lens and the original points
mapper_graph = mapper.map(
    lens_f,
    data,
    cover=mapper_cover,
)

In [None]:
# Table of interval endpoints for the classic cover: row 0 = left ends, row 1 = right ends.
# Endpoints are recomputed here from the cover's bounds: n_intervals equal-length
# intervals, consecutive ones shifted by (1 - overlap) * length.
# NOTE(review): this formula is independent of km.Cover's internals — confirm it
# matches the intervals the cover actually used.
lens_lo = mapper_cover.bounds_[0]
lens_span = mapper_cover.bounds_[1] - mapper_cover.bounds_[0]
interval_len = lens_span / (n_intervals - (n_intervals - 1) * overlap)

interval_table_2 = np.zeros((2, n_intervals))
left = lens_lo
for col in range(n_intervals):
    interval_table_2[0, col] = left
    interval_table_2[1, col] = left + interval_len
    # rebinding (not +=) so the object read from bounds_ is never modified in place
    left = left + (1 - overlap) * interval_len

# Per-node drawing attributes for the classic mapper graph, in the iteration
# order of mapper_graph['nodes']:
#   colour = mean lens value over the indices stored in the node
#   size   = number of indices stored in the node
# The member lists are materialised once; the original rebuilt
# list(mapper_graph['nodes'].values()) on every loop pass (quadratic in the node count).
node_members2 = list(mapper_graph['nodes'].values())
color_list2 = [np.mean(lens_f[members]) for members in node_members2]
sizes2 = [len(members) for members in node_members2]

# convert the classic mapper graph to networkx and draw it with a Kamada-Kawai layout
G2 = km.adapter.to_networkx(mapper_graph)
layout2 = nx.kamada_kawai_layout(G2)
nx.draw(G2, pos=layout2, node_color=color_list2, node_size=sizes2, cmap="rainbow")
plt.show()

#### D-Mapper

In [None]:
# D-Mapper cover parameters: both are passed positionally to km.D_Cover
n_intervals, alpha = 13, 0.1273

In [None]:
# 1) D-Mapper object with verbose output enabled
dmapper = km.D_Mapper(verbose=1)

# 2) D-Mapper cover
dmapper_cover = km.D_Cover(
    n_intervals,
    alpha,
    n_init=10,
    max_iter=200000,
    verbose=1,
)

# 3) D-Mapper graph from the 1D lens and the original points
dmapper_graph = dmapper.map(
    lens_f,
    data,
    cover=dmapper_cover,
)

In [None]:
# interval endpoints as stored on the D-Mapper cover
# (indexed later as [0, i] / [1, i] for the two ends of interval i)
interval_table_1 = dmapper_cover.interval_table

# Per-node drawing attributes for the D-Mapper graph, in the iteration order
# of dmapper_graph['nodes']:
#   colour = mean lens value over the indices stored in the node
#   size   = number of indices stored in the node
# The member lists are materialised once; the original rebuilt
# list(dmapper_graph['nodes'].values()) on every loop pass (quadratic in the node count).
node_members1 = list(dmapper_graph['nodes'].values())
color_list1 = [np.mean(lens_f[members]) for members in node_members1]
sizes1 = [len(members) for members in node_members1]

# convert the D-Mapper graph to networkx and draw it with a Kamada-Kawai layout
G1 = km.adapter.to_networkx(dmapper_graph)
layout1 = nx.kamada_kawai_layout(G1)
nx.draw(G1, pos=layout1, node_color=color_list1, node_size=sizes1, cmap="rainbow")
plt.show()

#### Comparing covers

In [None]:
def _draw_intervals(axes, table, y_offset):
    """Draw every column of `table` (row 0 = left end, row 1 = right end) as a
    horizontal segment on `axes`, alternating between two heights so that
    neighbouring intervals stay distinguishable."""
    for k in range(table.shape[1]):
        # parity of the 1-based position decides which of the two heights is used
        y = 0.1 * ((k + 1) % 2) - y_offset
        axes.plot([table[0, k], table[1, k]], [y, y])


fig, ax = plt.subplots(dpi=200)

# D-Mapper intervals on the lower band, classic Mapper intervals above them
_draw_intervals(ax, interval_table_1, 2.5)
_draw_intervals(ax, interval_table_2, 1.6)

# the data itself, coloured by the lens value
ax.scatter(data[:, 0], data[:, 1], s=0.1, c=lens_f, cmap="rainbow")

ax.text(-1, -2.1, 'D-Mapper:', fontsize=9)
ax.text(-1, -1.2, 'Classic Mapper:', fontsize=9)

ax.set_aspect(1)
ax.set_yticks([])
plt.show()

### Digits

In [None]:
# load the digits dataset once and unpack features and targets together
# (the original called load_digits() twice)
data, labels = load_digits(return_X_y=True)

# 2D Isomap embedding of the raw data.
# NOTE(review): despite its name, `data_pca` holds an Isomap embedding, not a PCA projection.
data_pca = Isomap(n_neighbors=7, n_components=2).fit_transform(data)

In [None]:
# scatter plot of the 2D embedding, coloured by digit label
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(data_pca[:, 0], data_pca[:, 1], c=labels, s=10, alpha=0.5)
plt.show()

In [None]:
# cover parameters handed to km.Cover for the digits run: interval count and overlap
n_intervals, overlap = 35, 0.6

In [None]:
# mapper object (verbose output off)
mapper = km.KeplerMapper(verbose=0)

# 2D lens: Isomap embedding computed through the mapper's fit_transform
projection = Isomap(n_neighbors=7, n_components=2)
lens_f = mapper.fit_transform(data, projection=projection)

# clustering algorithm and cover handed to mapper.map
clusterer = DBSCAN(eps=0.3, min_samples=15)
cover = km.Cover(n_intervals, overlap)

# build the mapper complex
# NOTE(review): only the lens is passed (no separate data array) — presumably
# clustering then runs on the lens values themselves; confirm against the kmapper docs
cmplx = mapper.map(lens_f, clusterer=clusterer, cover=cover)

In [None]:
# node colour values: second lens coordinate, shifted so that its minimum is 0
lens_y = lens_f[:, 1]
color_values = lens_y - lens_y.min()

# display options for the plotly view, collected in one place
viz_options = dict(
    title='Mapper graph of digits dataset',
    color_values=color_values,
    color_function_name='Distance to y-min',
    node_linecolor='rgb(100,100,100)',
    bgcolor='rgb(240,240,240)',
    width=620,
    height=620,
    summary_height=350,
    summary_left=10,
    hist_left=25,
    hist_right=10,
    graph_data=True,
)

# last expression of the cell, so its return value is rendered as the cell output
plotlyviz(cmplx, **viz_options)