Kepler Mapper demo on the scikit-learn make_circles toy data and on the chemdiab dataset. See https://kepler-mapper.scikit-tda.org/en/latest/ for the Kepler Mapper documentation.

In [79]:
# Import the class
import kmapper as km
import sklearn.manifold as manifold

# Some sample data: noisy points on two concentric circles.
# A fixed random_state makes the generated points (and therefore the Mapper
# graph built from them) reproducible after a kernel restart.
from sklearn import datasets
data, labels = datasets.make_circles(n_samples=5000, noise=0.03, factor=0.3,
                                     random_state=42)



In [14]:
# Sanity check: show the (n_samples, n_features) shape of the generated data.
data.shape

(5000, 2)

In [None]:
# Build the Mapper object; verbose=1 makes it print a progress log.
mapper = km.KeplerMapper(verbose=1)

# Lens: project the data onto its column indices 0 and 1 (the X-Y axes).
xy_columns = [0, 1]
projected_data = mapper.fit_transform(data, projection=xy_columns)

# Build the 'graph' dictionary holding nodes, edges and meta-information.
graph = mapper.map(projected_data, data)

# Write the visualization of the graph to an HTML file.
circles_html = "make_circles_keplermapper_output.html"
circles_title = "make_circles(n_samples=5000, noise=0.03, factor=0.3)"
mapper.visualize(graph, path_html=circles_html, title=circles_title)

In [22]:
from kmapper import jupyter # Creates custom CSS full-size Jupyter screen

# Inline display of the HTML file written by mapper.visualize for make_circles.
# A remote page can be shown the same way, e.g. path_html="http://mlwave.github.io/tda/word2vec-gender-bias.html"
jupyter.display(path_html="make_circles_keplermapper_output.html")



In [80]:
import pandas as pd

In [81]:
# Load the chemdiab table from the CSV file two directories up.
df = pd.read_csv("../../chemdiab.csv")

# Keep the five measurement columns plus the "cc" column; every other column
# of the CSV is dropped. A missing column raises KeyError.
kept_columns = ["rw", "fpg", "ga", "ina", "sspg", "cc"]
df2 = df.loc[:, kept_columns]

In [82]:
# Display the loaded table as the cell output.
df

Unnamed: 0.1,Unnamed: 0,rw,fpg,ga,ina,sspg,cc
0,1,0.81,80,356,124,55,Normal
1,2,0.95,97,289,117,76,Normal
2,3,0.94,105,319,143,105,Normal
3,4,1.04,90,356,199,108,Normal
4,5,1.00,90,323,240,143,Normal
...,...,...,...,...,...,...,...
140,141,1.05,353,1428,41,480,Overt_Diabetic
141,142,0.91,180,923,77,150,Overt_Diabetic
142,143,0.90,213,1025,29,209,Overt_Diabetic
143,144,1.11,328,1246,124,442,Overt_Diabetic


In [83]:
# Write the selected columns to chemdiab.csv in the current working directory,
# leaving the DataFrame index out of the file.
df2.to_csv('chemdiab.csv', index=False)

In [84]:
# Feature matrix: the five measurement columns as a NumPy array.
feature_columns = ['rw', 'fpg', 'ga', 'ina', 'sspg']
data = df2[feature_columns].to_numpy()

# Labels: selecting with a one-element list keeps the result two-dimensional,
# so `labels` has a single column rather than being a flat vector.
label_columns = ['cc']
labels = df2[label_columns].to_numpy()

In [85]:
# Sanity check: show the (n_rows, n_columns) shape of the chemdiab feature matrix.
data.shape

(145, 5)

In [86]:
from sklearn.decomposition import PCA
from sklearn.neighbors import KernelDensity
from sklearn.cluster import DBSCAN

In [30]:
# Fit a KernelDensity estimator with default parameters on the feature matrix.
# The fitted estimator is only displayed as the cell output; it is not stored.
KernelDensity().fit(data)

KernelDensity()

In [100]:
# Build the Mapper object; verbose=1 makes it print a progress log.
mapper = km.KeplerMapper(verbose=1)

# Lens: a two-component PCA of the feature matrix.
# Alternative lenses kept from exploration (not used):
#   manifold.Isomap(n_components=100, n_jobs=-1)
#   [1,0,0,0,0]
#   KernelDensity()
pca_lens = PCA(n_components=2)
projected_data = mapper.fit_transform(data, projection=pca_lens)

# Build the 'graph' dictionary holding nodes, edges and meta-information,
# using a 20-cube cover with 0.6 overlap and DBSCAN as the clusterer.
dbscan_clusterer = DBSCAN(eps=200, min_samples=2)
lens_cover = km.Cover(n_cubes=20, perc_overlap=0.6)
graph = mapper.map(projected_data, data,
                   clusterer=dbscan_clusterer,
                   cover=lens_cover)

KeplerMapper(verbose=1)
..Composing projection pipeline of length 1:
	Projections: PCA(n_components=2)
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (145, 5)

..Projecting data using: 
	PCA(n_components=2)


..Scaling with: MinMaxScaler()

Mapping on data shaped (145, 5) using lens shaped (145, 2)

Creating 400 hypercubes.

Created 619 edges and 122 nodes in 0:00:00.094934.


In [101]:
from kmapper import jupyter # Creates custom CSS full-size Jupyter screen

# Visualize the Mapper graph of the chemdiab data.
# The title names the chemdiab analysis; it previously carried the
# make_circles title copied from the first demo, which mislabeled the page.
mapper.visualize(graph, path_html="chemdiab_keplermapper_output.html",
                 title="chemdiab: PCA lens, DBSCAN clustering")

# Inline display of the HTML file written just above.
jupyter.display(path_html="chemdiab_keplermapper_output.html")

Wrote visualization to: chemdiab_keplermapper_output.html




In [41]:
?mapper.map

In [70]:
?km.Cover

In [47]:
?DBSCAN