# Iris dataset example

This example showcases using K3D to visualize data. Addtional requirements: `scikit-learn`

In [1]:
try:
    import sklearn
except ImportError:
    %pip install scikit-learn

In [2]:
from sklearn.datasets import load_iris
from sklearn.manifold import TSNE

from k3d import plot, points, nice_colors, text2d

In [3]:
iris = load_iris()

# Plots omitting each column once

In [4]:
from k3d import plot, points, nice_colors, text2d
import numpy as np

def legend(p, axes):
    """Display classes' names in their color."""
    k3dplot = plot(axes=['\:'.join(a.split(' ')) for a in axes.tolist()])
    k3dplot += p
    for i, name in enumerate(iris.target_names):
        k3dplot += text2d(text=name, color=nice_colors[i], position=(0, i / 10))
    return k3dplot

def point_size(data, resolution=20.):
    span = max(np.max(data, axis=0) - np.min(data, axis=0))
    return span / resolution

common = dict(
    point_size=point_size(iris.data), 
    colors=[nice_colors[i] for i in iris.target]
)

In [5]:
def iris_omit(column_index):
    names = np.roll(iris.feature_names, column_index)
    print('omitting', names[0])
    for dim, name in zip('xyz', names[1:]):
        print(dim, 'is', name)
    return (np.roll(iris.data, column_index, axis=1)[:, 1:].astype(np.float32), names[1:])

In [8]:
p, axes = iris_omit(0)
legend(points(p, **common), axes)

omitting sepal length (cm)
x is sepal width (cm)
y is petal length (cm)
z is petal width (cm)


Plot(antialias=3, axes=['sepal\\:width\\:(cm)', 'petal\\:length\\:(cm)', 'petal\\:width\\:(cm)'], axes_helper=…

In [9]:
p, axes = iris_omit(1)
legend(points(p, **common), axes)

omitting petal width (cm)
x is sepal length (cm)
y is sepal width (cm)
z is petal length (cm)


Plot(antialias=3, axes=['sepal\\:length\\:(cm)', 'sepal\\:width\\:(cm)', 'petal\\:length\\:(cm)'], axes_helper…

In [10]:
p, axes = iris_omit(2)
legend(points(p, **common), axes)

omitting petal length (cm)
x is petal width (cm)
y is sepal length (cm)
z is sepal width (cm)


Plot(antialias=3, axes=['petal\\:width\\:(cm)', 'sepal\\:length\\:(cm)', 'sepal\\:width\\:(cm)'], axes_helper=…

In [11]:
p, axes = iris_omit(3)
legend(points(p, **common), axes)

omitting sepal width (cm)
x is petal length (cm)
y is petal width (cm)
z is sepal length (cm)


Plot(antialias=3, axes=['petal\\:length\\:(cm)', 'petal\\:width\\:(cm)', 'sepal\\:length\\:(cm)'], axes_helper…

# 3-D trainable Stochastic Neighbor Embedding

This can look a little different every time around.

**NOTE**: this is unsupervised learning, tSNE doesn't get the labels. And still, it clusters.

In [12]:
tsne = TSNE(n_components=3, verbose=1, perplexity=40, n_iter=3000)

In [13]:
tsne_results = tsne.fit_transform(iris.data).astype(np.float32)

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 150 samples in 0.000s...
[t-SNE] Computed neighbors for 150 samples in 0.005s...
[t-SNE] Computed conditional probabilities for sample 150 / 150
[t-SNE] Mean sigma: 0.667454
[t-SNE] KL divergence after 250 iterations with early exaggeration: 97.631813
[t-SNE] KL divergence after 3000 iterations: 1.099588


In [14]:
tsne_results[:5]

array([[   8.496279,  213.87694 ,  126.09281 ],
       [ 129.11748 ,  211.5936  ,  236.7509  ],
       [ 248.33766 ,  198.84265 , -361.45593 ],
       [ 101.12183 ,   43.96559 ,  105.963356],
       [ -52.295998,  122.46141 ,   96.25618 ]], dtype=float32)

In [15]:
common['point_size'] = point_size(tsne_results)
legend(points(tsne_results, **common), axes)

Plot(antialias=3, axes=['petal\\:length\\:(cm)', 'petal\\:width\\:(cm)', 'sepal\\:length\\:(cm)'], axes_helper…