In [None]:
# Configuration for the whole notebook.
# dataset: one of "digits", "wine", "forest" or "cancer".
dataset = "digits"
# label: keep the class labels (used to annotate the plots and to train the SVM).
label = True
# standardscaler: standardize the features before embedding.
standardscaler = True

In [None]:
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn import (manifold, datasets, decomposition, ensemble,
                     discriminant_analysis, random_projection)
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

## Datasets

In [None]:
if dataset == "digits":
  # Load the scikit-learn digits dataset with all 10 classes.
  digits = datasets.load_digits(n_class=10)
  X, y = digits.data, digits.target

In [None]:
if dataset == "wine":
  # Load the scikit-learn wine dataset as features X and labels y.
  wine = datasets.load_wine()
  X, y = wine.data, wine.target

In [None]:
if dataset == "forest":
  # Shuffle with a fixed seed so that the 1000-sample subset taken below is
  # the same on every run (an unseeded shuffle picks different rows each time).
  forest = datasets.fetch_covtype(shuffle=True, random_state=0)
  # Keep only the first 1000 shuffled samples.
  X = forest.data[:1000]
  y = forest.target[:1000]

In [None]:
if dataset == "cancer":
  # Load the scikit-learn breast-cancer dataset as features X and labels y.
  cancer = datasets.load_breast_cancer()
  X, y = cancer.data, cancer.target

In [None]:
def ss(X):
  """Return X transformed by a default StandardScaler fitted on X itself."""
  return StandardScaler().fit_transform(X)

In [None]:
# Standardize the features only when enabled by the `standardscaler` setting.
if standardscaler:
  X = ss(X)

In [None]:
# When labels are disabled, pass None downstream so the plots draw plain
# markers instead of per-class text.
if not label:
  y = None

In [None]:
# Show the first sample's feature vector (standardized when `standardscaler` is enabled).
X[0]

array([ 0.        , -0.33501649, -0.04308102,  0.27407152, -0.66447751,
       -0.84412939, -0.40972392, -0.12502292, -0.05907756, -0.62400926,
        0.4829745 ,  0.75962245, -0.05842586,  1.12772113,  0.87958306,
       -0.13043338, -0.04462507,  0.11144272,  0.89588044, -0.86066632,
       -1.14964846,  0.51547187,  1.90596347, -0.11422184, -0.03337973,
        0.48648928,  0.46988512, -1.49990136, -1.61406277,  0.07639777,
        1.54181413, -0.04723238,  0.        ,  0.76465553,  0.05263019,
       -1.44763006, -1.73666443,  0.04361588,  1.43955804,  0.        ,
       -0.06134367,  0.8105536 ,  0.63011714, -1.12245711, -1.06623158,
        0.66096475,  0.81845076, -0.08874162, -0.03543326,  0.74211893,
        1.15065212, -0.86867056,  0.11012973,  0.53761116, -0.75743581,
       -0.20978513, -0.02359646, -0.29908135,  0.08671869,  0.20829258,
       -0.36677122, -1.14664746, -0.5056698 , -0.19600752])

## Visualize (2D, 3D)

In [None]:
def _plot(_2d, _3d, sz=600, title="", target=None):
  """Show a 2D and a 3D embedding side by side in one plotly figure.

  Args:
    _2d: 2-D array; columns 0 and 1 are plotted as x and y (left panel).
    _3d: 2-D array; columns 0, 1 and 2 are plotted as x, y and z (right panel).
    sz: figure height; the figure width is twice this value.
    title: figure title.
    target: optional per-sample labels. When given, every point is drawn as
      text showing the index of its label among the sorted distinct labels.
      When None, points are drawn as plain markers.
  """
  if target is not None:
    # Sort the distinct labels so the label -> index mapping is deterministic
    # instead of depending on set iteration order.
    index_of = {lab: i for i, lab in enumerate(sorted(set(target)))}
    mode, text = 'text', [index_of[t] for t in target]
  else:
    mode, text = 'markers', None

  fig = make_subplots(rows=1, cols=2,
                      specs=[[{"type": "xy"}, {"type": "scene"}]])
  fig.add_trace(
    go.Scatter(x=_2d[:, 0], y=_2d[:, 1],
               mode=mode, marker=dict(size=5), text=text),
    row=1, col=1)
  fig.add_trace(
    go.Scatter3d(x=_3d[:, 0], y=_3d[:, 1], z=_3d[:, 2],
                 mode=mode, marker=dict(size=1), text=text),
    row=1, col=2)
  fig.update_layout(height=sz, width=sz*2, title_text=title)
  fig.show()

In [None]:
# TruncatedSVD does not center the data, so this matches PCA only when X is
# already centered (e.g. after standardization). A fixed seed keeps the
# randomized solver reproducible between runs.
X_pca = decomposition.TruncatedSVD(n_components=2, random_state=0).fit_transform(X)
X_pca3d = decomposition.TruncatedSVD(n_components=3, random_state=0).fit_transform(X)
_plot(X_pca, X_pca3d, 500, "Computing PCA projection", y)

In [None]:
# Isomap embeddings with 30 neighbours, once in 2 and once in 3 dimensions.
X_iso, X_iso3d = (
    manifold.Isomap(n_neighbors=30, n_components=n_dims).fit_transform(X)
    for n_dims in (2, 3)
)
_plot(X_iso, X_iso3d, 500, "Computing Isomap embedding", y)

In [None]:
# LLE variants to compare; "hessian" and "ltsa" are currently disabled.
methods = ["standard", "modified"]#, "hessian", "ltsa"]
dim = [2, 3]
for m in methods:
  # One embedding per target dimensionality: X_lle[0] is 2D, X_lle[1] is 3D.
  X_lle = []
  for d in dim:
    # Fixed seed so the embedding is reproducible, like the other seeded cells.
    clf = manifold.LocallyLinearEmbedding(n_neighbors=30, n_components=d, method=m,
                                          random_state=0)
    X_lle.append(clf.fit_transform(X))
  _plot(X_lle[0], X_lle[1], 500, "Computing LLE embedding (" + m + ")", y)

In [None]:
# Settings shared by the 2D and 3D spectral embeddings.
spectral_options = dict(random_state=0, eigen_solver="arpack")
embedder = manifold.SpectralEmbedding(n_components=2, **spectral_options)
embedder3d = manifold.SpectralEmbedding(n_components=3, **spectral_options)
X_se, X_se3d = embedder.fit_transform(X), embedder3d.fit_transform(X)
_plot(X_se, X_se3d, 500, "Computing Spectral embedding", y)

In [None]:
# A single initialization with a fixed seed keeps the MDS result reproducible
# between runs, consistent with the other seeded embeddings.
clf = manifold.MDS(n_components=2, n_init=1, max_iter=100, random_state=0)
clf3d = manifold.MDS(n_components=3, n_init=1, max_iter=100, random_state=0)
X_mds = clf.fit_transform(X)
X_mds3d = clf3d.fit_transform(X)
_plot(X_mds, X_mds3d, 500, "Computing MDS embedding", y)

In [None]:
# Settings shared by the 2D and 3D t-SNE runs: PCA initialization, fixed seed.
tsne_options = dict(init='pca', random_state=0)
tsne = manifold.TSNE(n_components=2, **tsne_options)
tsne3d = manifold.TSNE(n_components=3, **tsne_options)
X_tsne, X_tsne3d = tsne.fit_transform(X), tsne3d.fit_transform(X)
_plot(X_tsne, X_tsne3d, 500, "Computing t-SNE embedding", y)

In [None]:
# Encode the samples with a seeded random-trees embedding, then reduce that
# encoding to 2 and 3 dimensions for plotting.
hasher = ensemble.RandomTreesEmbedding(n_estimators=200, random_state=0, max_depth=5)
X_transformed = hasher.fit_transform(X)
# Seed the reductions too, so the whole cell is reproducible (the hasher
# alone was seeded before).
pca = decomposition.TruncatedSVD(n_components=2, random_state=0)
X_reduced = pca.fit_transform(X_transformed)
pca3d = decomposition.TruncatedSVD(n_components=3, random_state=0)
X_reduced3d = pca3d.fit_transform(X_transformed)
_plot(X_reduced, X_reduced3d, 500, "Computing Totally Random Trees embedding", y)

## SVM

In [None]:
# Train and score a linear SVM; this needs labels, so skip it when they are disabled.
if label:
  # Hold out 30% of the samples for testing. The split is seeded so the
  # reported accuracies are the same on every run.
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=0.3, random_state=0)

  model = SVC(kernel='linear', random_state=None)
  model.fit(X_train, y_train)

  # Accuracy on the data the model was fitted on.
  pred_train = model.predict(X_train)
  accuracy_train = accuracy_score(y_train, pred_train)

  # Accuracy on the held-out data.
  pred_test = model.predict(X_test)
  accuracy_test = accuracy_score(y_test, pred_test)

  print('accuracy (training data, test data): %.2f, %.2f' % 
        (accuracy_train, accuracy_test))

accuracy (training data, test data): 1.00, 0.98
