# Dimensionality reduction using graphs

In [None]:
from utils import visualization, weights, features, build, embedding, dim_reduction, evaluation
from data import preprocessing, small
import numpy as np
from sklearn.datasets import load_iris, load_digits
from sklearn.decomposition import PCA
import umap

## Graph building

In [None]:
# Configure the graph builder with `weights.reciprocal` as its edge-weight function.
builder = build.CheapestBuilder(weight_fun=weights.reciprocal)

In [None]:
# Toy dataset: six 2-D points; the first three are labelled 0, the last three 1.
data = np.array([
    [3, -1],
    [4, 0],
    [5, -2],
    [4, 6],
    [5, 7],
    [5.5, 6],
])
labels = [0] * 3 + [1] * 3

# Build the graph over the points and keep a handle to it for the plots below.
builder.build(data)
graph = builder.graph

In [None]:
# Plot the toy points together with the graph built over them, coloured by label.
visualization.show_data(data=data, graph=graph, labels=labels, square=True)

In [None]:
# Draw the graph on its own; show_numbers=True presumably annotates nodes with their indices (confirm in utils.visualization).
visualization.show_graph(graph=graph, labels=labels, dpi=600, show_numbers=True)

## Node embedding
### Without node features

In [None]:
# Fresh builder with no feature function, rebuilt on the same toy data;
# the embedders in this section read the result from builder.graph.
builder = build.CheapestBuilder(weight_fun=weights.reciprocal)
builder.build(data)

#### Spring

In [None]:
# Embed the nodes of builder.graph with the Spring embedder; the result is read from embedder.embeddings.
embedder = embedding.SpringEmbedder()
embedder.embed(builder.graph)

In [None]:
# Plot the Spring embedding with the graph edges overlaid.
# NOTE(review): `graph` is the object captured from the first build cell, not the
# rebuilt builder.graph — presumably equivalent for the same data; confirm.
visualization.show_data(embedder.embeddings, graph=graph, labels=labels)

#### Kamada Kawai

In [None]:
# Embed the nodes of builder.graph with the Kamada Kawai embedder (replaces the previous `embedder`).
embedder = embedding.KamadaKawaiEmbedder()
embedder.embed(builder.graph)

In [None]:
# Plot the Kamada Kawai embedding with the graph edges overlaid.
# NOTE(review): uses the `graph` variable from the first build cell rather than builder.graph — confirm intended.
visualization.show_data(embedder.embeddings, graph=graph, labels=labels)

#### Watch your step

In [None]:
# Watch Your Step embedder; batch_size=6 equals the number of toy points.
# adjacency_powers=3 is passed straight to the embedder (its exact meaning is defined in utils.embedding).
embedder = embedding.WatchYourStepEmbedder(batch_size=6, adjacency_powers=3)
embedder.embed(builder.graph)

In [None]:
# Plot the Watch Your Step embedding with the graph edges overlaid.
# NOTE(review): uses the `graph` variable from the first build cell rather than builder.graph — confirm intended.
visualization.show_data(embedder.embeddings, graph=graph, labels=labels)

### With node features

In [None]:
# Rebuild the toy graph, this time also supplying `features.feature_coords` as the node-feature function.
builder = build.CheapestBuilder(weight_fun=weights.reciprocal, feature_fun=features.feature_coords)
builder.build(data)

#### GraphSAGE

In [None]:
# Embed the feature-carrying graph with GraphSAGE using the embedder's default settings.
embedder = embedding.GraphSAGEEmbedder()
embedder.embed(builder.graph)

In [None]:
# Plot the GraphSAGE embedding with the graph edges overlaid.
# NOTE(review): `graph` still refers to the feature-less graph from the first build cell,
# not the builder.graph produced with feature_fun — confirm this is intended.
visualization.show_data(embedder.embeddings, graph=graph, labels=labels)

## Results on real datasets
### Dataset: Iris

In [None]:
# Load Iris and pass samples/targets through the project's de-duplication helper
# (shuffling enabled, normalisation disabled).
iris = load_iris()
data, labels = preprocessing.remove_duplicities(
    iris.data,
    iris.target,
    shuffle=True,
    normalize=False,
)

#### Spring

In [None]:
# Builder shared by the Spring, Kamada Kawai and Watch Your Step runs on Iris.
builder = build.CheapestBuilder(weight_fun=weights.reciprocal)

In [None]:
# Run the full pipeline (data -> builder -> embedder) with the Spring embedder.
# compute_graph is left at its default here, unlike the later runs that pass compute_graph=False.
embedder = embedding.SpringEmbedder()
embeddings = dim_reduction.reduce_dimension(data, builder, embedder)

In [None]:
# Plot the Spring embedding of Iris; outpath presumably saves the figure to 'iris_spring.png'.
visualization.show_data(embeddings, labels=labels, title='Spring on iris dataset', square=True, dpi=500, outpath='iris_spring.png')

In [None]:
# Print the evaluation of the Spring embedding against the original Iris data (metrics live in utils.evaluation).
evaluation.print_evaluation(data, embeddings)

#### Kamada Kawai

In [None]:
# Kamada Kawai on Iris with the same builder; compute_graph=False presumably reuses
# the graph already built during the Spring run instead of rebuilding it.
embedder = embedding.KamadaKawaiEmbedder()
embeddings = dim_reduction.reduce_dimension(data, builder, embedder, compute_graph=False)

In [None]:
# Plot the Kamada Kawai embedding of Iris; outpath presumably saves the figure to 'iris_kamada.png'.
visualization.show_data(embeddings, labels=labels, title='Kamada Kawai on iris dataset', square=True, dpi=500, outpath='iris_kamada.png')

In [None]:
# Print the evaluation of the Kamada Kawai embedding against the original Iris data.
evaluation.print_evaluation(data, embeddings)

#### Watch your step

In [None]:
embedder = embedding.WatchYourStepEmbedder()
embeddings = dim_reduction.reduce_dimension(data, builder, embedder, compute_graph=False, square=True)

In [None]:
visualization.show_data(embeddings, labels=labels, title='Watch Your Step on iris dataset')

In [None]:
# Print the evaluation of the Watch Your Step embedding against the original Iris data.
evaluation.print_evaluation(data, embeddings)

#### GraphSAGE

In [None]:
# GraphSAGE needs node features, so a new builder with feature_fun is created and the
# graph is rebuilt (compute_graph left at its default).
# weights.get_reciprocal_pow(5) presumably returns a reciprocal weight raised to the 5th power — confirm in utils.weights.
builder = build.CheapestBuilder(weight_fun=weights.get_reciprocal_pow(5), feature_fun=features.feature_coords)
embedder = embedding.GraphSAGEEmbedder()
embeddings = dim_reduction.reduce_dimension(data, builder, embedder)

In [None]:
# Plot the GraphSAGE embedding of Iris; outpath presumably saves the figure to 'iris_graphsage.png'.
visualization.show_data(embeddings, labels=labels, title='GraphSAGE on iris dataset', square=True, dpi=500, outpath='iris_graphsage.png')

In [None]:
# Print the evaluation of the GraphSAGE embedding against the original Iris data.
evaluation.print_evaluation(data, embeddings)

#### Comparison: PCA

In [None]:
# Baseline: project Iris onto its first two principal components.
pca = PCA(n_components=2)
embeddings = pca.fit_transform(data)

In [None]:
# Plot the PCA projection of Iris; outpath presumably saves the figure to 'iris_pca.png'.
visualization.show_data(embeddings, labels=labels, title='PCA on iris dataset', square=True, dpi=500, outpath='iris_pca.png')

In [None]:
# Print the evaluation of the PCA projection against the original Iris data.
evaluation.print_evaluation(data, embeddings)

#### Comparison: UMAP

In [None]:
# Baseline: 2-D UMAP embedding of Iris; random_state is fixed so reruns give the same layout.
umap_obj = umap.UMAP(n_components=2, random_state=42)
embeddings = umap_obj.fit_transform(data)

In [None]:
# Plot the UMAP embedding of Iris; outpath presumably saves the figure to 'iris_umap.png'.
visualization.show_data(embeddings, labels=labels, title='UMAP on iris dataset', square=True, dpi=500, outpath='iris_umap.png')

In [None]:
# Print the evaluation of the UMAP embedding against the original Iris data.
evaluation.print_evaluation(data, embeddings)

### Dataset: Digits

In [None]:
# Load Digits and pass samples/targets through the project's de-duplication helper
# (shuffling enabled, normalisation disabled).
digits = load_digits()
data, labels = preprocessing.remove_duplicities(
    digits.data,
    digits.target,
    shuffle=True,
    normalize=False,
)

#### Spring

In [None]:
# Builder shared by the Spring, Kamada Kawai and Watch Your Step runs on Digits.
builder = build.CheapestBuilder(weight_fun=weights.reciprocal)

In [None]:
# Run the full pipeline on Digits with the Spring embedder (compute_graph left at its default).
embedder = embedding.SpringEmbedder()
embeddings = dim_reduction.reduce_dimension(data, builder, embedder)

In [None]:
# Plot the Spring embedding of Digits; outpath presumably saves the figure to 'digits_spring.png'.
visualization.show_data(embeddings, labels=labels, title='Spring on digits dataset', square=True, dpi=500, outpath='digits_spring.png')

In [None]:
# Print the evaluation of the Spring embedding against the original Digits data.
evaluation.print_evaluation(data, embeddings)

#### Kamada Kawai

In [None]:
# Kamada Kawai on Digits with the same builder; compute_graph=False presumably reuses
# the graph built during the Spring run.
embedder = embedding.KamadaKawaiEmbedder()
embeddings = dim_reduction.reduce_dimension(data, builder, embedder, compute_graph=False)

In [None]:
# Plot the Kamada Kawai embedding of Digits; outpath presumably saves the figure to 'digits_kamada.png'.
visualization.show_data(embeddings, labels=labels, title='Kamada Kawai on digits dataset', square=True, dpi=500, outpath='digits_kamada.png')

In [None]:
# Print the evaluation of the Kamada Kawai embedding against the original Digits data.
evaluation.print_evaluation(data, embeddings)

#### Watch your step

In [None]:
# Watch Your Step on Digits with default embedder settings; compute_graph=False presumably
# reuses the graph built during the Spring run.
embedder = embedding.WatchYourStepEmbedder()
embeddings = dim_reduction.reduce_dimension(data, builder, embedder, compute_graph=False)

In [None]:
# Plot the Watch Your Step embedding of Digits (no outpath is given, so nothing is written to disk here).
visualization.show_data(embeddings, labels=labels, title='Watch Your Step on digits dataset', square=True)

In [None]:
# Print the evaluation of the Watch Your Step embedding against the original Digits data.
evaluation.print_evaluation(data, embeddings)

#### GraphSAGE

In [None]:
# New builder for GraphSAGE: supplies node features via features.feature_coords and uses
# the weight function returned by weights.get_reciprocal_pow(5).
builder = build.CheapestBuilder(weight_fun=weights.get_reciprocal_pow(5), feature_fun=features.feature_coords)

In [None]:
# Run the full pipeline on Digits with GraphSAGE; the graph is built with the new builder
# (compute_graph left at its default).
embedder = embedding.GraphSAGEEmbedder()
embeddings = dim_reduction.reduce_dimension(data, builder, embedder)

In [None]:
# Plot the GraphSAGE embedding of Digits; outpath presumably saves the figure to 'digits_graphsage.png'.
visualization.show_data(embeddings, labels=labels, title='GraphSAGE on digits dataset', square=True, dpi=500, outpath='digits_graphsage.png')

In [None]:
# Print the evaluation of the GraphSAGE embedding against the original Digits data.
evaluation.print_evaluation(data, embeddings)

#### Comparison: PCA

In [None]:
# Baseline: project Digits onto its first two principal components.
pca = PCA(n_components=2)
embeddings = pca.fit_transform(data)

In [None]:
# Plot the PCA projection of Digits; outpath presumably saves the figure to 'digits_pca.png'.
visualization.show_data(embeddings, labels=labels, title='PCA on digits dataset', square=True, dpi=500, outpath='digits_pca.png')

In [None]:
# Print the evaluation of the PCA projection against the original Digits data.
evaluation.print_evaluation(data, embeddings)

#### Comparison: UMAP

In [None]:
# Baseline: 2-D UMAP embedding of Digits; random_state is fixed so reruns give the same layout.
umap_obj = umap.UMAP(n_components=2, random_state=42)
embeddings = umap_obj.fit_transform(data)

In [None]:
# Plot the UMAP embedding of Digits; outpath presumably saves the figure to 'digits_umap.png'.
visualization.show_data(embeddings, labels=labels, title='UMAP on digits dataset', square=True, dpi=500, outpath='digits_umap.png')

In [None]:
# Print the evaluation of the UMAP embedding against the original Digits data.
evaluation.print_evaluation(data, embeddings)

### Dataset: Swiss roll

In [None]:
from sklearn.datasets import make_swiss_roll

In [None]:
# Generate 1000 noise-free Swiss roll points with a fixed seed. make_swiss_roll returns the
# 3-D points and each point's position along the roll; that position is used as the
# (continuous) label for colouring.
data, labels = make_swiss_roll(n_samples=1000, noise=0.0, random_state=0)
# Plot the raw dataset; outpath presumably saves the figure to 'swissroll.png'.
visualization.show_data(data, labels=labels, square=True, dpi=500, outpath='swissroll.png')

In [None]:
# Build one feature-carrying graph up front; every graph embedder in this section
# embeds builder.graph directly instead of going through dim_reduction.reduce_dimension.
builder = build.CheapestBuilder(weight_fun=weights.get_reciprocal_pow(3), feature_fun=features.feature_coords)
builder.build(data)
# Plot the data with the graph edges overlaid.
visualization.show_data(data, graph=builder.graph, labels=labels, title='Cheapest graph on Swiss roll dataset', dpi=500, outpath='swissroll_graph.png')

#### Spring

In [None]:
# Embed the prebuilt Swiss roll graph with the Spring embedder and keep the result in `embeddings`.
embedder = embedding.SpringEmbedder()
embedder.embed(builder.graph)
embeddings = embedder.embeddings

In [None]:
# Plot the Spring embedding of the Swiss roll; outpath presumably saves the figure to 'swissroll_spring.png'.
visualization.show_data(embeddings, labels=labels, square=True, title='Spring on Swiss roll dataset', dpi=500, outpath='swissroll_spring.png')

In [None]:
# Print the evaluation of the Spring embedding against the original Swiss roll data.
evaluation.print_evaluation(data, embeddings)

#### Kamada Kawai

In [None]:
# Embed the prebuilt Swiss roll graph with the Kamada Kawai embedder and keep the result in `embeddings`.
embedder = embedding.KamadaKawaiEmbedder()
embedder.embed(builder.graph)
embeddings = embedder.embeddings

In [None]:
# Plot the Kamada Kawai embedding of the Swiss roll; outpath presumably saves the figure to 'swissroll_kamada.png'.
visualization.show_data(embeddings, labels=labels, square=True, title='Kamada Kawai on Swiss roll dataset', dpi=500, outpath='swissroll_kamada.png')

In [None]:
# Print the evaluation of the Kamada Kawai embedding against the original Swiss roll data.
evaluation.print_evaluation(data, embeddings)

#### GraphSAGE

In [None]:
# GraphSAGE with explicit hyper-parameters instead of the defaults used elsewhere.
# Presumably the final layer size (2) sets the output dimension — confirm in utils.embedding.
embedder = embedding.GraphSAGEEmbedder(epochs=4, num_samples=[10, 5], layer_sizes=[20, 2])
embedder.embed(builder.graph)
embeddings = embedder.embeddings

In [None]:
# Plot the GraphSAGE embedding of the Swiss roll; outpath presumably saves the figure to
# 'swissroll_graphsage.png'. `square=True` is passed exactly once: repeating a keyword
# argument in a call is a SyntaxError.
visualization.show_data(embeddings, labels=labels, square=True, title='GraphSAGE on Swiss roll dataset', dpi=500, outpath='swissroll_graphsage.png')

In [None]:
# Print the evaluation of the GraphSAGE embedding against the original Swiss roll data.
evaluation.print_evaluation(data, embeddings)

#### Comparison: PCA

In [None]:
# Baseline: project the Swiss roll onto its first two principal components.
pca = PCA(n_components=2)
embeddings = pca.fit_transform(data)

In [None]:
# Plot the PCA projection of the Swiss roll; outpath presumably saves the figure to 'swissroll_pca.png'.
visualization.show_data(embeddings, labels=labels, square=True, title='PCA on Swiss roll dataset', dpi=500, outpath='swissroll_pca.png')

In [None]:
# Print the evaluation of the PCA projection against the original Swiss roll data.
evaluation.print_evaluation(data, embeddings)

#### Comparison: UMAP

In [None]:
# Baseline: 2-D UMAP embedding of the Swiss roll; random_state is fixed so reruns give the same layout.
umap_obj = umap.UMAP(n_components=2, random_state=42)
embeddings = umap_obj.fit_transform(data)

In [None]:
# Plot the UMAP embedding of the Swiss roll; outpath presumably saves the figure to 'swissroll_umap.png'.
visualization.show_data(embeddings, labels=labels, square=True, title='UMAP on Swiss roll dataset', dpi=500, outpath='swissroll_umap.png')

In [None]:
# Print the evaluation of the UMAP embedding against the original Swiss roll data.
evaluation.print_evaluation(data, embeddings)

### Reduction to dimension 10
We show dimensionality reduction from dimension 64 to dimension 10. We use the Digits dataset.
We cannot use the Spring embedder, since it can only project to 2D. However, we can use Kamada Kawai and GraphSAGE.

In [None]:
# Reload Digits for the 10-dimensional experiment and pass it through the project's
# de-duplication helper again (shuffling enabled, normalisation disabled).
digits = load_digits()
data, labels = preprocessing.remove_duplicities(
    digits.data,
    digits.target,
    shuffle=True,
    normalize=False,
)

#### Kamada Kawai

In [None]:
# Builder for the Kamada Kawai run of the 10-dimensional experiment (reciprocal weights, no node features).
builder = build.CheapestBuilder(weight_fun=weights.reciprocal)

In [None]:
# Kamada Kawai with embedding_dim=10 instead of the embedder's default; the graph is built
# as part of reduce_dimension (compute_graph left at its default).
embedder = embedding.KamadaKawaiEmbedder(embedding_dim=10)
embeddings = dim_reduction.reduce_dimension(data, builder, embedder)

In [None]:
# Print the evaluation of the 10-dimensional Kamada Kawai embedding against the original Digits data.
evaluation.print_evaluation(data, embeddings)

#### GraphSAGE

In [None]:
# Builder for the GraphSAGE run: node features from features.feature_coords and the
# weight function returned by weights.get_reciprocal_pow(5).
builder = build.CheapestBuilder(weight_fun=weights.get_reciprocal_pow(5), feature_fun=features.feature_coords)

In [None]:
# GraphSAGE with embedding_dim=10; the graph is built with the new builder inside
# reduce_dimension (compute_graph left at its default).
embedder = embedding.GraphSAGEEmbedder(embedding_dim=10)
embeddings = dim_reduction.reduce_dimension(data, builder, embedder)

In [None]:
# Print the evaluation of the 10-dimensional GraphSAGE embedding against the original Digits data.
evaluation.print_evaluation(data, embeddings)

#### Comparison: PCA

In [None]:
# Baseline: project Digits onto its first ten principal components.
pca = PCA(n_components=10)
embeddings = pca.fit_transform(data)

In [None]:
# Print the evaluation of the 10-dimensional PCA projection against the original Digits data.
evaluation.print_evaluation(data, embeddings)

#### Comparison: UMAP

In [None]:
# Baseline: 10-dimensional UMAP embedding of Digits; random_state is fixed so reruns give the same result.
umap_obj = umap.UMAP(n_components=10, random_state=42)
embeddings = umap_obj.fit_transform(data)

In [None]:
# Print the evaluation of the 10-dimensional UMAP embedding against the original Digits data.
evaluation.print_evaluation(data, embeddings)