In [None]:
import torchvision
import numpy as np

# Fetch the CIFAR-10 training split into ./data (downloaded on first use).
cifar10 = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
)

# Work on the first 2000 samples only. The image array is reordered with
# transpose(0, 3, 1, 2), i.e. the last axis is moved in front of the two
# middle axes (presumably HWC -> CHW — confirm against torchvision's layout).
X = cifar10.data[:2000].transpose(0, 3, 1, 2)
y = np.array(cifar10.targets[:2000])
X.shape, y.shape

In [None]:
# Flatten every sample into a single feature vector (one row per image).
# TODO: the dimensionality reduction currently runs directly on the raw images;
# in principle it should run on embeddings produced by a feature extractor.
X = X.reshape(len(X), -1)
X.shape

In [None]:
# Balance the classes: keep the same number of samples for each label.
# `sample_num` ends up as the size of the smallest class, and the first
# `sample_num` samples of every class are kept, grouped by label in the
# sorted order returned by np.unique.
labels, sample_num = np.unique(y, return_counts=True)
sample_num = sample_num.min()

# Both arrays are filtered with the *original* y: the right-hand side is fully
# evaluated before X and y are rebound.
X, y = [
    np.concatenate([arr[y == label][:sample_num] for label in labels])
    for arr in (X, y)
]
X.shape, y.shape

In [None]:
%%time
from methods import AE, PCA, TSNE, UMAP, LLE, Isomap, MDS, SE
from visualize import plot_2d_scatter, plot_2d_scatter_matrix, plot_mixed, plot_dim_reduct_acc_comparison
from evaluate import evaluate_similarity_matching
from dim import Estimater

m = 100
dist = 'Euclidean'
score_original, t_original = evaluate_similarity_matching(X, y, N=sample_num, m=m, method=dist, p=3)
print(f'original: {score_original*100:.2f}%, {t_original*1e3:.2f}ms')

dim = []
for algo in ['MLE', 'MADA', 'MiND', 'CorrInt','TwoNN', 'TLE', 'MOM', 'FisherS']:
    estimater = Estimater(method=algo)
    dim.append(estimater.fit(X).transform(X))
    print(algo, f'{dim[-1]:.1f}')
print('Mean', f'{np.mean(dim):.1f}')

n_component = round(np.mean(dim))

In [None]:
%%time

ae = AE(
    n_latent=n_component,
    n_encoders=[X.shape[-1], 32],
    n_decoders=[32, X.shape[-1]],
    activation="relu",
    max_epoch=5000,
    patience=100,
    device="cuda:2"
)
X_reduced_ae = ae.fit_transform(X)
score_reduced_ae, t_reduced_ae = evaluate_similarity_matching(X_reduced_ae, y, N=sample_num, m=m, method=dist, p=3)

fig_ae = plot_mixed(data=X_reduced_ae,
    color=y,
    marker_size=2,
    title="AE",
    zoom=1.0,
    label='digit',
    score1=score_original,
    score2=score_reduced_ae,
    t1=t_original,
    t2=t_reduced_ae,
    dim1=X.shape[1], 
    dim2=n_component,
)

In [None]:
%%time

pca = PCA(n_components=n_component)
X_reduced_pca = pca.fit_transform(X)
score_reduced_pca, t_reduced_pca = evaluate_similarity_matching(X_reduced_pca, y, N=sample_num, m=m, method=dist, p=3)

fig_pca = plot_mixed(data=X_reduced_pca,
    color=y,
    marker_size=2,
    title="PCA",
    zoom=1.0,
    label='digit',
    score1=score_original,
    score2=score_reduced_pca,
    t1=t_original,
    t2=t_reduced_pca,
    dim1=X.shape[1],
    dim2=n_component,
)

In [None]:
%%time

isomap = Isomap(n_components=n_component)
X_reduced_isomap = isomap.fit_transform(X)
score_reduced_isomap, t_reduced_isomap = evaluate_similarity_matching(X_reduced_isomap, y, N=sample_num, m=m, method=dist, p=3)


fig_isomap = plot_mixed(data=X_reduced_isomap,
    color=y,
    marker_size=2,
    title="Isomap",
    zoom=1.0,
    label='digit',
    score1=score_original,
    score2=score_reduced_isomap,
    t1=t_original,
    t2=t_reduced_isomap,
    dim1=X.shape[1],
    dim2=n_component,
)

In [None]:
%%time

lle = LLE(n_components=n_component)
X_reduced_lle = lle.fit_transform(X)
score_reduced_lle, t_reduced_lle = evaluate_similarity_matching(X_reduced_lle, y, N=sample_num, m=m, method=dist, p=3)


fig_lle = plot_mixed(data=X_reduced_lle,
    color=y,
    marker_size=2,
    title="LLE",
    zoom=1.0,
    label='digit',
    score1=score_original,
    score2=score_reduced_lle,
    t1=t_original,
    t2=t_reduced_lle,
    dim1=X.shape[1],
    dim2=n_component,
)

In [None]:
%%time

mds = MDS(n_components=n_component)
X_reduced_mds = mds.fit_transform(X)
score_reduced_mds, t_reduced_mds = evaluate_similarity_matching(X_reduced_mds, y, N=sample_num, m=m, method=dist, p=3)


fig_mds = plot_mixed(data=X_reduced_mds,
    color=y,
    marker_size=2,
    title="MDS",
    zoom=1.0,
    label='digit',
    score1=score_original,
    score2=score_reduced_mds,
    t1=t_original,
    t2=t_reduced_mds,
    dim1=X.shape[1],
    dim2=n_component,
)

In [None]:
%%time

se = SE(n_components=n_component)
X_reduced_se = se.fit_transform(X)
score_reduced_se, t_reduced_se = evaluate_similarity_matching(X_reduced_se, y, N=sample_num, m=m, method=dist, p=3)


fig_se = plot_mixed(data=X_reduced_se,
    color=y,
    marker_size=2,
    title="SE",
    zoom=1.0,
    label='digit',
    score1=score_original,
    score2=score_reduced_se,
    t1=t_original,
    t2=t_reduced_se,
    dim1=X.shape[1],
    dim2=n_component,
)

In [None]:
%%time

umap = UMAP(n_components=n_component)
X_reduced_umap = umap.fit_transform(X)
score_reduced_umap, t_reduced_umap = evaluate_similarity_matching(X_reduced_umap, y, N=sample_num, m=m, method=dist, p=3)


fig_umap = plot_mixed(data=X_reduced_umap,
    color=y,
    marker_size=2,
    title="UMAP",
    zoom=0.7,
    label='digit',
    score1=score_original,
    score2=score_reduced_umap,
    t1=t_original,
    t2=t_reduced_umap,
    dim1=X.shape[1],
    dim2=n_component,
)

In [None]:
%%time

tsne = TSNE(n_components=n_component)
X_reduced_tsne = tsne.fit_transform(X)
score_reduced_tsne, t_reduced_tsne = evaluate_similarity_matching(X_reduced_tsne, y, N=sample_num, m=m, method=dist, p=3)


fig_tsne = plot_mixed(data=X_reduced_tsne,
    color=y,
    marker_size=2,
    title="TSNE",
    zoom=1.0,
    label='digit',
    score1=score_original,
    score2=score_reduced_tsne,
    t1=t_original,
    t2=t_reduced_tsne,
    dim1=X.shape[1],
    dim2=n_component,
)

In [None]:
# Pair every method name with its similarity-matching score so the two lists
# passed to the comparison plot cannot drift out of order, then split the pairs
# back into the parallel `names` / `scores` lists.
names, scores = map(list, zip(
    ("Original", score_original),
    ("AE", score_reduced_ae),
    ("PCA", score_reduced_pca),
    ("LLE", score_reduced_lle),
    ("Isomap", score_reduced_isomap),
    ("MDS", score_reduced_mds),
    ("SE", score_reduced_se),
    ("UMAP", score_reduced_umap),
    ("TSNE", score_reduced_tsne),
))

# Compare all methods in one plot, passing the full-dimensional score as the
# reference value.
plot_dim_reduct_acc_comparison(names, scores, dist=dist, score_original=score_original)