<a href="https://colab.research.google.com/github/mtrefilek/cs762/blob/main/tsne.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [65]:
from __future__ import print_function
import time
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

In [66]:
# Load the feature archive and flatten it into one long DataFrame: one row per
# sample, one 'feature<i>' column per feature dimension, plus the class name in
# 'y' and its string form in 'label'.
dataset_file = "MNIST_clip-vit-base-patch32_train"
# NOTE(review): allow_pickle=True unpickles objects stored in the archive, which
# can run arbitrary code -- only load .npz files from a trusted source.
data = np.load(dataset_file+".npz", allow_pickle=True)
feature_matrices = np.asarray(data["feature_matrices"])
# Read the class names once; indexing the loaded archive inside the loop would
# fetch the array again on every iteration.
classnames = data["classnames"]
# Column count is taken from the first row of the first matrix.
feat_cols = [ 'feature'+str(i) for i in range(feature_matrices[0][0].size) ]

# Build one frame per entry along the first axis, then concatenate once.
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every pass; iterating the first axis also avoids relying on `.size`, which
# only equals the number of matrices when the array is 1-D.
class_frames = []
for matrix, classname in zip(feature_matrices, classnames):
  class_frame = pd.DataFrame(matrix, columns=feat_cols)
  class_frame['y'] = classname
  class_frames.append(class_frame)

# ignore_index=True yields a fresh 0..n-1 index, so no reset_index is needed.
df = pd.concat(class_frames, ignore_index=True)
df['label'] = df['y'].map(str)

# Take a reproducible random sample of at most N rows from df and expose the
# feature columns of that sample as a plain NumPy array.
np.random.seed(42)  # fixes the permutation drawn on the next line
rndperm = np.random.permutation(len(df))

N = 10000
sampled_rows = rndperm[:N]  # slicing simply yields fewer rows if df is shorter than N
df_subset = df.reindex(sampled_rows).copy()
data_subset = df_subset[feat_cols].to_numpy()




In [None]:
# Fit a 2-D t-SNE embedding of the sampled feature matrix and report wall time.
time_start = time.time()
# random_state pins the embedding so re-running this cell alone gives the same
# layout instead of depending on the current global NumPy RNG state.
tsne_kwargs = dict(n_components=2, verbose=1, perplexity=40, random_state=42)
try:
    # scikit-learn >= 1.5 names the iteration budget `max_iter`.
    tsne = TSNE(max_iter=300, **tsne_kwargs)
except TypeError:
    # Older releases only accept the previous keyword name `n_iter`.
    tsne = TSNE(n_iter=300, **tsne_kwargs)
tsne_results = tsne.fit_transform(data_subset)
print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))



[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 10000 samples in 0.006s...
[t-SNE] Computed neighbors for 10000 samples in 4.383s...
[t-SNE] Computed conditional probabilities for sample 1000 / 10000
[t-SNE] Computed conditional probabilities for sample 2000 / 10000
[t-SNE] Computed conditional probabilities for sample 3000 / 10000
[t-SNE] Computed conditional probabilities for sample 4000 / 10000
[t-SNE] Computed conditional probabilities for sample 5000 / 10000
[t-SNE] Computed conditional probabilities for sample 6000 / 10000
[t-SNE] Computed conditional probabilities for sample 7000 / 10000
[t-SNE] Computed conditional probabilities for sample 8000 / 10000
[t-SNE] Computed conditional probabilities for sample 9000 / 10000
[t-SNE] Computed conditional probabilities for sample 10000 / 10000
[t-SNE] Mean sigma: 0.643370


In [None]:
# Attach the two t-SNE coordinates to the sampled frame and draw them as a
# scatter plot, one colour per distinct value of 'y'.
df_subset['tsne-2d-one'] = tsne_results[:,0]
df_subset['tsne-2d-two'] = tsne_results[:,1]

# Size the palette from the data rather than hard-coding 10 colours, so the
# plot still works when the sample holds a different number of classes.
n_classes = df_subset['y'].nunique()

fig, ax = plt.subplots(figsize=(16,10))
sns.scatterplot(
    x="tsne-2d-one", y="tsne-2d-two",
    hue="y",
    palette=sns.color_palette("hls", n_classes),
    data=df_subset,
    legend="full",
    alpha=0.3,
    ax=ax,
)
ax.set_title("t-SNE embedding of the sampled feature vectors, coloured by class")
plt.show()  # render the figure without echoing the Axes/Text repr