In [None]:
import os

import numpy as np
import pandas as pd
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [None]:
def generate_embedding(dataset,
                       iterations,
                       perplexity,
                       pca_dim,
                       learning_rate,
                       verbose=1):
    """Compute a 3-D t-SNE embedding of a dataset and cache it as a CSV file.

    Reads ``../data/{dataset}_input.csv`` and ``../data/{dataset}_labels.csv``,
    reduces the input with PCA, runs t-SNE on the PCA output and writes the
    result (columns ``x``, ``y``, ``z``; index taken from the labels file) to
    ``data.csv`` inside a directory whose name encodes every hyper-parameter.
    When that ``data.csv`` already exists, nothing is read or recomputed.

    Parameters
    ----------
    dataset : str
        Prefix of the input/label CSV files under ``../data``.
    iterations : int
        Forwarded to ``TSNE`` as ``n_iter``.
    perplexity, learning_rate
        Forwarded unchanged to ``TSNE``.
    pca_dim : int
        Requested number of PCA components; capped by the number of rows and
        columns of the input table.
    verbose
        When truthy, progress messages are printed.

    Returns
    -------
    None
        The embedding is only written to disk.
    """
    path = f'../demo_embeddings/{dataset}/iterations_{iterations}/perplexity_{perplexity}/pca_{pca_dim}/learning_rate_{learning_rate}'
    output_file = path + '/data.csv'

    def display(string):
        if verbose:
            print(string)

    # Cached result: skip all the expensive work.
    if os.path.exists(output_file):
        display(f'{dataset} already exists.')
        return

    data = pd.read_csv(f"../data/{dataset}_input.csv")
    labels = pd.read_csv(f"../data/{dataset}_labels.csv")

    # PCA cannot produce more components than min(n_samples, n_features).
    n_components = min(pca_dim, *data.shape)
    pca = PCA(n_components=n_components)
    data_pca = pca.fit_transform(data.values)

    # NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter`;
    # confirm against the installed version.
    tsne = TSNE(n_components=3,
                n_iter=iterations,
                learning_rate=learning_rate,
                perplexity=perplexity,
                random_state=1131)

    embedding = tsne.fit_transform(data_pca)

    embedding_df = pd.DataFrame(embedding, columns=['x', 'y', 'z'])

    # `labels.values` is a 2-D (n, 1) array, which pandas rejects as an index;
    # use the first column as a 1-D array instead.
    # Assumes the labels file holds a single column -- TODO confirm.
    embedding_df.index = labels.iloc[:, 0].values

    # Create the output directory only once there is something to write, so a
    # failed run does not leave an empty directory tree behind.
    os.makedirs(path, exist_ok=True)
    embedding_df.to_csv(output_file)

    display(f'{path} has been generated.')

In [None]:
# Build (or reuse) the cached embedding for 'mnist_3000' with these settings.
generate_embedding(
    dataset='mnist_3000',
    iterations=250,
    perplexity=3,
    pca_dim=25,
    learning_rate=100,
    verbose=1,
)

In [None]:
dataset = 'mnist_3000'
iterations = 250
perplexity = 3
pca_dim = 25
learning_rate = 10

path = f'../demo_embeddings/{dataset}/iterations_{iterations}/perplexity_{perplexity}/pca_{pca_dim}/learning_rate_{learning_rate}'

# The earlier generate_embedding call in this notebook uses learning_rate=100,
# so the learning_rate=10 embedding read here may not exist on a fresh run.
# Make sure it does; the call returns immediately when data.csv is already
# present.
generate_embedding(dataset, iterations, perplexity, pca_dim, learning_rate, verbose=0)

# The first CSV column holds the index written by generate_embedding.
pd.read_csv(path + '/data.csv', index_col=0)

In [None]:
%timeit df = pd.read_csv("mnist_3000_input.csv")
%timeit df2 = pd.read_csv('../demo_embeddings/mnist_3000/iterations_250/perplexity_3/pca_25/learning_rate_10/data.csv')
%timeit combined_df = df.join(df2.loc[:,'x':'z']).set_index(['x','y','z'])

In [None]:
# Reload the cached embedding with pandas' default integer index and preview
# its first rows.
embedding_csv = '../demo_embeddings/mnist_3000/iterations_250/perplexity_3/pca_25/learning_rate_10/data.csv'
df2 = pd.read_csv(embedding_csv)
df2.head()

In [None]:
# First row of the input table, viewed as a 28x28 grid
# (presumably one flattened digit image -- confirm against the data file).
image = df.iloc[0]
matrix = np.reshape(image.values, (28, 28))

In [None]:
# Target coordinates given to 6 decimal places
# (presumably copied from a rounded DataFrame display -- confirm).
di = {'x': -3.510562, 'y': 1.200590}
def compare(coord, tol=1e-6):
    """Return a boolean mask of ``df2`` rows whose ``coord`` matches ``di[coord]``.

    Exact ``==`` on floats fails when the target was rounded, so a row matches
    when its value lies within ``tol`` of the target.

    Parameters
    ----------
    coord : str
        Column of ``df2`` and key of ``di`` to compare.
    tol : float, optional
        Maximum absolute difference still counted as a match.

    Returns
    -------
    pandas.Series
        Boolean mask aligned with ``df2``.
    """
    return (df2[coord] - di[coord]).abs() <= tol

# Show the target y value, then the rows of df2 selected by compare('y').
print(di['y'])
df2.loc[compare('y')]

In [None]:
# Sample hover payload describing a single 3-D point
# (presumably captured from a plot hover callback -- confirm).
hoverData = {
    "points": [
        {
            "x": 0.86785585,
            "y": 2.3639283,
            "z": 1.0667368,
            "curveNumber": 4,
            "pointNumber": 171,
            "text": "Digit 4",
        }
    ]
}
# Pull the x, y, z coordinates of the first point into a float64 vector.
hover_point_np = np.fromiter(
    (hoverData['points'][0][axis] for axis in 'xyz'),
    dtype=np.float64,
)
hover_point_np

In [None]:
# Rows of the embedding whose (x, y, z) exactly equal the hovered point.
mask = df2.loc[:, 'x':'z'].eq(hover_point_np).all(axis=1)
if not mask.any():
    raise IndexError(f'no embedding row matches hover point {hover_point_np}')

# df2 was read with the default integer index, so the label of the first
# matching row is also its position in df.
matched_row = mask.idxmax()

# Reshape that input row to 28x28 and render it as an 8-bit image.
# Assumes the values lie in [0, 1] before scaling by 255 -- TODO confirm.
image = df.iloc[matched_row].values.reshape(28,28).astype(np.float64)
Image.fromarray(np.uint8(255 * image))

In [None]:
import base64
from PIL import Image
from io import BytesIO

def numpy_to_b64(array, scalar=True):
    """Encode an array as a base64 string holding a PNG image.

    Parameters
    ----------
    array
        Image data handed to ``Image.fromarray``.
    scalar : bool, optional
        When truthy, ``array`` is multiplied by 255 and cast to ``uint8``
        before encoding (presumably for values in [0, 1] -- confirm).
        When falsy, ``array`` is used as given.

    Returns
    -------
    str
        UTF-8 decoded base64 text of the PNG bytes.
    """
    pixels = np.uint8(255 * array) if scalar else array

    with BytesIO() as png_buffer:
        Image.fromarray(pixels).save(png_buffer, format="png")
        encoded = base64.b64encode(png_buffer.getvalue())

    return encoded.decode("utf-8")

# Base64 text of the current `image`, scaled by 255 to 8-bit (the default mode).
numpy_to_b64(image, scalar=True)