In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from mpl_toolkits.mplot3d import Axes3D
import mplcursors
import seaborn as sns

import json

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP

In [11]:
# Load the name -> embedding-vector mapping from disk.
# The encoding is pinned to UTF-8 so that non-ASCII keys decode the same way on
# every platform instead of depending on the locale's default encoding.
with open('data/process/human_embeddings.json', 'r', encoding = 'utf-8') as f:
    data = json.load(f)

# One row per top-level JSON key, one column per embedding component.
# Built outside the `with` block so the file handle is released as soon as
# parsing is done.
df = pd.DataFrame(data).transpose()

print(type(data))
print(df.shape)

<class 'dict'>
(75, 768)


In [12]:
# Collect every embedding vector (in dict insertion order) and stack them
# row-wise into a single array for the dimensionality-reduction steps.
embedding_rows = [vector for vector in data.values()]
X = np.stack(embedding_rows, axis = 0)

print(X.shape)  # num_names by embedding dimension

(75, 768)


In [13]:
# PCA projections of the embeddings (2-D and 3-D), each exported as a
# standalone interactive Plotly HTML file.
#
# random_state is pinned: with the default svd_solver='auto', scikit-learn may
# pick the randomized SVD solver for a wide matrix like X (75 x 768), which
# would otherwise make the projected coordinates vary slightly between runs.

# --- 2-D projection ---
pca_2d = PCA(n_components = 2, random_state = 42)
X_reduced_pca_2d = pca_2d.fit_transform(X)

# Work on a copy so the original embedding frame is left untouched.
df_pca_2d = df.copy()

df_pca_2d['x'] = X_reduced_pca_2d[:, 0]
df_pca_2d['y'] = X_reduced_pca_2d[:, 1]

# reset_index() exposes the row labels as an 'index' column, used for both the
# hover text and the per-point colour.
fig = px.scatter(df_pca_2d.reset_index(), x = 'x', y = 'y', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/pca_2d_human_embeddings.html")

# --- 3-D projection ---
pca_3d = PCA(n_components = 3, random_state = 42)
X_reduced_pca_3d = pca_3d.fit_transform(X)

df_pca_3d = df.copy()

df_pca_3d['x'] = X_reduced_pca_3d[:, 0]
df_pca_3d['y'] = X_reduced_pca_3d[:, 1]
df_pca_3d['z'] = X_reduced_pca_3d[:, 2]

fig = px.scatter_3d(df_pca_3d.reset_index(), x = 'x', y = 'y', z = 'z', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/pca_3d_human_embeddings.html")

In [24]:
# t-SNE embeddings (2-D and 3-D) using cosine distance, each exported as a
# standalone interactive Plotly HTML file.
#
# t-SNE is a stochastic method; random_state is pinned so that re-running the
# notebook regenerates the same layout instead of a different one every time.
# NOTE: perplexity must stay below the number of samples (75 rows in X here).

# --- 2-D embedding ---
tsne_2d = TSNE(n_components = 2, perplexity = 30, metric = 'cosine', max_iter = 1000, learning_rate = 100, random_state = 42)
X_reduced_tsne_2d = tsne_2d.fit_transform(X)

# Work on a copy so the original embedding frame is left untouched.
df_tsne_2d = df.copy()

df_tsne_2d['x'] = X_reduced_tsne_2d[:, 0]
df_tsne_2d['y'] = X_reduced_tsne_2d[:, 1]

# reset_index() exposes the row labels as an 'index' column, used for both the
# hover text and the per-point colour.
fig = px.scatter(df_tsne_2d.reset_index(), x = 'x', y = 'y', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/tsne_2d_human_embeddings.html")

# --- 3-D embedding ---
tsne_3d = TSNE(n_components = 3, perplexity = 30, metric = 'cosine', max_iter = 1000, learning_rate = 100, random_state = 42)
X_reduced_tsne_3d = tsne_3d.fit_transform(X)

df_tsne_3d = df.copy()

df_tsne_3d['x'] = X_reduced_tsne_3d[:, 0]
df_tsne_3d['y'] = X_reduced_tsne_3d[:, 1]
df_tsne_3d['z'] = X_reduced_tsne_3d[:, 2]

fig = px.scatter_3d(df_tsne_3d.reset_index(), x = 'x', y = 'y', z = 'z', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/tsne_3d_human_embeddings.html")



In [30]:
# UMAP embeddings (2-D and 3-D) using cosine distance, each exported as a
# standalone interactive Plotly HTML file.
#
# UMAP is a stochastic method; random_state is pinned so that re-running the
# notebook regenerates the same layout. Per the umap-learn documentation, a
# fixed seed trades away multi-threaded execution for reproducibility, which
# should be negligible for a dataset this small.
#
# NOTE(review): the 2-D and 3-D runs use different n_neighbors / min_dist
# values, so the two plots are not directly comparable views of one embedding
# -- confirm this is intentional.

# --- 2-D embedding ---
umap_2d = UMAP(n_components = 2, metric = 'cosine', n_neighbors = 10, min_dist = 0.1, random_state = 42)
X_reduced_umap_2d = umap_2d.fit_transform(X)

# Work on a copy so the original embedding frame is left untouched.
df_umap_2d = df.copy()

df_umap_2d['x'] = X_reduced_umap_2d[:, 0]
df_umap_2d['y'] = X_reduced_umap_2d[:, 1]

# reset_index() exposes the row labels as an 'index' column, used for both the
# hover text and the per-point colour.
fig = px.scatter(df_umap_2d.reset_index(), x = 'x', y = 'y', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/umap_2d_human_embeddings.html")

# --- 3-D embedding ---
umap_3d = UMAP(n_components = 3, metric = 'cosine', n_neighbors = 30, min_dist = 0.05, random_state = 42)
X_reduced_umap_3d = umap_3d.fit_transform(X)

df_umap_3d = df.copy()

df_umap_3d['x'] = X_reduced_umap_3d[:, 0]
df_umap_3d['y'] = X_reduced_umap_3d[:, 1]
df_umap_3d['z'] = X_reduced_umap_3d[:, 2]

fig = px.scatter_3d(df_umap_3d.reset_index(), x = 'x', y = 'y', z = 'z', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/umap_3d_human_embeddings.html")


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.






'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



