In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

import json
import random

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP

!pip install --upgrade nbformat



  from .autonotebook import tqdm as notebook_tqdm




In [2]:
# Load the per-name embeddings. The encoding is given explicitly so the
# result does not depend on the platform's locale default when the keys
# contain non-ASCII characters.
with open('data/process/human_embeddings.json', 'r', encoding = 'utf-8') as f:
    data = json.load(f)

# pd.DataFrame(data) puts each top-level key in a column; transposing turns
# every key into a row label and every embedding dimension into a column.
df = pd.DataFrame(data).transpose()

print(type(data))
print(df.shape)

<class 'dict'>
(75, 768)


In [3]:
# Stack the embedding vectors (in the dict's insertion order) into a single
# 2-D array, one row per entry of `data`.
embedding_rows = [vector for vector in data.values()]
X = np.stack(embedding_rows)

print(X.shape) # num_names by embedding dimension

(75, 768)


In [4]:
# Project the embeddings onto their leading principal components and export
# interactive 2D and 3D scatter plots, one point per row of `df`.
#
# NOTE: for a matrix of this shape scikit-learn's automatic solver selection
# may pick the randomized SVD solver, so the seed is pinned to make reruns
# produce the same coordinates.

# --- 2D ---
pca_2d = PCA(n_components = 2, random_state = 42)
X_reduced_pca_2d = pca_2d.fit_transform(X)

# assign() returns a new frame: the original embedding columns plus the
# projected coordinates, leaving `df` untouched.
df_pca_2d = df.assign(
    x = X_reduced_pca_2d[:, 0],
    y = X_reduced_pca_2d[:, 1],
)

# reset_index() exposes the row labels as an 'index' column, which is used
# both as the hover label and as the colour key.
fig = px.scatter(df_pca_2d.reset_index(), x = 'x', y = 'y', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/pca_2d_human_embeddings.html")

# --- 3D ---
pca_3d = PCA(n_components = 3, random_state = 42)
X_reduced_pca_3d = pca_3d.fit_transform(X)

df_pca_3d = df.assign(
    x = X_reduced_pca_3d[:, 0],
    y = X_reduced_pca_3d[:, 1],
    z = X_reduced_pca_3d[:, 2],
)

fig = px.scatter_3d(df_pca_3d.reset_index(), x = 'x', y = 'y', z = 'z', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/pca_3d_human_embeddings.html")

In [5]:
# Embed the vectors with t-SNE (cosine distance) and export interactive 2D
# and 3D scatter plots, one point per row of `df`.
#
# t-SNE is stochastic: without a fixed random_state every run yields a
# different layout, so the seed is pinned to keep the exported HTML
# reproducible.

# --- 2D ---
tsne_2d = TSNE(n_components = 2, perplexity = 30, metric = 'cosine', max_iter = 1000, learning_rate = 100, random_state = 42)
X_reduced_tsne_2d = tsne_2d.fit_transform(X)

# assign() returns a new frame with the coordinates appended; `df` is untouched.
df_tsne_2d = df.assign(
    x = X_reduced_tsne_2d[:, 0],
    y = X_reduced_tsne_2d[:, 1],
)

# reset_index() exposes the row labels as an 'index' column for hover/colour.
fig = px.scatter(df_tsne_2d.reset_index(), x = 'x', y = 'y', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/tsne_2d_human_embeddings.html")

# --- 3D ---
tsne_3d = TSNE(n_components = 3, perplexity = 30, metric = 'cosine', max_iter = 1000, learning_rate = 100, random_state = 42)
X_reduced_tsne_3d = tsne_3d.fit_transform(X)

df_tsne_3d = df.assign(
    x = X_reduced_tsne_3d[:, 0],
    y = X_reduced_tsne_3d[:, 1],
    z = X_reduced_tsne_3d[:, 2],
)

fig = px.scatter_3d(df_tsne_3d.reset_index(), x = 'x', y = 'y', z = 'z', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/tsne_3d_human_embeddings.html")



In [6]:
# Embed the vectors with UMAP (cosine distance) and export interactive 2D and
# 3D scatter plots, one point per row of `df`.
#
# UMAP is stochastic: without a fixed random_state every run yields a
# different layout, so the seed is pinned to keep the exported HTML
# reproducible.
# NOTE(review): umap-learn is expected to run single-threaded (and may warn
# about it) when a seed is set — confirm against the installed version.

# --- 2D ---
umap_2d = UMAP(n_components = 2, metric = 'cosine', n_neighbors = 10, min_dist = 0.1, random_state = 42)
X_reduced_umap_2d = umap_2d.fit_transform(X)

# assign() returns a new frame with the coordinates appended; `df` is untouched.
df_umap_2d = df.assign(
    x = X_reduced_umap_2d[:, 0],
    y = X_reduced_umap_2d[:, 1],
)

# reset_index() exposes the row labels as an 'index' column for hover/colour.
fig = px.scatter(df_umap_2d.reset_index(), x = 'x', y = 'y', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/umap_2d_human_embeddings.html")

# --- 3D ---
# Note: the 3D run uses different neighbourhood settings than the 2D run
# (n_neighbors = 30, min_dist = 0.05 versus 10 and 0.1).
umap_3d = UMAP(n_components = 3, metric = 'cosine', n_neighbors = 30, min_dist = 0.05, random_state = 42)
X_reduced_umap_3d = umap_3d.fit_transform(X)

df_umap_3d = df.assign(
    x = X_reduced_umap_3d[:, 0],
    y = X_reduced_umap_3d[:, 1],
    z = X_reduced_umap_3d[:, 2],
)

fig = px.scatter_3d(df_umap_3d.reset_index(), x = 'x', y = 'y', z = 'z', hover_name = 'index', color = 'index')
fig.write_html("data/postprocess/umap_3d_human_embeddings.html")


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.






'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.





In [7]:
# Load the referral mapping. The encoding is given explicitly so the result
# does not depend on the platform's locale default when names contain
# non-ASCII characters.
# NOTE(review): presumably maps each referrer to a list of referees — confirm
# against the file that produces this JSON.
with open('data/process/member_to_member_referrals_given_75_100.json', 'r', encoding = 'utf-8') as f:
    data_dict = json.load(f)

In [8]:
# Draw the referral graph as a 3D "arrow galaxy": every member is a randomly
# placed point, and every referral is a line from referrer to referee with a
# cone near the referee end showing the direction.

# Every member that appears as a referrer (key) or as a referee (value).
all_nodes = set(data_dict.keys())
for values in data_dict.values():
    all_nodes.update(values)

# Generate random 3D positions.
# A private, seeded generator plus a sorted iteration order make the layout
# identical on every run; set iteration order alone is not stable between
# interpreter sessions. key = str keeps the sort well-defined even if the
# JSON values are not all strings.
layout_rng = random.Random(42)
node_positions = {
    node: np.array([layout_rng.uniform(-10, 10) for _ in range(3)])
    for node in sorted(all_nodes, key = str)
}

# Scatter3D node positions
points_trace = go.Scatter3d(
    x = [pos[0] for pos in node_positions.values()],
    y = [pos[1] for pos in node_positions.values()],
    z = [pos[2] for pos in node_positions.values()],
    mode = 'markers+text',
    marker = dict(size = 5, color = 'blue'),
    text = list(node_positions.keys()),
    textposition = "top center"
)

# Arrows = line + cone
line_x, line_y, line_z = [], [], []
cone_origins = []
cone_directions = []

for referrer, referees in data_dict.items():
    origin = node_positions[referrer]
    for referee in referees:
        destination = node_positions[referee]
        vector = destination - origin
        length = np.linalg.norm(vector)

        # A self-referral (or two coincident points) has no direction;
        # normalising it would divide by zero and feed NaNs to the cone
        # trace, so such edges are skipped.
        if length == 0:
            continue

        # Line segment from origin to destination; the trailing None breaks
        # the polyline so consecutive edges are not joined together.
        line_x += [origin[0], destination[0], None]
        line_y += [origin[1], destination[1], None]
        line_z += [origin[2], destination[2], None]

        # Cone placed 80% of the way along the edge, pointing from the
        # referrer toward the referee.
        cone_origins.append(destination - 0.2 * vector)  # move back a bit for visual clarity
        cone_directions.append(0.2 * vector / length)  # fixed-length direction vector

# Stack cone data; reshape keeps the arrays 2-D (0 x 3) when there are no
# edges, so the column slicing below still works.
cone_origins = np.array(cone_origins).reshape(-1, 3)
cone_directions = np.array(cone_directions).reshape(-1, 3)

lines_trace = go.Scatter3d(
    x = line_x,
    y = line_y,
    z = line_z,
    mode = 'lines',
    line = dict(color = 'purple', width = 3)
)

cones_trace = go.Cone(
    x = cone_origins[:, 0],
    y = cone_origins[:, 1],
    z = cone_origins[:, 2],
    u = cone_directions[:, 0],
    v = cone_directions[:, 1],
    w = cone_directions[:, 2],
    sizemode = "absolute",
    sizeref = 0.1,
    anchor = "tail",
    colorscale = "Reds",
    showscale = False
)

# Final figure
fig = go.Figure(data = [points_trace, lines_trace, cones_trace])
fig.update_layout(
    scene = dict(
        xaxis = dict(title = 'X'),
        yaxis = dict(title = 'Y'),
        zaxis = dict(title = 'Z')
    ),
    title = "3D Arrow Galaxy: Referrers → Referrees",
    margin = dict(l = 0, r = 0, b = 0, t = 50)
)

fig.write_html("data/postprocess/referrer_referree_3d_arrows.html")