# follow along:

`git clone https://github.com/tiga1231/dimbridge-jupyter.git`

In [None]:
!pip install dimbridge

!python -m venv .venv
!source .venv/bin/activate

In [None]:
# This example needs additional packages:
!pip install matplotlib umap-learn

In [None]:
# for development: editable install of the package in the current directory,
# together with its "dev" extra (uncomment to use).
# !pip install -e ".[dev]"

In [None]:
# %load_ext autoreload
# %autoreload 2
# %env ANYWIDGET_HMR=1
from glob import glob

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

%matplotlib inline

import numpy as np
import pandas as pd
from umap import UMAP

from dimbridge import Dimbridge

# Apply both style sheets in one call. Styles in the list are applied in order,
# so "seaborn-v0_8-colorblind" overrides any settings it shares with "ggplot".
plt.style.use(["ggplot", "seaborn-v0_8-colorblind"])

## Minimal example: Synthetic Data

In [None]:
# Disabled synthetic example. When uncommented it samples n random (u, v) angle
# pairs, maps them to four coordinates (x, y, z, w) stored as columns x1..x4,
# and uses (u, v) itself as the 2-D layout (the UMAP call is commented out).

# n = int(1e4)

# ## data
# R = 2
# P = 3
# eps = 0.5
# u = np.random.rand(n) * np.pi * 2
# v = np.random.rand(n) * np.pi * 2

# x = R * (np.cos(u / 2) * np.cos(v) - np.sin(u / 2) * np.sin(2 * v))
# y = R * (np.sin(u / 2) * np.cos(v) + np.cos(u / 2) * np.sin(2 * v))
# z = P * np.cos(u) * (1 + eps * np.sin(v))
# w = P * np.sin(u) * (1 + eps * np.sin(v))

# ## construct pandas dataframe and compute the 2-D layout
# df = pd.DataFrame(dict(x1=x, x2=y, x3=z, x4=w))
# # xy = UMAP(n_neighbors=50, min_dist=0.3).fit_transform(df.to_numpy())
# xy = np.c_[u, v]

# ## validate the 2-D layout
# plt.figure(figsize=[3, 3])
# plt.scatter(xy[:, 0], xy[:, 1], s=1)
# plt.axis("equal")
# plt.show()

In [None]:
# Disabled widget for the synthetic example. It expects `df` and `xy` from the
# (also commented-out) synthetic-data cell above.

# # for dev testing:
# from importlib import reload
# import dimbridge

# reload(dimbridge)
# from dimbridge import Dimbridge

# dimbridge = Dimbridge(
#     data=df,
#     x=xy[:, 0],
#     y=xy[:, 1],
#     s=4,  # projection plot mark size
#     splom_s=1,  # SPLOM plot mark size
#     predicate_mode="data extent",  # "data extent", "predicate regression"
#     brush_mode="single",  # 'single', "contrastive", "curve",
# )
# dimbridge

## Animal Data in the paper

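- Download the animal images from [this Google Drive folder](https://drive.google.com/drive/folders/1x1Ptvpoay4YsM6IrtuDr11iYtkrv8nzI), unzip them, and copy the result into `datasets/` (the loading cell below reads from `./datasets/animals5_remote/`).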

In [None]:
# dataset_name = "gait2"
# df = pd.read_csv(f"./datasets/{dataset_name}/{dataset_name}.csv")
# df = df.drop(columns=["x", "y"])
# for col in df.columns:
#     if df[col].dtype == "int64":
#         df[col] = df[col].astype("int32")
# # xy = np.c_[df["leg1.joint1.angle"].to_numpy(), df["leg1.joint2.angle"].to_numpy()]
# numeric_columns = [col for col in df.columns if df[col].dtype != "int32"]
# xy = UMAP(n_neighbors=30, min_dist=0.2).fit_transform(df[numeric_columns].to_numpy())


# Load the animal table; its x/y columns are used as the 2-D layout and its
# image_url column as the per-row image links.
dataset_name = "animals5_remote"
df = pd.read_csv("./datasets/animals5_remote/animals5.csv")

# Extract layout and URLs first, then keep only the remaining columns in df.
xy = df.loc[:, ["x", "y"]].to_numpy()
image_urls = df["image_url"].tolist()
df = df.drop(["x", "y", "image_filename", "image_url"], axis="columns")
# xy = UMAP(n_neighbors=50, min_dist=0.8).fit_transform(df.to_numpy())

df

In [None]:
## sanity-check the 2-D layout with a small scatter plot
fig, ax = plt.subplots(figsize=(3, 3))
ax.scatter(xy[:, 0], xy[:, 1], s=1)
ax.axis("equal")
plt.show()

In [None]:
# for dev testing: re-import the package so that source edits are picked up
# without restarting the kernel.
from importlib import reload

# Aliased on purpose: the plain name `dimbridge` is used for the widget instance
# below, so the module gets its own name instead of being shadowed in this cell.
import dimbridge as dimbridge_module

# NOTE: reload() re-executes only the module object it is given.
reload(dimbridge_module)
from dimbridge import Dimbridge

# Main widget: the feature table plus the 2-D layout and per-row image links.
dimbridge = Dimbridge(
    data=df,
    image_urls=image_urls,
    x=xy[:, 0],
    y=xy[:, 1],
    s=4,  # projection plot mark size
    splom_s=2,  # SPLOM plot mark size
    predicate_mode="predicate regression",  # "data extent", "predicate regression"
    brush_mode="contrastive",  # 'single', "contrastive", "curve",
)

dimbridge

## Getting subset from UI

In [None]:
# Show the first entry of the widget's `selected` attribute. Later cells negate
# it with `~` and use it to index rows — presumably one boolean flag per row of
# df; confirm against the widget. Requires a selection made in the UI above.
print(dimbridge.selected[0])

In [None]:
# Convert the first selection to a NumPy array so it can be negated (`~selected`)
# and used to index NumPy arrays in the cells below.
# NOTE(review): assumes a boolean mask with one entry per row of df — confirm.
selected = np.array(dimbridge.selected[0])
df[selected]

In [None]:
# Plot only the selected points at their positions in the original 2-D layout.
plt.scatter(xy[selected, 0], xy[selected, 1])
plt.axis("equal")
# End with show() like the other plotting cells; otherwise the tuple returned by
# plt.axis() is echoed as the cell's output.
plt.show()

# Subset UMAP

In [None]:
# Re-embed only the selected rows. A fixed random_state makes the layout
# repeatable for a given selection (umap-learn runs single-threaded when seeded).
subset_xy = UMAP(n_neighbors=50, random_state=0).fit_transform(df[selected])
plt.scatter(subset_xy[:, 0], subset_xy[:, 1])
plt.axis("equal")
plt.show()

In [None]:
# Second widget, restricted to the selected rows and laid out with the subset
# embedding computed above.
subset_image_urls = np.asarray(image_urls)[selected].tolist()

dimbridge2 = Dimbridge(
    data=df[selected],
    image_urls=subset_image_urls,
    x=subset_xy[:, 0],
    y=subset_xy[:, 1],
    s=4,  # projection plot mark size
    splom_s=2,  # SPLOM plot mark size
    predicate_mode="predicate regression",  # "data extent", "predicate regression"
    brush_mode="curve",  # 'single', "contrastive", "curve",
)

dimbridge2

In [None]:
# Compare the selection (C0) with the remaining rows (grey) on two features.
x_col, y_col = "mouth open", "eyes squinting"
plt.scatter(df.loc[~selected, x_col], df.loc[~selected, y_col], c="#aaa")
plt.scatter(df.loc[selected, x_col], df.loc[selected, y_col], c="C0")
# Label the axes so the figure can be read on its own.
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.axis("equal")
# show() keeps the tuple returned by plt.axis() out of the cell output.
plt.show()

# Subset PCA

In [None]:
from sklearn.decomposition import PCA

# Fit the principal axes on the selected rows only, then project every row onto
# those axes so the selection and the rest share one coordinate system.
pca = PCA().fit(df[selected])
pc = pca.transform(df)

plt.scatter(pc[~selected, 0], pc[~selected, 1], s=10, c="#aaa")
plt.scatter(pc[selected, 0], pc[selected, 1], s=10)
# Label the axes so the figure can be read on its own.
plt.xlabel("PC 1 (fit on selection)")
plt.ylabel("PC 2 (fit on selection)")
# show() keeps the PathCollection repr out of the cell output.
plt.show()

# Linear Discriminant Analysis

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Fit a discriminant with `selected` as the class label, then plot its output
# for every row against the "mouth open" feature.
lda = LinearDiscriminantAnalysis()
lda.fit(df, selected)
lda_x = lda.transform(df)

mouth_open = df["mouth open"]
plt.scatter(lda_x[~selected], mouth_open[~selected], s=10, c="#aaa")
plt.scatter(lda_x[selected], mouth_open[selected], s=10)
plt.show()

In [None]:
# Pair each feature column with its coefficient from the first row of lda.coef_.
[(column, weight) for column, weight in zip(df.columns, lda.coef_[0])]

In [None]:
# todo export svg / pdf
# todo how-to, readme
#