# Exploration Notebook for local Force-Directed Layouts (Kamada-Kawai, UCI-ID 186, Wine Quality)


## Imports

In [None]:

%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sb
import umap.plot
from ucimlrepo import fetch_ucirepo

In [None]:
import sys

sys.path.append("../")
import visualization
import processing
from embedding_obj import EmbeddingObj
from community import community_louvain

## Data Preparation

In [None]:
# Fetch the Wine Quality dataset (UCI id 186,
# https://archive.ics.uci.edu/dataset/186/wine+quality).
wine_quality = fetch_ucirepo(id=186)

# Features and targets as float32 DataFrames.
X = pd.DataFrame(wine_quality.data.features, dtype="float32")
y = pd.DataFrame(wine_quality.data.targets, dtype="float32")

# Feature name handed to the processing helpers in the cells below.
target_feature = "pH"
# Integer category codes of the "quality" target column.
labels = pd.Categorical(y["quality"]).codes

X.head()

In [None]:
# Pairwise matrix restricted to `target_feature`
# (presumably similarities, per the plot title — confirm against processing).
connectivity_pairwise = processing.compute_pairwise_dists(X, [target_feature])
# Keep only the strict upper triangle (k=1): drops the diagonal and takes each
# unordered pair once.
i_upper = np.triu_indices_from(connectivity_pairwise, k=1)
connectivity_pairwise = connectivity_pairwise[i_upper]

fig, ax = plt.subplots(figsize=(20, 4))
sb.histplot(connectivity_pairwise.flatten(), bins=100, ax=ax)
ax.set_title("Histogram of pairwise similarity")
# x shows the binned values, y the number of pairs per bin (histplot default).
ax.set_xlabel("Pairwise similarity")
ax.set_ylabel("Count")

fig.tight_layout()
# plt.show() rather than fig.show(): the latter warns on the non-GUI inline
# backend used by notebooks.
plt.show()

## Compute initial UMAP-Embedding & Louvain Partition

In [None]:
# Fit UMAP on the feature matrix, then keep the fitted coordinates and the
# graph UMAP built while fitting.
umap_mapper = umap.UMAP(random_state=0, n_neighbors=15, min_dist=1)
umap_mapper.fit(X)
umap_embedding, connectivity_umap_knn = (
    umap_mapper.embedding_,
    umap_mapper.graph_,
)

In [None]:
# Graph built from UMAP's connectivity matrix, with its edge weights collected
# in edge-iteration order.
graph_umap_knn = nx.Graph(connectivity_umap_knn)
edge_weights_umap_knn = [
    weight for _, _, weight in graph_umap_knn.edges(data="weight")
]

# kNN graph (n_neighbors=5) computed by the project helper for `target_feature`.
graph_knn_feat, edge_weights_knn_feat = processing.compute_knn_graph(
    X, [target_feature], n_neighbors=5
)

# Both objects map node index -> UMAP coordinates; only the graph differs.
knn_list = [
    EmbeddingObj(
        graph=graph_umap_knn,
        embedding=dict(enumerate(umap_embedding)),
        edge_weights=edge_weights_umap_knn,
        title="UMAP-Positions with UMAP-kNN",
    ),
    EmbeddingObj(
        graph=graph_knn_feat,
        embedding=dict(enumerate(umap_embedding)),
        edge_weights=edge_weights_knn_feat,
        title=f"UMAP-Positions with kNN for Similarity of '{target_feature}'",
    ),
]

# Louvain partition of each object's sim_graph, also reused as its labels.
for embedding_obj in knn_list:
    embedding_obj.com_partition = community_louvain.best_partition(
        embedding_obj.sim_graph,
        random_state=0,
    )
    embedding_obj.labels = embedding_obj.com_partition

In [None]:
# Plot both entries of knn_list without a colour bar.
visualization.display_graphs(
    knn_list,
    figsize=(20, 8),
    figsize_columns=2,
    cmap="viridis",
    show_cbar=False,
)

## Visualize embeddings

In [None]:
# Pairwise matrix for `target_feature` computed with invert=False
# (presumably distances rather than similarities — confirm against processing).
pairwise_distances = processing.compute_pairwise_dists(
    X, [target_feature], invert=False
)

# Fresh graph from UMAP's connectivity matrix whose edge weights are overwritten
# with the corresponding entries of `pairwise_distances`.
connectivity_umap_knn_nx = nx.Graph(connectivity_umap_knn)
for u, v in connectivity_umap_knn_nx.edges():
    connectivity_umap_knn_nx.edges[u, v]["weight"] = pairwise_distances[u][v]

In [None]:
# Passed as `iterations=` to processing.compute_local_force_directed in every
# section below. NOTE(review): presumably one embedding is produced per entry in
# addition to the initial one (later cells index embeddings[1]) — confirm.
iterations = [1]
# Passed as `method=` to the same helper; presumably selects the Kamada-Kawai
# layout named in the notebook title — confirm against processing.
method = "kawai"

### 2.1 No MST, no threshold

In [None]:
# Baseline run: no mst, threshold or boundary_edges arguments are passed.
embeddings, partition_dict = processing.compute_local_force_directed(
    connectivity_umap_knn_nx,
    umap_embedding,
    pairwise_dists=pairwise_distances,
    method=method,
    iterations=iterations,
)

In [None]:
# Distinct values stored in the partition dict returned by the baseline run;
# passed as cbar_labels to the plots below.
partition_values = {*partition_dict.values()}

In [None]:
# Overview of all baseline embeddings with edges and partition centres drawn.
# cbar_labels is not passed in this call.
visualization.display_graphs(
    embeddings,
    figsize=(20, 10),
    figsize_columns=2,
    cmap="viridis",
    show_edges=True,
    show_cbar=False,
    show_partition_centers=True,
)

In [None]:
# Larger single-panel view of embeddings[1] from the baseline run.
visualization.display_graphs(
    [embeddings[1]],
    figsize=(15, 15),
    figsize_columns=1,
    cmap="viridis",
    cbar_labels=partition_values,
    show_cbar=False,
    show_partition_centers=True,
)

### 2.2 No MST, threshold at 0.1

In [None]:
# Same inputs as the baseline run, additionally passing threshold=0.1.
# The second return value is discarded.
embeddings_threshold, _ = processing.compute_local_force_directed(
    connectivity_umap_knn_nx,
    umap_embedding,
    pairwise_dists=pairwise_distances,
    method=method,
    iterations=iterations,
    threshold=0.1,
)

In [None]:
# Overview of all embeddings from the threshold run, edges included.
visualization.display_graphs(
    embeddings_threshold,
    figsize=(20, 10),
    figsize_columns=2,
    cmap="viridis",
    cbar_labels=partition_values,
    show_edges=True,
    show_cbar=False,
)

In [None]:
# Larger single-panel view of embeddings_threshold[1] with partition centres.
visualization.display_graphs(
    [embeddings_threshold[1]],
    figsize=(15, 15),
    figsize_columns=1,
    cmap="viridis",
    cbar_labels=partition_values,
    show_cbar=False,
    show_partition_centers=True,
)

### 2.3 MST, no threshold

In [None]:
# Same inputs as the baseline run, additionally passing mst=True.
# The second return value is discarded.
embeddings_mst, _ = processing.compute_local_force_directed(
    connectivity_umap_knn_nx,
    umap_embedding,
    pairwise_dists=pairwise_distances,
    method=method,
    iterations=iterations,
    mst=True,
)

In [None]:
# Overview of all embeddings from the mst=True run, edges included.
visualization.display_graphs(
    embeddings_mst,
    figsize=(20, 10),
    figsize_columns=2,
    cmap="viridis",
    cbar_labels=partition_values,
    show_edges=True,
    show_cbar=False,
)

In [None]:
# Larger single-panel view of embeddings_mst[1]
# (show_partition_centers is not passed in this call).
visualization.display_graphs(
    [embeddings_mst[1]],
    figsize=(15, 15),
    figsize_columns=1,
    cmap="viridis",
    cbar_labels=partition_values,
    show_cbar=False,
)

### 2.4 No MST, use boundary-edges

In [None]:
# Same inputs as the baseline run, additionally passing boundary_edges=True.
# The second return value is discarded.
embeddings_boundaries, _ = processing.compute_local_force_directed(
    connectivity_umap_knn_nx,
    umap_embedding,
    pairwise_dists=pairwise_distances,
    method=method,
    iterations=iterations,
    boundary_edges=True,
)

In [None]:
# Overview of all embeddings from the boundary_edges=True run, edges included.
visualization.display_graphs(
    embeddings_boundaries,
    figsize=(20, 10),
    figsize_columns=2,
    cmap="viridis",
    cbar_labels=partition_values,
    show_edges=True,
    show_cbar=False,
)

In [None]:
# Larger single-panel view of embeddings_boundaries[1] with partition centres.
visualization.display_graphs(
    [embeddings_boundaries[1]],
    figsize=(15, 15),
    figsize_columns=1,
    cmap="viridis",
    cbar_labels=partition_values,
    show_cbar=False,
    show_partition_centers=True,
)

### 2.5 MST, use boundary-edges

In [None]:
# Same inputs as the baseline run, passing both mst=True and
# boundary_edges=True. The second return value is discarded.
embeddings_mst_boundaries, _ = processing.compute_local_force_directed(
    connectivity_umap_knn_nx,
    umap_embedding,
    pairwise_dists=pairwise_distances,
    method=method,
    iterations=iterations,
    mst=True,
    boundary_edges=True,
)

In [None]:
# Overview of all embeddings from the mst + boundary_edges run.
# Unlike the other overview plots, edges are hidden here (show_edges=False).
visualization.display_graphs(
    embeddings_mst_boundaries,
    figsize=(20, 10),
    figsize_columns=2,
    cmap="viridis",
    cbar_labels=partition_values,
    show_edges=False,
    show_cbar=False,
)

In [None]:
# Larger single-panel view of embeddings_mst_boundaries[1] with partition centres.
visualization.display_graphs(
    [embeddings_mst_boundaries[1]],
    figsize=(15, 15),
    figsize_columns=1,
    cmap="viridis",
    cbar_labels=partition_values,
    show_cbar=False,
    show_partition_centers=True,
)

### Debugging: Missing Edges

In [None]:
# Compare edges as unordered pairs. Graph.edges() yields (u, v) tuples whose
# orientation is not guaranteed to match between two graphs, so a plain tuple
# set difference would report (u, v) vs. (v, u) as a missing edge. Each edge is
# normalised to (min, max); the results stay 2-tuples and can still be fed to
# add_edges_from.
original_edges = {
    (min(a, b), max(a, b)) for a, b in connectivity_umap_knn_nx.edges()
}
result_edges = {
    (min(a, b), max(a, b)) for a, b in embeddings[1].sim_graph.edges()
}
print("Missing edges:", len(original_edges - result_edges))

In [None]:
# Edges present in the input graph but absent from the result graph.
missing_edges = original_edges.difference(result_edges)

# Copy of the input graph with all of its edges removed, then repopulated with
# only the missing edges.
missing_graph = nx.Graph(connectivity_umap_knn_nx)
missing_graph.clear_edges()
missing_graph.add_edges_from(missing_edges)

In [None]:
# Collect edges that exist in both orientations ("doppelte" = duplicates).
# NOTE(review): connectivity_umap_knn_nx is an undirected nx.Graph, so both
# has_edge checks hold for every edge it yields and this counts all edges.
doppelte = [
    (u, v)
    for u, v in connectivity_umap_knn_nx.edges()
    if connectivity_umap_knn_nx.has_edge(v, u)
    and connectivity_umap_knn_nx.has_edge(u, v)
]

print(len(doppelte))

In [None]:
# Draw only the edges that are missing from the result graph, positioned at the
# UMAP coordinates. numpy is already imported in the imports cell at the top.
visualization.display_graphs(
    [
        EmbeddingObj(
            graph=missing_graph,
            # Node index -> coordinates mapping, matching how the other
            # EmbeddingObj instances in this notebook are constructed.
            embedding=dict(enumerate(umap_embedding)),
            # Explicit empty weight array (same value as np.ndarray([0])).
            edge_weights=np.empty(0),
            title="UMAP-Positions with missing UMAP-kNN edges",
        )
    ],
    figsize_columns=1,
    figsize=(20, 10),
    show_edges=True,
    cmap="viridis",
    show_cbar=False,
)