# TSNE node types visualizations

In [4]:
import os
from glob import glob
from tqdm.auto import tqdm
import numpy as np

Here we are going to use the [tsne-cuda](https://github.com/CannyLab/tsne-cuda) implementation from Berkeley, which is the fastest I know.

Since it may be not the easiest one to install, we make available all the rendered tsne embeddings in the folder "tsne" within the same repository.

In [5]:
try:
    from tsnecuda import TSNE
except ModuleNotFoundError:
    from MulticoreTSNE import MulticoreTSNE as TSNE

We retrieve all the embeddings to further embed using TSNE

In [6]:
paths = glob("*_embedding.npy")

## Executing the TSNE embeddings

We compute TSNE for different values of perplexity to check if the embedding is stable.

In [7]:
perplexities = (20, 50, 80, 200, 500)

os.makedirs("tsne", exist_ok=True)

for path in tqdm(paths, desc="Embedding all paths"):
    for perplexity in tqdm(perplexities, desc="Perplexities", leave=False):
        tsne_path = f"tsne/{perplexity}_{path}"
        if os.path.exists(tsne_path):
            continue
        tsne = TSNE(
            perplexity=perplexity
        )
        np.save(
            tsne_path,
            tsne.fit_transform(np.load(path))
        )

HBox(children=(FloatProgress(value=0.0, description='Embedding all paths', max=4.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='Perplexities', max=5.0, style=ProgressStyle(description_w…

HBox(children=(FloatProgress(value=0.0, description='Perplexities', max=5.0, style=ProgressStyle(description_w…

HBox(children=(FloatProgress(value=0.0, description='Perplexities', max=5.0, style=ProgressStyle(description_w…

HBox(children=(FloatProgress(value=0.0, description='Perplexities', max=5.0, style=ProgressStyle(description_w…




## Loading the graph
Here we load, as done previously, the PPI graph data.

In [8]:
def filter_embedding(graph:EnsmallenGraph, embedding:np.array, ):
    # Get the node type ids
    node_types_ids = [
        graph.get_node_type_id(node_id)
        for node_id in range(graph.get_nodes_number())
    ]
    
    # Obtain the most common ones
    common_node_types = list(dict(
        sorted(Counter(node_types_ids).items(), key=lambda x: x[1], reverse=True)[:10]
    ).keys())
    common_node_types_names = [
        graph.node_types_reverse_mapping[common_node_type]
        for common_node_type in common_node_types
    ]
    common_node_types_set = common_node_types
    colors = list(TABLEAU_COLORS.keys())

    filtered_node_ids, filtered_node_types_ids = list(zip(*[
        (node_id, node_type)
        for node_id, node_type in enumerate(node_types_ids)
        if node_type in common_node_types_set and not graph.is_node_trap(node_id)
    ]))

    return embedding[list(filtered_node_ids)].T, filtered_node_types_ids, common_node_types_names

NameError: name 'EnsmallenGraph' is not defined

In [9]:
def plot_embedding(
    graph:EnsmallenGraph,
    tsne_embedding:np.ndarray,
    embedding_name:str,
    directory:str,
    epochs:int,
    model:str,
    embedding_type:str
):
    if graph.node_types_mapping is not None:
        (x, y), node_types, common_node_types_names = filter_embedding(graph, tsne_embedding)
    else:
        (x, y) = tsne_embedding.T
        node_types = np.zeros(graph.get_nodes_number(), dtype=np.uint8)
        common_node_types_names = ["No node type provided"]

    fig, axes = plt.subplots(figsize=(10,10), dpi=200)
    colors = list(TABLEAU_COLORS.keys())[:len(common_node_types_names)]
    
    axes.set_title(f"{embedding_type} {embedding_name} embedding for {directory}, obtained using {model} trained for {epochs} epochs")
    scatter = axes.scatter(x, y, s=0.25, c=node_types, cmap=ListedColormap(colors))
    plt.legend(handles=scatter.legend_elements()[0], labels=common_node_types_names)
    plt.savefig(f"{directory}_{embedding_name}_{model}.png")
    plt.show()

NameError: name 'EnsmallenGraph' is not defined

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.ndimage.filters import gaussian_filter

def plot_embedding_degrees_heatmap(
    graph:EnsmallenGraph,
    embedding:np.ndarray,
    embedding_name:str,
    directory:str,
    epochs:int,
    model:str,
    embedding_type:str
):
    fig, axes = plt.subplots(figsize=(10,10), dpi=200)
    cm = plt.cm.get_cmap('RdYlBu')
    degrees = np.array([
        min(5, max(1, graph.degree(node)))
        for node in range(graph.get_nodes_number())
    ])
    sc = axes.scatter(
        *embedding.T,
        c=degrees,
        s=0.1,
        cmap=cm
        #norm=LogNorm(
        #    vmin=max(1, degrees.min()),
        #    vmax=np.median(degrees)*5,
        #    clip=True
        #)
    )

    fig.colorbar(sc, ax=axes)
    axes.set_title(f"Degrees heatmap {embedding_type} {embedding_name} embedding for {directory}, obtained using {model} trained for {epochs} epochs")
    plt.show()