# Colab initialization
- install the pipeline in the Colab runtime
- download the files necessary for this example

In [None]:
!pip3 install -U pip > /dev/null
!pip3 install -U "bio-embeddings[all] @ git+https://github.com/sacdallago/bio_embeddings.git" > /dev/null

In [None]:
!wget http://data.bioembeddings.com/public/embeddings/reference/deeploc/protbert_reference_embeddings.h5 --output-document protbert_reference_embeddings.h5
!wget http://data.bioembeddings.com/public/embeddings/reference/deeploc/annotations.csv --output-document annotations.csv

# Visualize sequence spaces drawn by DeepLoc embeddings
In this notebook, we use the output of the _embed_ stage to draw custom UMAP sequence space plots.

In [None]:
import h5py
import numpy as np
from pandas import read_csv, DataFrame
from bio_embeddings.project import umap_reduce
from bio_embeddings.visualize import render_scatter_plotly

In [None]:
# Load the annotation table; later cells rely on its `identifier` column.
annotations = read_csv("annotations.csv")

In [None]:
# One embedding is read per unique annotated identifier; `identifiers` fixes
# the order of the resulting `embeddings` list.
identifiers = annotations.identifier.unique()

with h5py.File('protbert_reference_embeddings.h5', 'r') as embeddings_file:
    # Each entry is converted to a NumPy array while the file is still open.
    embeddings = [
        np.array(embeddings_file[identifier]) for identifier in identifiers
    ]

In [None]:
# Keyword options handed to umap_reduce below
# (presumably forwarded to UMAP — confirm against bio_embeddings.project).
options = dict(
    min_dist=.9,
    n_neighbors=50,
    metric='cosine',
    n_components=2,
)

# Project the embeddings; n_components=2 matches the two columns named in the next cell.
projected_embeddings = umap_reduce(embeddings, **options)

In [None]:
# Wrap the projection in a DataFrame: one named column per component,
# indexed by identifier so rows can be matched to the annotations.
projected_embeddings_dataframe = DataFrame(
    data=projected_embeddings,
    index=identifiers,
    columns=["component_0", "component_1"],
)

In [None]:
# Left join: every annotation row is kept and receives the projected
# components whose index entry equals the row's `identifier` value.
merged_annotations_and_projected_embeddings = annotations.join(
    other=projected_embeddings_dataframe,
    on="identifier",
    how="left",
)

In [None]:
# Build the scatter plot from the merged annotations + projection table and display it.
figure = render_scatter_plotly(
    merged_annotations_and_projected_embeddings
)
figure.show()