In [1]:
import h5py
import numpy as np
from pandas import read_csv
from bio_embeddings.project import tsne_reduce
from bio_embeddings.visualize import render_3D_scatter_plotly, save_plotly_figure_to_html



In [2]:
# Load the id mapping table. The first CSV column becomes the index, which the
# next cell iterates to look up embeddings; the 'original_id' column is used
# later to join against the annotation files.
mapping_file = read_csv(
    filepath_or_buffer='mapping_file.csv',
    index_col=0,
)

In [3]:
# Collect one embedding per row of mapping_file, in index order, so that
# position i in `embeddings` corresponds to row i of mapping_file.
with h5py.File('reduced_embeddings_file.h5', 'r') as embeddings_h5:
    # read_csv infers the index dtype, so ids that look numeric arrive as
    # integers; h5py only accepts string/bytes dataset names, hence the
    # explicit str() around every key.
    embeddings = [
        # np.array(...) copies the dataset into memory before the file closes.
        np.array(embeddings_h5[str(remapped_id)])
        for remapped_id in mapping_file.index
    ]

In [4]:
# Keyword arguments forwarded verbatim to tsne_reduce.
# NOTE(review): n_iter looks like an upper bound rather than a fixed count --
# the recorded run below reports its final KL divergence after 8600 iterations.
options = dict(
    perplexity=30,
    n_iter=20000,
)

# The following cell reads columns 0, 1 and 2 of this result as x, y and z.
projected_embeddings = tsne_reduce(embeddings, **options)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 1162 samples in 0.001s...
[t-SNE] Computed neighbors for 1162 samples in 0.040s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1162
[t-SNE] Computed conditional probabilities for sample 1162 / 1162
[t-SNE] Mean sigma: 0.183682
[t-SNE] KL divergence after 250 iterations with early exaggeration: 78.617180
[t-SNE] KL divergence after 8600 iterations: 1.262826


In [5]:
# Attach the projected coordinates to the mapping table, one column per axis.
# Rows line up because the embeddings were loaded by iterating
# mapping_file.index in order.
for axis_position, axis_name in enumerate(('x', 'y', 'z')):
    mapping_file[axis_name] = projected_embeddings[:, axis_position]

In [6]:
# Annotation tables to visualise; each one yields its own HTML figure.
annotation_files = [
    'disprot_2019_09_floats.csv',
    'disprot_2019_09_extreme_ends_0.2vs0.8.csv',
    'disprot_2019_09_extreme_ends_0.3vs0.7.csv',
    'disprot_2019_09_extreme_ends_0.5vs0.5.csv',
    'disprot_2019_09_3classes_0.2_0.8.csv',
]

# The mapping table keyed by original id is the same for every annotation
# file, so it is built once up front instead of once per iteration.
coordinates_by_original_id = mapping_file.set_index('original_id')

for annotation_file_path in annotation_files:
    # The first CSV column is the index that the join below matches on.
    annotations = read_csv(annotation_file_path, index_col=0)

    # DataFrame.join defaults to a left join: every annotation row is kept, and
    # rows whose id is absent from the mapping table get NaN in the joined columns.
    annotated_coordinates = annotations.join(coordinates_by_original_id)

    scatter_figure = render_3D_scatter_plotly(annotated_coordinates)
    # Output name is the annotation path with ".html" appended.
    save_plotly_figure_to_html(scatter_figure, f"{annotation_file_path}.html")