In [1]:
import plotly.express as px
import pandas as pd

def plot_scatter(
    data,
    color_dict,
    color,
    marker_size=5.5,
    width=600,
    height=600,
):
    """Build a 2-D or 3-D Plotly Express scatter plot of ``data``.

    Parameters
    ----------
    data : array-like
        Array indexed as ``data[:, i]`` whose ``shape[1]`` is 2 or 3. Its
        columns become the ``x``, ``y`` (and ``z``) coordinates.
    color_dict : dict
        Maps a column name to one value per point. A shallow copy is taken,
        so the caller's dict is not modified. Entries named ``"x"``, ``"y"``
        or ``"z"`` are overwritten by the coordinates.
    color
        Passed through unchanged as the ``color`` argument of the Plotly
        Express call (presumably one of the keys of ``color_dict``).
    marker_size : float, optional
        Marker size applied to every trace. Defaults to 5.5.
    width, height : int, optional
        Figure dimensions handed to Plotly Express. Both default to 600.

    Returns
    -------
    The figure produced by ``px.scatter`` (2 columns) or ``px.scatter_3d``
    (3 columns), after the marker size has been applied.

    Raises
    ------
    AssertionError
        If ``data`` does not have exactly 2 or 3 columns.
    """
    dim = data.shape[1]
    assert dim in (2, 3), f"data must have 2 or 3 columns, got {dim}"

    # Work on a copy so the caller's dict never gains the coordinate columns.
    df_dict = color_dict.copy()
    for i, axis_name in enumerate(("x", "y", "z")[:dim]):
        df_dict[axis_name] = data[:, i]

    df = pd.DataFrame(df_dict)

    scatter_kwargs = dict(
        x="x",
        y="y",
        color=color,
        width=width,
        height=height,
        # Show every supplied column (and the coordinates) on hover.
        hover_data=list(df_dict.keys()),
    )

    if dim == 2:
        fig = px.scatter(df, **scatter_kwargs)
    else:  # dim == 3
        fig = px.scatter_3d(df, z="z", **scatter_kwargs)

    # A fixed marker size is set on the traces instead of a per-point
    # ``size`` column.
    fig.update_traces(marker={'size': marker_size})

    return fig

## Load the data from training

In [2]:
import numpy as np
from synd.core import load_model

# Coordinates saved by the static model
data = np.load('../static_model/z.npy')
# Model that provides the backmapper
model = load_model('../common_files/ntl9_folding.synd')
# Backmap every state index in range(3152) to its rmsd and flatten the result
states_rmsd = np.concatenate(np.array(model.backmap(list(range(3152)))))
# Extra target data
extra_data = np.load('../common_files/near_target_CA_rmsd.npy')
# Final rmsd array: per-state values, then a single zero for the native state,
# then the extra target values
rmsd = np.concatenate((states_rmsd, np.zeros(1), extra_data))

## Plot the data

In [3]:
# Plot colored by cosine distance
from sklearn.metrics.pairwise import cosine_distances

# The last row of the data is the target point. `target` repeats it to the
# shape of the data; it is kept in that shape for any later cell that uses it.
target = np.tile(data[-1], (len(data), 1))

# Get the cosine distance from every point to the target. Comparing against
# the single target row gives an (N, 1) result directly, instead of building
# an N x N matrix of identical columns and keeping only the first one.
cosine = cosine_distances(data, data[-1:])[:, 0]

# Every color column needs exactly one value per point; fail here with a clear
# message rather than inside the DataFrame construction in plot_scatter.
assert len(rmsd) == len(data), (
    f"rmsd has {len(rmsd)} entries but data has {len(data)} rows"
)

# Create the color dictionary
color_dict = {'cosine': cosine, 'rmsd': rmsd}

# Plot the scatter plot
plot_scatter(data, color_dict, 'cosine')
