In [1]:
import plotly.express as px
import pandas as pd
import numpy as np
import umap.umap_ as umap
import os

In [20]:
# Input tables are read from the lmx1b analysis folder; exported figures go to the slide folder.
read_dir = "/Users/nick/Cole Trapnell's Lab Dropbox/Nick Lammers/Nick/morphseq/morphseq_analyses/lmx1b"
figure_dir = "/Users/nick/Cole Trapnell's Lab Dropbox/Nick Lammers/Nick/slides/20240207/"

master_csv_path = os.path.join(read_dir, "morphseq_df.csv")
hooke_csv_path = os.path.join(read_dir, "hooke_latent_df_clean.csv")
morph_csv_path = os.path.join(read_dir, "morph_latent_df_clean.csv")

# The master table uses its first CSV column as the row index; the two latent tables keep a default index.
master_df_ref = pd.read_csv(master_csv_path, index_col=0)
hooke_latent_df = pd.read_csv(hooke_csv_path)
morph_df = pd.read_csv(morph_csv_path)

In [198]:
# Sanity check: (rows, columns) of the master table loaded above.
master_df_ref.shape

(174, 432)

#### Can the Hooke latent space predict morphological distance from the WT reference?

In [190]:
from sklearn.neural_network import MLPRegressor

# Predictor columns (3D Hooke UMAP coordinates) and the regression target.
hooke_feature_cols = ["UMAP_hooke_00_3", "UMAP_hooke_01_3", "UMAP_hooke_02_3"]
distance_col = "ctrl_dist_morph"

# Seeded random split: n_train row labels are drawn without replacement for training,
# every remaining label (in original index order) goes to the test set.
n_train = 100
np.random.seed(314)
option_vec = master_df_ref.index
train_indices = np.random.choice(option_vec, n_train, replace=False)
train_label_lookup = set(train_indices.tolist())
test_indices = np.asarray([label for label in option_vec if label not in train_label_lookup])

# Targets. The NaN masks are computed on the unfiltered targets and reused below so that
# features and targets drop exactly the same rows.
Y_train = master_df_ref.loc[train_indices, distance_col]
Y_test = master_df_ref.loc[test_indices, distance_col]
nan_train_indices = Y_train.isna()
nan_test_indices = Y_test.isna()
Y_train = Y_train[~nan_train_indices]
Y_test = Y_test[~nan_test_indices]

# Features, restricted to the rows that have a usable target.
X_train = master_df_ref.loc[train_indices, hooke_feature_cols].loc[~nan_train_indices, :]
X_test = master_df_ref.loc[test_indices, hooke_feature_cols].loc[~nan_test_indices, :]

In [191]:
# Single-hidden-layer MLP (500 units) mapping Hooke UMAP coordinates to morphological distance.
regr_dist = MLPRegressor(hidden_layer_sizes=(500,), max_iter=5000, random_state=1)
regr_dist.fit(X_train, Y_train)

# Score the fitted model on the training rows and on the held-out rows.
reg_score_train, reg_score_test = (
    regr_dist.score(features, targets)
    for features, targets in ((X_train, Y_train), (X_test, Y_test))
)

print(reg_score_train, reg_score_test, sep="\n")

0.9388704818523632
0.9183773204963906


In [196]:
# Predicted distances for the held-out rows.
Y_test_pd = regr_dist.predict(X_test)

# Perturbation labels for the same rows, with the NaN-target rows removed exactly as for X_test / Y_test.
pert = master_df_ref.loc[test_indices, ["master_perturbation"]].loc[~nan_test_indices, :].to_numpy()

# Results table with a fresh positional index: prediction, ground truth, perturbation label.
res_df_test = pd.DataFrame({
    "morph_ctrl_dist_pd": Y_test_pd,
    "morph_ctrl_dist_actual": Y_test.to_numpy(),
    "master_perturbation": pert[:, 0],
})

res_df_test.head(6)

Unnamed: 0,morph_ctrl_dist_pd,morph_ctrl_dist_actual,master_perturbation
0,1.646413,2.092099,lmx1b
1,1.226114,0.995113,wik-inj-ctrl
2,1.71318,1.865148,lmx1b
3,1.155698,0.601148,wik-inj-ctrl
4,1.61709,1.760015,lmx1b
5,1.697254,1.670301,lmx1b


In [197]:
# Boolean numpy masks, one per perturbation, over the rows of res_df_test (reused by later cells).
perturbation_labels = res_df_test["master_perturbation"]
tbxta_bool = perturbation_labels.eq("tbxta").to_numpy()
lmx_bool = perturbation_labels.eq("lmx1b").to_numpy()
ctrl_bool = perturbation_labels.eq("wik-inj-ctrl").to_numpy()

all_bool = tbxta_bool | lmx_bool | ctrl_bool

# Predicted (x) vs actual (y) distance, one colour per perturbation.
axis_labels = {
    "morph_ctrl_dist_actual": "actual distance from WT reference",
    "morph_ctrl_dist_pd": "predicted distance from WT reference",
    "master_perturbation": "perturbation",
}
fig = px.scatter(
    res_df_test.loc[all_bool],
    x="morph_ctrl_dist_pd",
    y="morph_ctrl_dist_actual",
    color="master_perturbation",
    opacity=0.8,
    labels=axis_labels,
)

# Enlarge and outline the markers. This runs before the reference line is added,
# so only the scatter traces created above are touched.
marker_outline = dict(width=1, color='DarkSlateGrey')
fig.update_traces(marker=dict(size=10, line=marker_outline))

# Dotted y = x reference line.
ref_line = np.linspace(-1, 15)
fig.add_scatter(
    x=ref_line,
    y=ref_line,
    mode='lines',
    showlegend=False,
    line=dict(color='gray', width=1, dash='dot'),
)

fig.show()
# Export a high-resolution copy for the slides.
fig.write_image(os.path.join(figure_dir, "lmx_distance_prediction_plot.png"), scale=4)

Switch up the plotting style

In [200]:
import plotly.graph_objects as go

# NOTE(review): `label_dict` and `pastel_red` are not used by any code in this cell; they are kept
# unchanged in case another cell relies on them. The label keys say "wik" whereas the
# res_df_test columns say "ctrl".
label_dict = {"morph_wik_dist_actual": "actual distance from WT reference",
                         "morph_wik_dist_pd": "predicted distance from WT reference",
                         "master_perturbation":"perturbation"}
pastel_red = "rgb(251,180,174)"

# One colour per perturbation label, expanded to one entry per row of res_df_test.
color_dict = dict({"lmx1b" :"#EF553B" , "wik":"black", "wik-inj-ctrl":"#636EFA", "tbxta":"#00CC96"})
color_vec = np.asarray([color_dict[p] for p in res_df_test.loc[:, "master_perturbation"].tolist()])


def _new_prediction_figure():
    """Return a fresh figure holding only the dotted y = x reference line and the shared axis styling."""
    base_fig = go.Figure()

    ref_line = np.linspace(-1, 15)
    base_fig.add_scatter(
        x=ref_line,
        y=ref_line,
        mode='lines',
        line_color='gray',
        showlegend=False,
        line=dict(width=2, dash='dot')
    )

    base_fig.update_layout(
        xaxis_title="predicted distance from WT reference",
        yaxis_title="actual distance from WT reference",
        legend_title="perturbation",
        font=dict(
            family="Arial",
            size=14,
            color="Black"
        )
    )
    return base_fig


def _add_perturbation_markers(target_fig, mask, name, size=10, opacity=1, edge_width=None, showlegend=None):
    """Add the predicted-vs-actual markers of the rows selected by `mask` to `target_fig`.

    mask       -- boolean numpy array over the rows of res_df_test
    name       -- trace name (legend entry)
    size       -- marker size
    opacity    -- marker opacity (0 gives an invisible placeholder trace)
    edge_width -- width of the DarkSlateGrey marker outline; None adds no outline settings
    showlegend -- passed through to the trace only when not None
    """
    marker = dict(size=size, opacity=opacity, color=color_vec[mask])
    if edge_width is not None:
        marker["line"] = dict(width=edge_width, color='DarkSlateGrey')

    trace_kwargs = dict(
        x=res_df_test.loc[mask, "morph_ctrl_dist_pd"],
        y=res_df_test.loc[mask, "morph_ctrl_dist_actual"],
        mode="markers",
        marker=marker,
        name=name,
    )
    if showlegend is not None:
        trace_kwargs["showlegend"] = showlegend
    target_fig.add_scatter(**trace_kwargs)


# Stage 0: just the reference line. The tbxta points are added fully transparent as a placeholder.
# (This trace was previously named "wik-inj-ctrl" although it holds the tbxta rows.)
fig = _new_prediction_figure()
_add_perturbation_markers(fig, tbxta_bool, "tbxta", opacity=0)
fig.write_image(os.path.join(figure_dir, "lmx_distance_prediction_plot00.png"), scale=4)

# Stage 1: tbxta and injection controls.
fig = _new_prediction_figure()
_add_perturbation_markers(fig, tbxta_bool, "tbxta", edge_width=0.8, showlegend=True)
_add_perturbation_markers(fig, ctrl_bool, "wik-inj-ctrl", edge_width=0.8)
fig.write_image(os.path.join(figure_dir, "lmx_distance_prediction_plot01.png"), scale=4)

# Stage 2: add lmx1b on top, drawn slightly larger and with a heavier outline.
_add_perturbation_markers(fig, lmx_bool, "lmx1b", size=10.5, edge_width=1.3)
fig.write_image(os.path.join(figure_dir, "lmx_distance_prediction_plot02.png"), scale=4)

fig.show()

### Latent space plots

In [105]:
# Perturbation -> colour mapping; also reused by the Hooke latent-space figures further down.
color_discrete_map = {"lmx1b" :"#EF553B" , "wik":"rgb(179, 205, 227)", "wik-inj-ctrl":"#636EFA", "tbxta":"#00CC96"}

# 3D scatter of the "bio" UMAP columns, coloured by perturbation.
morph_fig = px.scatter_3d(
    master_df_ref,
    x="UMAP_00_bio_3",
    y="UMAP_01_bio_3",
    z="UMAP_02_bio_3",
    color="master_perturbation",
    color_discrete_map=color_discrete_map,
    opacity=1,
)

# Outlined markers.
morph_fig.update_traces(marker=dict(size=7, line=dict(width=2, color='DarkSlateGrey')))

# Generic axis titles with the tick labels hidden.
scene_axis_titles = {"xaxis_title": "UMAP 1", "yaxis_title": "UMAP 2", "zaxis_title": "UMAP 3"}
scene_hidden_ticks = {axis_name: dict(showticklabels=False) for axis_name in ("xaxis", "yaxis", "zaxis")}
morph_fig.update_layout(
    scene={**scene_axis_titles, **scene_hidden_ticks},
    legend_title="perturbation",
    font=dict(family="Arial", size=14, color="Black"),
)

morph_fig.show()

In [106]:
import math
from tqdm import tqdm

# Camera azimuths (radians) for the rotation, and the folder that receives the frames.
angle_vec = np.linspace(1.36, 7.66, 50)
frame_dir = os.path.join(figure_dir, "lmx_morph_UMAP_frames", "")
os.makedirs(frame_dir, exist_ok=True)

# Scene settings applied after the first still: empty axis titles, tick labels still hidden.
untitled_scene = {f"{axis_name}_title": "" for axis_name in ("xaxis", "yaxis", "zaxis")}
untitled_scene.update({axis_name: dict(showticklabels=False) for axis_name in ("xaxis", "yaxis", "zaxis")})

# Render one image per camera angle.
for iter_i, azimuth in enumerate(tqdm(angle_vec)):
    # Camera eye position: (cos, sin, 0.6) scaled by 2.5.
    eye_xyz = np.asarray([math.cos(azimuth), math.sin(azimuth), 0.6]) * 2.5
    morph_fig.update_layout(scene_camera=dict(eye=dict(zip("xyz", eye_xyz))), scene_dragmode='orbit')

    frame_name = f"umap_scatter_lmx_{iter_i:03}.png"

    if iter_i == 0:
        # Titled, higher-resolution still of the first view, saved next to the other slide figures.
        morph_fig.write_image(os.path.join(figure_dir, frame_name), scale=3)

        # Every frame written to frame_dir (including this first one) has blank axis titles.
        morph_fig.update_layout(scene=untitled_scene)

    morph_fig.write_image(os.path.join(frame_dir, frame_name), scale=2)

morph_fig.show()

100%|██████████| 50/50 [01:13<00:00,  1.47s/it]


## Hooke latent space plots

In [146]:
# 3D scatter of the Hooke UMAP columns, coloured by perturbation.
hooke_fig1 = px.scatter_3d(
    master_df_ref,
    x="UMAP_hooke_00_3",
    y="UMAP_hooke_01_3",
    z="UMAP_hooke_02_3",
    color="master_perturbation",
    color_discrete_map=color_discrete_map,
    opacity=1,
)

# Outlined markers.
hooke_fig1.update_traces(marker=dict(size=7, line=dict(width=2, color='DarkSlateGrey')))

# Generic axis titles with the tick labels hidden.
hooke1_axis_titles = {"xaxis_title": "UMAP 1", "yaxis_title": "UMAP 2", "zaxis_title": "UMAP 3"}
hooke1_hidden_ticks = {axis_name: dict(showticklabels=False) for axis_name in ("xaxis", "yaxis", "zaxis")}
hooke_fig1.update_layout(
    scene={**hooke1_axis_titles, **hooke1_hidden_ticks},
    legend_title="perturbation",
    font=dict(family="Arial", size=14, color="Black"),
)

hooke_fig1.show()

In [147]:
def make_rotating_figure(angle_vec, frame_dir, fig, prefix, still_dir=None, elevation=0.5, radius=2):
    """Render a 3D plotly figure from a sequence of camera angles and save one PNG per angle.

    The figure is modified in place (camera, axis titles, legend) and also returned.

    Parameters
    ----------
    angle_vec : iterable of float
        Camera azimuth angles in radians; one frame is written per entry.
    frame_dir : str
        Folder receiving the frames "umap_scatter_lmx_###.png" (scale=2). Created if missing.
        Note that frame names do not include `prefix`, so use a separate folder per figure.
    fig : plotly figure with a 3D scene
        Figure to rotate.
    prefix : str
        Prepended to "_umap_scatter_lmx_000.png" for the titled still of the first view.
    still_dir : str, optional
        Folder for that titled still (scale=3). Defaults to the notebook-level `figure_dir`.
    elevation : float, optional
        z component of the camera direction before scaling (default 0.5).
    radius : float, optional
        Scale factor applied to the camera direction (default 2).

    Returns
    -------
    The same `fig`, left at the last camera angle with blank axis titles and the legend hidden.
    """
    if still_dir is None:
        # Backward-compatible default: fall back to the notebook-wide figure folder.
        still_dir = figure_dir

    # Writing an image into a missing folder fails, so make sure the frame folder exists.
    os.makedirs(frame_dir, exist_ok=True)

    for iter_i, angle in enumerate(tqdm(angle_vec)):
        # Camera eye position: (cos, sin, elevation) scaled by radius.
        vec = np.asarray([math.cos(angle), math.sin(angle), elevation]) * radius
        camera = dict(
            eye=dict(x=vec[0], y=vec[1], z=vec[2]))

        fig.update_layout(scene_camera=camera, scene_dragmode='orbit')

        if iter_i == 0:
            # Titled, higher-resolution still of the first view, saved before the decorations are removed.
            fig.write_image(os.path.join(still_dir, prefix + "_umap_scatter_lmx" + f"_{iter_i:03}" + ".png"), scale=3)

            # All frames (including this first one) are written without axis titles, legend or colour bar.
            fig.update_layout(scene = dict(
                        xaxis_title="",
                        yaxis_title="",
                        zaxis_title="",
                        xaxis = dict(showticklabels=False),
                        yaxis = dict(showticklabels=False),
                        zaxis = dict(showticklabels=False)
                        ),
                        showlegend=False)
            fig.update_coloraxes(showscale=False)

        fig.write_image(os.path.join(frame_dir, "umap_scatter_lmx" + f"_{iter_i:03}" + ".png"), scale=2)

    return fig

In [148]:
# 50 camera azimuths covering one full turn, starting at 1 rad.
angle_vec = np.linspace(1, 1+2*np.pi, 50)

# Frames for the genotype-coloured Hooke figure get their own folder.
frame_dir = os.path.join(figure_dir, "lmx_hooke_UMAP_genotype_frames", "")
os.makedirs(frame_dir, exist_ok=True)

# NOTE(review): only the first two angles are passed on, so just 2 of the 50 frames are rendered.
# This looks like a quick-test setting; pass angle_vec itself for the full rotation.
first_two_angles = angle_vec[:2]
hooke_fig1 = make_rotating_figure(first_two_angles, frame_dir, hooke_fig1, "hooke_genotype_")
hooke_fig1.show()

100%|██████████| 2/2 [00:05<00:00,  2.68s/it]


In [149]:
# Same Hooke UMAP scatter as before, now coloured by the predicted stage column.
# NOTE(review): color_discrete_map holds perturbation names; presumably it has no effect on a stage
# colouring. It is passed through unchanged; confirm it can be dropped.
hooke_fig2 = px.scatter_3d(
    master_df_ref,
    x="UMAP_hooke_00_3",
    y="UMAP_hooke_01_3",
    z="UMAP_hooke_02_3",
    color="predicted_stage_hpf",
    color_discrete_map=color_discrete_map,
    opacity=1,
)

# Outlined markers.
hooke_fig2.update_traces(marker=dict(size=7, line=dict(width=2, color='DarkSlateGrey')))

# Generic axis titles with the tick labels hidden.
hooke2_axis_titles = {"xaxis_title": "UMAP 1", "yaxis_title": "UMAP 2", "zaxis_title": "UMAP 3"}
hooke2_hidden_ticks = {axis_name: dict(showticklabels=False) for axis_name in ("xaxis", "yaxis", "zaxis")}
hooke_fig2.update_layout(
    scene={**hooke2_axis_titles, **hooke2_hidden_ticks},
    legend_title="stage (hpf)",
    font=dict(family="Arial", size=14, color="Black"),
)

hooke_fig2.show()

In [151]:
# 50 camera azimuths covering one full turn, starting at 1 rad.
angle_vec = np.linspace(1, 1+2*np.pi, 50)

# Frames for the stage-coloured Hooke figure get their own folder.
frame_dir = os.path.join(figure_dir, "lmx_hooke_UMAP_stage_frames", "")
os.makedirs(frame_dir, exist_ok=True)

# NOTE(review): only the first two angles are passed on, so just 2 of the 50 frames are rendered.
# This looks like a quick-test setting; pass angle_vec itself for the full rotation.
first_two_angles = angle_vec[:2]
hooke_fig2 = make_rotating_figure(first_two_angles, frame_dir, hooke_fig2, "hooke_stage_")
hooke_fig2.show()

100%|██████████| 2/2 [00:04<00:00,  2.16s/it]


## Latent space regression results

In [153]:
# Targets: 3D morphology ("bio") UMAP coordinates. Predictors: 3D Hooke UMAP coordinates.
# The row split reuses train_indices / test_indices from the distance regression; no NaN filtering here.
morph_umap_cols = ["UMAP_00_bio_3", "UMAP_01_bio_3", "UMAP_02_bio_3"]
hooke_umap_cols = ["UMAP_hooke_00_3", "UMAP_hooke_01_3", "UMAP_hooke_02_3"]

X_train_u = master_df_ref.loc[train_indices, hooke_umap_cols]
Y_train_u = master_df_ref.loc[train_indices, morph_umap_cols]

X_test_u = master_df_ref.loc[test_indices, hooke_umap_cols]
Y_test_u = master_df_ref.loc[test_indices, morph_umap_cols]

In [159]:
# Same MLP configuration as the distance model, now predicting all three morphology UMAP coordinates.
regr_u = MLPRegressor(hidden_layer_sizes=(500,), max_iter=5000, random_state=1)
regr_u.fit(X_train_u, Y_train_u)

# Score on the held-out rows.
test_score = regr_u.score(X_test_u, Y_test_u)
print(test_score)

0.7165798387208518


In [175]:
# Predicted and actual morphology UMAP coordinates for the held-out rows (one row per test sample).
Y_test_pd = regr_u.predict(X_test_u)
Y_test_arr = Y_test_u.to_numpy()

# One colour per test row, looked up from its perturbation label.
color_dict2 = dict({"lmx1b" :"#EF553B" , "wik":"rgb(179, 205, 227)", "wik-inj-ctrl":"#636EFA", "tbxta":"#00CC96"})
color_vec_u = np.asarray([color_dict2[p] for p in master_df_ref.loc[test_indices, "master_perturbation"].tolist()])
fig = go.Figure()

# One short segment per test row joining its actual position to its predicted position.
# Iterate over the UMAP arrays themselves rather than range(Y_test.shape[0]): Y_test is the
# NaN-filtered target of the distance regression and can have fewer rows than these arrays,
# which would silently leave the last samples without a connecting segment.
for actual_xyz, predicted_xyz in zip(Y_test_arr, Y_test_pd):
    data = np.vstack((actual_xyz, predicted_xyz))

    fig.add_trace(go.Scatter3d(x=data[:, 0], y=data[:, 1], z=data[:, 2], mode="lines+markers",
                               line=dict(color='DarkSlateGrey'), marker=dict(size=1),
                               showlegend=False))

# Actual positions (default symbol, small) and predicted positions (diamonds, large).
fig.add_trace(go.Scatter3d(x=Y_test_arr[:, 0], y=Y_test_arr[:, 1], z=Y_test_arr[:, 2], mode="markers",
                           marker=dict(color=color_vec_u, size=5), showlegend=False))
fig.add_trace(go.Scatter3d(x=Y_test_pd[:, 0], y=Y_test_pd[:, 1], z=Y_test_pd[:, 2], mode="markers",
                           marker=dict(color=color_vec_u, size=10, symbol="diamond"), showlegend=False))

# Marker outline applied to every trace in the figure.
fig.update_traces(marker=dict(
                              line=dict(width=2,
                                        color='DarkSlateGrey'))
                 )

fig.show()

In [187]:
# 2D view of the actual morphology UMAP positions of the test rows.
# plot0 / plot1 select which UMAP components go on the x / y axes (also used by the next cell).
plot0 = 0
plot1 = 1

fig = go.Figure()
actual_markers = go.Scatter(
    x=Y_test_arr[:, plot0],
    y=Y_test_arr[:, plot1],
    mode="markers",
    marker=dict(color=color_vec_u, size=7),
    showlegend=False,
)
fig.add_trace(actual_markers)

fig.update_layout(
    xaxis_title="UMAP 1",
    yaxis_title="UMAP 2",
    legend_title="stage (hpf)",
    font=dict(family="Arial", size=14, color="Black"),
)

# Marker outline applied to every trace.
fig.update_traces(marker=dict(line=dict(width=2, color='DarkSlateGrey')))

fig.show()
fig.write_image(os.path.join(figure_dir, "umap_2d_pd_pre.png"), scale=2)

In [188]:

# Extend the 2D figure from the previous cell (modified in place) with actual -> predicted connectors.
# Iterate over the UMAP arrays themselves rather than range(Y_test.shape[0]): Y_test is the
# NaN-filtered target of the distance regression and can have fewer rows than these arrays,
# which would silently leave the last samples without a connecting segment.
for actual_xyz, predicted_xyz in zip(Y_test_arr, Y_test_pd):
    data = np.vstack((actual_xyz, predicted_xyz))

    fig.add_trace(go.Scatter(x=data[:, plot0], y=data[:, plot1], mode="lines+markers",
                             line=dict(color='DarkSlateGrey', width=1), marker=dict(size=1),
                             showlegend=False))

# Markers are added after the segments: actual positions (default symbol) and predictions (diamonds).
fig.add_trace(go.Scatter(x=Y_test_arr[:, plot0], y=Y_test_arr[:, plot1], mode="markers",
                         marker=dict(color=color_vec_u, size=7), showlegend=False))
fig.add_trace(go.Scatter(x=Y_test_pd[:, plot0], y=Y_test_pd[:, plot1], mode="markers",
                         marker=dict(color=color_vec_u, size=10, symbol="diamond"), showlegend=False))

# Marker outline applied to every trace in the figure.
fig.update_traces(marker=dict(
                              line=dict(width=2,
                                        color='DarkSlateGrey'))
                 )

fig.write_image(os.path.join(figure_dir, "umap_2d_pd_genotype.png"), scale=2)

In [189]:
# Predicted stage for each test row, used as the marker colour values.
color_vec_u2 = master_df_ref.loc[test_indices, "predicted_stage_hpf"].to_numpy()

# Reuse the figure built in the previous cells (modified in place): the marker colour of every
# trace is replaced by the stage values, then the figure is shown and exported.
fig.update_traces(marker_color=color_vec_u2)

fig.show()
fig.write_image(os.path.join(figure_dir, "umap_2d_pd_stage.png"), scale=2)