Skip to content

Commit

Permalink
Add magnitude and PCA-space uncertainty evaluation (#253)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Cameron Smith <cameron.ray.smith@gmail.com>
  • Loading branch information
qinqian and cameronraysmith committed May 15, 2023
1 parent 344f7c9 commit fab8eda
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 13 deletions.
14 changes: 10 additions & 4 deletions reproducibility/figures/dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ stages:
cmd: /usr/bin/time -v python train.py model_training.train=[pancreas_model1]
deps:
- path: data/processed/pancreas_processed.h5ad
md5: 14f5abed25caaaaf7be172657b809b96
size: 424775272
md5: 4aaf4084689b5e10c60231552502bd98
size: 424784368
- path: train.py
md5: 02a061d8df9ebef4952c0e2c8a18ce19
size: 8722
Expand Down Expand Up @@ -2075,14 +2075,17 @@ stages:
cmd: python fig2/figure_extras.py reports.model_summary.summarize=[pbmc10k_model2,larry_tips_model2,pbmc68k_model2]
deps:
- path: fig2/figure_extras.py
md5: 632a9c3472762b91ca0d2d713edd93ab
size: 6039
md5: fa2ffb6ee7dc74b6eabdf39715312cba
size: 8868
- path: models/larry_tips_model2/pyrovelocity.pkl
md5: 52d63551f18a58477150a01d48cbb16f
size: 323695520
- path: models/pbmc10k_model2/pyrovelocity.pkl
md5: 2bc59cb5aee3692f2513da3c1a26f7da
size: 207371037
- path: models/pancreas_model2/pyrovelocity.pkl
md5: 1eeaa0c042af52c18af112c6c33893c3
size: 65873440
- path: models/pbmc68k_model2/pyrovelocity.pkl
md5: 51b6e251fff5481399746326fe165657
size: 320496178
Expand All @@ -2106,6 +2109,9 @@ stages:
- path: reports/fig2/figure2_extras.pdf
md5: 6849b13f7417618075f646134241ac6d
size: 181830
- path: reports/fig2/fig2_pancreas_pbmc_uncertainties_comparison.pdf
md5: 96b95db9d262cae8dfae37e2f0174831
size: 270423
summarize_pbmc10k_model2:
cmd: python summarize.py reports.model_summary.summarize=[pbmc10k_model2]
deps:
Expand Down
94 changes: 85 additions & 9 deletions reproducibility/figures/fig2/figure_extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ def plots(conf: DictConfig, logger: Logger) -> None:

time_cov_list = []
mag_cov_list = []
umap_mag_cov_list = []
angle_cov_list = []
pca_angle_cov_list = []
pca_mag_cov_list = []
names = []
for data_model in conf.reports.model_summary.summarize:
##################
Expand All @@ -71,11 +74,30 @@ def plots(conf: DictConfig, logger: Logger) -> None:
cell_angles_cov = angles_std / cell_angles_mean
angle_cov_list.append(cell_angles_cov)

cell_magnitudes = np.sqrt(
pca_cell_vector = posterior_samples["pca_vector_field_posterior_samples"]
# (samples, cell, 50pcs)
pca_cell_magnitudes = np.sqrt((pca_cell_vector**2).sum(axis=-1))
pca_cell_magnitudes_mean = pca_cell_magnitudes.mean(axis=-2)
pca_cell_magnitudes_std = pca_cell_magnitudes.std(axis=-2)
pca_cell_magnitudes_cov = pca_cell_magnitudes_std / pca_cell_magnitudes_mean
pca_mag_cov_list.append(pca_cell_magnitudes_cov)

pca_cell_angles = posterior_samples["pca_embeds_angle"] / np.pi * 180
pca_cell_angles_mean = pca_cell_angles.mean(axis=0)
pca_angles_std = circstd(pca_cell_angles * u.deg, method="angular", axis=0)
pca_cell_angles_cov = pca_angles_std / pca_cell_angles_mean
pca_angle_cov_list.append(pca_cell_angles_cov)

umap_cell_magnitudes = np.sqrt(
(posterior_samples["vector_field_posterior_samples"] ** 2).sum(axis=-1)
)
umap_cell_magnitudes_mean = umap_cell_magnitudes.mean(axis=-2)
umap_cell_magnitudes_std = umap_cell_magnitudes.std(axis=-2)
umap_cell_magnitudes_cov = umap_cell_magnitudes_std / umap_cell_magnitudes_mean

print(posterior_samples.keys())
cell_magnitudes = posterior_samples["original_spaces_embeds_magnitude"]
cell_magnitudes_mean = cell_magnitudes.mean(axis=-2)
# cell_magnitudes_mean = np.sqrt((posterior_samples["vector_field_posterior_mean"] ** 2).sum(axis=-1))
cell_magnitudes_std = cell_magnitudes.std(axis=-2)
cell_magnitudes_cov = cell_magnitudes_std / cell_magnitudes_mean

Expand All @@ -84,25 +106,35 @@ def plots(conf: DictConfig, logger: Logger) -> None:
cell_time_cov = cell_time_std / cell_time_mean
time_cov_list.append(cell_time_cov)
mag_cov_list.append(cell_magnitudes_cov)
umap_mag_cov_list.append(umap_cell_magnitudes_cov)
names += [data_model] * len(cell_time_cov)

print(posterior_samples["pca_vector_field_posterior_samples"].shape)
print(posterior_samples["embeds_angle"].shape)
time_cov_list = np.hstack(time_cov_list)
mag_cov_list = np.hstack(mag_cov_list)
angle_cov_list = np.hstack(angle_cov_list)
umap_mag_cov_list = np.hstack(umap_mag_cov_list)
pca_angle_cov_list = np.hstack(pca_angle_cov_list)
pca_mag_cov_list = np.hstack(pca_mag_cov_list)

metrics_df = pd.DataFrame(
{
"time_coefficient_of_variation": time_cov_list,
"magnitude_coefficient_of_variation": mag_cov_list,
"angle_coefficient_of_variation": angle_cov_list,
"pca_magnitude_coefficient_of_variation": pca_mag_cov_list,
"pca_angle_coefficient_of_variation": pca_angle_cov_list,
"umap_magnitude_coefficient_of_variation": umap_mag_cov_list,
"umap_angle_coefficient_of_variation": angle_cov_list,
"dataset": names,
}
)
logger.info(metrics_df.head())
shared_time_plot = conf.reports.figure2_extras.shared_time_plot
fig, ax = plt.subplots(1, 3)
fig.set_size_inches(15.6, 3.5)
order = ("pbmc10k_model2", "larry_tips_model2", "pbmc68k_model2")
fig, ax = plt.subplots(2, 3)
ax = ax.flatten()
fig.set_size_inches(15.6, 9)
order = ("pancreas_model2", "pbmc68k_model2")
sns.boxplot(
x="dataset",
y="time_coefficient_of_variation",
Expand All @@ -119,12 +151,33 @@ def plots(conf: DictConfig, logger: Logger) -> None:
)
sns.boxplot(
x="dataset",
y="angle_coefficient_of_variation",
y="pca_magnitude_coefficient_of_variation",
data=metrics_df,
ax=ax[2],
order=order,
)
pairs = [("pbmc10k_model2", "larry_tips_model2")]
sns.boxplot(
x="dataset",
y="umap_magnitude_coefficient_of_variation",
data=metrics_df,
ax=ax[3],
order=order,
)
sns.boxplot(
x="dataset",
y="pca_angle_coefficient_of_variation",
data=metrics_df,
ax=ax[4],
order=order,
)
sns.boxplot(
x="dataset",
y="umap_angle_coefficient_of_variation",
data=metrics_df,
ax=ax[5],
order=order,
)
pairs = [("pancreas_model2", "pbmc68k_model2")]
time_annotator = Annotator(
ax[0],
pairs,
Expand All @@ -146,12 +199,35 @@ def plots(conf: DictConfig, logger: Logger) -> None:
mag_annotator.configure(test="Mann-Whitney", text_format="star", loc="inside")
mag_annotator.apply_and_annotate()

mag_annotator = Annotator(
ax[2],
pairs,
data=metrics_df,
x="dataset",
y="pca_magnitude_coefficient_of_variation",
order=order,
)
mag_annotator.configure(test="Mann-Whitney", text_format="star", loc="inside")
mag_annotator.apply_and_annotate()

mag_annotator = Annotator(
ax[3],
pairs,
data=metrics_df,
x="dataset",
y="umap_magnitude_coefficient_of_variation",
order=order,
)
mag_annotator.configure(test="Mann-Whitney", text_format="star", loc="inside")
mag_annotator.apply_and_annotate()

# angle_annotator = Annotator(
# ax[2], pairs, data=metrics_df, x="dataset", y="angle_coefficient_of_variation", order=order
# )
# angle_annotator.configure(test="Mann-Whitney", text_format="star", loc="inside")
# angle_annotator.apply_and_annotate()
ax[2].set_ylim(-0.1, 0.1)
ax[4].set_ylim(-0.1, 0.1)
ax[5].set_ylim(-0.1, 0.1)

fig.savefig(
shared_time_plot,
Expand Down

0 comments on commit fab8eda

Please sign in to comment.