Skip to content

Commit

Permalink
add pbmc5k data set (pinellolab#358)
Browse files (browse the repository at this point in the history)
* add script to fetch pyroe data
* update label data script to use data set name

* update config for pbmc5k
* run data download stage
* run preprocess stage
* train pbmc5k model
* add pbmc5k training metrics
* postprocess pbmc5k data
* add pbmc5k reports folder
* summarize pbmc5k

* update pipeline
* pin cml docker image to previous version
  • Loading branch information
cameronraysmith committed Jun 23, 2023
1 parent 59dcc5f commit 04b1ce0
Show file tree
Hide file tree
Showing 12 changed files with 469 additions and 21 deletions.
6 changes: 6 additions & 0 deletions .github/pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ function run_parallel_pipeline() {
dvc repro train@larry_multilineage_model2 &
wait

dvc repro train@bonemarrow_model2 &
sleep 7
dvc repro train@pbmc10k_model2 &
sleep 7
dvc repro train@pbmc5k_model2 &

wait
dvc repro train
Expand Down Expand Up @@ -123,6 +127,8 @@ data_sets=(
"pons"
"pbmc68k"
"pbmc10k"
"pbmc5k"
"bonemarrow"
"larry"
"larry_mono"
"larry_neu"
Expand Down
3 changes: 2 additions & 1 deletion dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# syntax=docker/dockerfile:1
- FROM ghcr.io/iterative/cml:0-dvc2-base1-gpu
+ # FROM ghcr.io/iterative/cml:0-dvc2-base1-gpu
+ FROM ghcr.io/iterative/cml@sha256:ad10a563de25311241f10d9d5509cecab6bc754b6b2c90b61e309e34fe80911e

WORKDIR ${CML_RUNNER_PATH}
COPY . pyrovelocity/
Expand Down
26 changes: 25 additions & 1 deletion pyrovelocity/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ def create_reports_config(model_name: str, model_number: int):
"pons",
"pancreas",
"bonemarrow",
"pbmc5k",
"pbmc10k",
"pbmc68k",
"larry",
Expand All @@ -565,6 +566,7 @@ def create_reports_config(model_name: str, model_number: int):
"pons",
"pancreas",
"bonemarrow",
"pbmc5k",
"pbmc10k",
"pbmc68k",
"larry",
Expand All @@ -583,6 +585,7 @@ def create_reports_config(model_name: str, model_number: int):
"larry_neu_model2",
"larry_multilineage_model2",
"pbmc10k_model2",
"pbmc5k_model2",
]

model_training = dict(
Expand Down Expand Up @@ -733,7 +736,17 @@ def create_reports_config(model_name: str, model_number: int):
"pbmc10k",
2,
"umap",
- gpu_id=0,
+ gpu_id=1,
cell_state="celltype",
offset=True,
max_epochs=2000,
),
pbmc5k_model2=create_model_config(
"pyrovelocity",
"pbmc5k",
2,
"umap",
gpu_id=2,
cell_state="celltype",
offset=True,
max_epochs=2000,
Expand Down Expand Up @@ -883,6 +896,16 @@ def create_reports_config(model_name: str, model_number: int):
process_method="load_data",
process_args=dict(count_thres="${base.count_threshold}"),
),
pbmc5k=create_dataset_config(
source="pyrovelocity",
name="pbmc5k",
dl_root="${paths.data_external}",
data_file="pbmc5k.h5ad",
rel_path="${paths.data_external}/${.data_file}",
url="https://storage.googleapis.com/pyrovelocity/data/pbmc5k.h5ad",
process_method="load_data",
process_args=dict(count_thres="${base.count_threshold}"),
),
)

return make_config(
Expand All @@ -909,6 +932,7 @@ def create_reports_config(model_name: str, model_number: int):
# pons_model1=create_reports_config("pons", 1),
pons_model2=create_reports_config("pons", 2),
pbmc10k_model2=create_reports_config("pbmc10k", 2),
pbmc5k_model2=create_reports_config("pbmc5k", 2),
larry_tips_model2=create_reports_config("larry_tips", 2),
larry_mono_model2=create_reports_config("larry_mono", 2),
larry_neu_model2=create_reports_config("larry_neu", 2),
Expand Down
73 changes: 72 additions & 1 deletion reproducibility/figures/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ download_data:
- pons
- pancreas
- bonemarrow
- pbmc5k
- pbmc10k
- pbmc68k
- larry
Expand All @@ -24,6 +25,7 @@ process_data:
- pons
- pancreas
- bonemarrow
- pbmc5k
- pbmc10k
- pbmc68k
- larry
Expand All @@ -41,6 +43,7 @@ train_models:
- larry_neu_model2
- larry_multilineage_model2
- pbmc10k_model2
- pbmc5k_model2
data_sets:
simulate_medium:
source: simulate
Expand Down Expand Up @@ -225,6 +228,19 @@ data_sets:
count_thres: 0
rel_path: data/processed/pbmc10k_processed.h5ad
thresh_histogram_path: data/processed/pbmc10k_thresh_histogram.pdf
pbmc5k:
source: pyrovelocity
data_file: pbmc5k.h5ad
dl_root: data/external
dl_path: data/external/pbmc5k.h5ad
rel_path: data/external/pbmc5k.h5ad
url: https://storage.googleapis.com/pyrovelocity/data/pbmc5k.h5ad
derived:
process_method: load_data
process_args:
count_thres: 0
rel_path: data/processed/pbmc5k_processed.h5ad
thresh_histogram_path: data/processed/pbmc5k_thresh_histogram.pdf
model_training:
simulate_model2:
path: models/simulate_medium_model2
Expand Down Expand Up @@ -607,7 +623,7 @@ model_training:
run_info_path: models/pbmc10k_model2/run_info.json
vector_field_parameters:
basis: umap
- gpu_id: 0
+ gpu_id: 1
training_parameters:
_target_: pyrovelocity.api.train_model
_partial_: true
Expand All @@ -633,6 +649,43 @@ model_training:
cell_specific_kinetics: null
kinetics_num: 2
loss_plot_path: models/pbmc10k_model2/loss_plot.png
pbmc5k_model2:
path: models/pbmc5k_model2
model_path: models/pbmc5k_model2/model
input_data_path: data/processed/pbmc5k_processed.h5ad
trained_data_path: models/pbmc5k_model2/trained.h5ad
pyrovelocity_data_path: models/pbmc5k_model2/pyrovelocity.pkl.zst
posterior_samples_path: models/pbmc5k_model2/posterior_samples.pkl.zst
metrics_path: models/pbmc5k_model2/metrics.json
run_info_path: models/pbmc5k_model2/run_info.json
vector_field_parameters:
basis: umap
gpu_id: 2
training_parameters:
_target_: pyrovelocity.api.train_model
_partial_: true
guide_type: auto
model_type: auto
svi_train: false
batch_size: -1
train_size: 1.0
use_gpu: 0
likelihood: Poisson
num_samples: 30
log_every: 100
cell_state: celltype
patient_improve: 0.0001
patient_init: 45
seed: 99
lr: 0.01
max_epochs: 2000
include_prior: true
library_size: true
offset: true
input_type: raw
cell_specific_kinetics: null
kinetics_num: 2
loss_plot_path: models/pbmc5k_model2/loss_plot.png
reports:
model_summary:
pancreas_model2:
Expand Down Expand Up @@ -662,6 +715,7 @@ reports:
rainbow_plot: reports/bonemarrow_model2/rainbow.pdf
uncertainty_param_plot: reports/bonemarrow_model2/param_uncertainties.pdf
vector_field_plot: reports/bonemarrow_model2/vector_field.pdf
posterior_phase_portraits: reports/bonemarrow_model2/posterior_phase_portraits
biomarker_selection_plot: reports/bonemarrow_model2/markers_selection_scatterplot.tif
biomarker_phaseportrait_plot: reports/bonemarrow_model2/markers_phaseportrait.pdf
fig2_part1_plot: reports/bonemarrow_model2/fig2_part1_plot.pdf
Expand Down Expand Up @@ -719,6 +773,23 @@ reports:
fig2_part2_plot: reports/pbmc10k_model2/fig2_part2_plot.pdf
violin_clusters_lin: reports/pbmc10k_model2/clusters_violin_lin.pdf
violin_clusters_log: reports/pbmc10k_model2/clusters_violin_log.pdf
pbmc5k_model2:
path: reports/pbmc5k_model2
trained_data_path: models/pbmc5k_model2/trained.h5ad
pyrovelocity_data_path: models/pbmc5k_model2/pyrovelocity.pkl.zst
dataframe_path: data/processed/pbmc5k_model2_dataframe.pkl.zst
shared_time_plot: reports/pbmc5k_model2/shared_time.pdf
volcano_plot: reports/pbmc5k_model2/volcano.pdf
rainbow_plot: reports/pbmc5k_model2/rainbow.pdf
uncertainty_param_plot: reports/pbmc5k_model2/param_uncertainties.pdf
vector_field_plot: reports/pbmc5k_model2/vector_field.pdf
posterior_phase_portraits: reports/pbmc5k_model2/posterior_phase_portraits
biomarker_selection_plot: reports/pbmc5k_model2/markers_selection_scatterplot.tif
biomarker_phaseportrait_plot: reports/pbmc5k_model2/markers_phaseportrait.pdf
fig2_part1_plot: reports/pbmc5k_model2/fig2_part1_plot.pdf
fig2_part2_plot: reports/pbmc5k_model2/fig2_part2_plot.pdf
violin_clusters_lin: reports/pbmc5k_model2/clusters_violin_lin.pdf
violin_clusters_log: reports/pbmc5k_model2/clusters_violin_log.pdf
larry_tips_model2:
path: reports/larry_tips_model2
trained_data_path: models/larry_tips_model2/trained.h5ad
Expand Down
Loading

0 comments on commit 04b1ce0

Please sign in to comment.