Skip to content

Commit

Permalink
Merge branch 'jdaw/prevent-same-mods-being-called' into 'master'
Browse files Browse the repository at this point in the history
Prevent users from running models with overlapping mods

Closes DOR-486

See merge request machine-learning/dorado!781
  • Loading branch information
tijyojwad committed Dec 20, 2023
2 parents 371a252 + a023984 commit a510d53
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 2 deletions.
3 changes: 3 additions & 0 deletions dorado/api/runner_creation.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "runner_creation.h"

#include "basecall/crf_utils.h"
#include "modbase/ModBaseModelConfig.h"

#if DORADO_GPU_BUILD
#ifdef __APPLE__
Expand Down Expand Up @@ -142,6 +143,8 @@ std::vector<modbase::RunnerPtr> create_modbase_runners(
return {};
}

modbase::check_modbase_multi_model_compatibility(remora_models);

// generate model callers before nodes or it affects the speed calculations
std::vector<modbase::RunnerPtr> remora_runners;
std::vector<std::string> modbase_devices;
Expand Down
25 changes: 25 additions & 0 deletions dorado/modbase/ModBaseModelConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,29 @@ ModBaseInfo get_modbase_info(
return result;
}

void check_modbase_multi_model_compatibility(
const std::vector<std::filesystem::path>& modbase_models) {
std::string err_msg = "";
for (size_t i = 0; i < modbase_models.size(); i++) {
auto ref_model = load_modbase_model_config(modbase_models[i]);
const auto& ref_motif = ref_model.motif[ref_model.motif_offset];
for (size_t j = i + 1; j < modbase_models.size(); j++) {
auto query_model = load_modbase_model_config(modbase_models[j]);
const auto& query_motif = query_model.motif[query_model.motif_offset];

if (ref_motif == query_motif) {
err_msg += modbase_models[i].string() + " and " + modbase_models[j].string() +
" have overlapping canonical motif: " + ref_motif;
}
}
}

if (!err_msg.empty()) {
throw std::runtime_error(
"Following are incompatible modbase models. Please select only one of them to "
"run:\n" +
err_msg);
}
}

} // namespace dorado::modbase
7 changes: 5 additions & 2 deletions dorado/modbase/ModBaseModelConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@ struct ModBaseModelConfig {
bool reverse_signal{false}; ///< Reverse model data before processing (rna model)
};

ModBaseModelConfig load_modbase_model_config(const std::filesystem::path & model_path);
ModBaseModelConfig load_modbase_model_config(const std::filesystem::path& model_path);

// Determine the modbase alphabet from parameters and calculate offset positions for the results
ModBaseInfo get_modbase_info(
const std::vector<std::reference_wrapper<const ModBaseModelConfig>> & base_mod_params);
const std::vector<std::reference_wrapper<const ModBaseModelConfig>>& base_mod_params);

// Check that no two of the given modbase models share the same canonical motif
// (the motif element at each model's motif_offset).
// Throws std::runtime_error naming the conflicting model pairs if any overlap is found.
void check_modbase_multi_model_compatibility(
const std::vector<std::filesystem::path>& modbase_models);

} // namespace dorado::modbase
7 changes: 7 additions & 0 deletions tests/test_simple_basecaller_execution.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dorado_bin=$(cd "$(dirname $1)"; pwd -P)/$(basename $1)
# Optional positional overrides, each with a default:
#   $2 - basecall model name
#   $3 - batch size
#   $4 - 5k model name (v4.2.0 by default)
#   $5 - 5k model name (v4.3.0 by default)
model_name=${2:-dna_r10.4.1_e8.2_400bps_hac@v4.1.0}
batch=${3:-384}
model_name_5k=${4:-dna_r10.4.1_e8.2_400bps_hac@v4.2.0}
# Uses its own positional argument: sharing $4 with model_name_5k would make both
# variables name the same model whenever the caller overrides $4.
model_name_5k_v43=${5:-dna_r10.4.1_e8.2_400bps_hac@v4.3.0}
data_dir=$test_dir/data
output_dir_name=$(echo $RANDOM | head -c 10)
output_dir=${test_dir}/${output_dir_name}
Expand All @@ -27,6 +28,8 @@ $dorado_bin download --model ${model_name} --directory ${output_dir}
model=${output_dir}/${model_name}
$dorado_bin download --model ${model_name_5k} --directory ${output_dir}
model_5k=${output_dir}/${model_name_5k}
$dorado_bin download --model ${model_name_5k_v43} --directory ${output_dir}
model_5k_v43=${output_dir}/${model_name_5k_v43}

echo dorado basecaller test stage
$dorado_bin basecaller ${model} $data_dir/pod5 -b ${batch} --emit-fastq > $output_dir/ref.fq
Expand Down Expand Up @@ -56,6 +59,10 @@ if $dorado_bin basecaller ${model} $data_dir/pod5 -b ${batch} --emit-fastq --mod
echo "Error: dorado basecaller should fail with combination of emit-fastq and modbase!"
exit 1
fi
if $dorado_bin basecaller $model_5k_v43 $data_dir/duplex/pod5 --modified-bases 5mC_5hmC 5mCG_5hmCG > $output_dir/error_condition.fq; then
echo "Error: dorado basecaller should fail with multiple modbase configs having overlapping mods!"
exit 1
fi
set -e

echo dorado summary test stage
Expand Down

0 comments on commit a510d53

Please sign in to comment.