NOTE(review): this patch was captured with its line breaks and indentation collapsed and with
template arguments ("<...>") dropped. Line structure and indentation below are reconstructed;
template arguments are restored only on added lines where the code itself fixes the type.
Context/removed lines still showing a bare "std::vector" must be restored from the target
files before applying. Blob ids on the "index" lines predate the review edits.

diff --git a/dorado/api/runner_creation.cpp b/dorado/api/runner_creation.cpp
index 2fc55b9e..8d580c7a 100644
--- a/dorado/api/runner_creation.cpp
+++ b/dorado/api/runner_creation.cpp
@@ -1,6 +1,7 @@
 #include "runner_creation.h"
 
 #include "basecall/crf_utils.h"
+#include "modbase/ModBaseModelConfig.h"
 
 #if DORADO_GPU_BUILD
 #ifdef __APPLE__
@@ -142,6 +143,8 @@ std::vector create_modbase_runners(
         return {};
     }
 
+    modbase::check_modbase_multi_model_compatibility(remora_models);
+
     // generate model callers before nodes or it affects the speed calculations
     std::vector remora_runners;
     std::vector modbase_devices;
diff --git a/dorado/modbase/ModBaseModelConfig.cpp b/dorado/modbase/ModBaseModelConfig.cpp
index 1111c7fd..0b2fb4c2 100644
--- a/dorado/modbase/ModBaseModelConfig.cpp
+++ b/dorado/modbase/ModBaseModelConfig.cpp
@@ -130,4 +130,40 @@ ModBaseInfo get_modbase_info(
     return result;
 }
 
+// Reject combinations of modbase models that target the same canonical base.
+// A model's canonical base is motif[motif_offset] of its loaded config. Every
+// conflicting pair is reported on its own line of a single std::runtime_error.
+void check_modbase_multi_model_compatibility(
+        const std::vector<std::filesystem::path>& modbase_models) {
+    // Load each config once instead of reloading it for every pair.
+    std::vector<ModBaseModelConfig> configs;
+    configs.reserve(modbase_models.size());
+    for (const auto& model_path : modbase_models) {
+        configs.push_back(load_modbase_model_config(model_path));
+    }
+
+    std::string err_msg;
+    for (size_t i = 0; i < configs.size(); i++) {
+        const auto& ref_motif = configs[i].motif[configs[i].motif_offset];
+        for (size_t j = i + 1; j < configs.size(); j++) {
+            const auto& query_motif = configs[j].motif[configs[j].motif_offset];
+            if (ref_motif == query_motif) {
+                // Separate entries so multiple conflicts are not run together.
+                if (!err_msg.empty()) {
+                    err_msg += "\n";
+                }
+                err_msg += modbase_models[i].string() + " and " + modbase_models[j].string() +
+                           " have overlapping canonical motif: " + ref_motif;
+            }
+        }
+    }
+
+    if (!err_msg.empty()) {
+        throw std::runtime_error(
+                "Following are incompatible modbase models. Please select only one of them to "
+                "run:\n" +
+                err_msg);
+    }
+}
+
 }  // namespace dorado::modbase
diff --git a/dorado/modbase/ModBaseModelConfig.h b/dorado/modbase/ModBaseModelConfig.h
index 84fabbdc..3225c6ae 100644
--- a/dorado/modbase/ModBaseModelConfig.h
+++ b/dorado/modbase/ModBaseModelConfig.h
@@ -26,10 +26,15 @@ struct ModBaseModelConfig {
     bool reverse_signal{false};  ///< Reverse model data before processing (rna model)
 };
 
-ModBaseModelConfig load_modbase_model_config(const std::filesystem::path & model_path);
+ModBaseModelConfig load_modbase_model_config(const std::filesystem::path& model_path);
 
 // Determine the modbase alphabet from parameters and calculate offset positions for the results
 ModBaseInfo get_modbase_info(
-        const std::vector> & base_mod_params);
+        const std::vector>& base_mod_params);
+
+// Throws std::runtime_error if any two of the given models share the same canonical base
+// (motif[motif_offset] of their loaded configs).
+void check_modbase_multi_model_compatibility(
+        const std::vector<std::filesystem::path>& modbase_models);
 
 }  // namespace dorado::modbase
diff --git a/tests/test_simple_basecaller_execution.sh b/tests/test_simple_basecaller_execution.sh
index 44f7499a..c25c7c9d 100755
--- a/tests/test_simple_basecaller_execution.sh
+++ b/tests/test_simple_basecaller_execution.sh
@@ -15,6 +15,7 @@ dorado_bin=$(cd "$(dirname $1)"; pwd -P)/$(basename $1)
 model_name=${2:-dna_r10.4.1_e8.2_400bps_hac@v4.1.0}
 batch=${3:-384}
 model_name_5k=${4:-dna_r10.4.1_e8.2_400bps_hac@v4.2.0}
+model_name_5k_v43=${5:-dna_r10.4.1_e8.2_400bps_hac@v4.3.0}
 data_dir=$test_dir/data
 output_dir_name=$(echo $RANDOM | head -c 10)
 output_dir=${test_dir}/${output_dir_name}
@@ -27,6 +28,8 @@ $dorado_bin download --model ${model_name} --directory ${output_dir}
 model=${output_dir}/${model_name}
 $dorado_bin download --model ${model_name_5k} --directory ${output_dir}
 model_5k=${output_dir}/${model_name_5k}
+$dorado_bin download --model ${model_name_5k_v43} --directory ${output_dir}
+model_5k_v43=${output_dir}/${model_name_5k_v43}
 
 echo dorado basecaller test stage
 $dorado_bin basecaller ${model} $data_dir/pod5 -b ${batch} --emit-fastq > $output_dir/ref.fq
@@ -56,6 +59,10 @@ if $dorado_bin basecaller ${model} $data_dir/pod5 -b ${batch} --emit-fastq --mod
     echo "Error: dorado basecaller should fail with combination of emit-fastq and modbase!"
     exit 1
 fi
+if $dorado_bin basecaller $model_5k_v43 $data_dir/duplex/pod5 --modified-bases 5mC_5hmC 5mCG_5hmCG > $output_dir/error_condition.fq; then
+    echo "Error: dorado basecaller should fail with multiple modbase configs having overlapping mods!"
+    exit 1
+fi
 set -e
 
 echo dorado summary test stage