From 6515caf64a971f6392c99404456212a92bcfb673 Mon Sep 17 00:00:00 2001 From: Steve Malton Date: Tue, 5 Dec 2023 17:23:56 +0000 Subject: [PATCH 1/6] Clean up auto model downloads on failure --- dorado/cli/basecaller.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dorado/cli/basecaller.cpp b/dorado/cli/basecaller.cpp index 29bb95b4..4603b4e8 100644 --- a/dorado/cli/basecaller.cpp +++ b/dorado/cli/basecaller.cpp @@ -458,9 +458,8 @@ int basecaller(int argc, char* argv[]) { auto ways = {model_selection.has_mods_variant(), !mod_bases.empty(), !mod_bases_models.empty()}; if (std::count(ways.begin(), ways.end(), true) > 1) { spdlog::error( - "only one of --modified-bases, --modified-bases-models, or modified models set " - "via models argument can be used at once", - model_arg); + "Only one of --modified-bases, --modified-bases-models, or modified models set " + "via models argument can be used at once"); std::exit(EXIT_FAILURE); }; @@ -578,6 +577,7 @@ int basecaller(int argc, char* argv[]) { temp_download_paths = model_finder.downloaded_models(); } catch (std::exception& e) { spdlog::error(e.what()); + utils::clean_temporary_models(model_finder.downloaded_models()); std::exit(EXIT_FAILURE); } } @@ -605,6 +605,7 @@ int basecaller(int argc, char* argv[]) { parser.visible.get("--estimate-poly-a"), model_selection); } catch (const std::exception& e) { spdlog::error("{}", e.what()); + utils::clean_temporary_models(temp_download_paths); return 1; } From e8d43a4e6b3a9be6f30d32baad782a6555782ebc Mon Sep 17 00:00:00 2001 From: Steve Malton Date: Tue, 5 Dec 2023 17:24:38 +0000 Subject: [PATCH 2/6] Let exception bubble up and be captured at the top level --- dorado/cli/duplex.cpp | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/dorado/cli/duplex.cpp b/dorado/cli/duplex.cpp index b9a4b30a..0d746269 100644 --- a/dorado/cli/duplex.cpp +++ b/dorado/cli/duplex.cpp @@ -70,20 +70,16 @@ ModelFinder get_model_finder(const std::string& model_arg, // Get the model name const auto model_path = std::filesystem::canonical(std::filesystem::path(model_arg)); const auto model_name = model_path.filename().string(); - try { - // Try to find the model - const auto model_info = ModelFinder::get_simplex_model_info(model_name); - - // Pass the model's ModelVariant (e.g. HAC) in here so everything matches - const auto inferred_selection = ModelSelection{ - models::to_string(model_info.simplex.variant), model_info.simplex, {}}; - - // Return the ModelFinder which hasn't needed to inspect any data - return ModelFinder{model_info.chemistry, inferred_selection, false}; - } catch (const std::exception& e) { - spdlog::error(e.what()); - std::exit(EXIT_FAILURE); - } + + // Try to find the model + const auto model_info = ModelFinder::get_simplex_model_info(model_name); + + // Pass the model's ModelVariant (e.g. HAC) in here so everything matches + const auto inferred_selection = ModelSelection{ + models::to_string(model_info.simplex.variant), model_info.simplex, {}}; + + // Return the ModelFinder which hasn't needed to inspect any data + return ModelFinder{model_info.chemistry, inferred_selection, false}; } // Model complex given, inspect data to find chemistry. From a2147ee46036c7c17dd178bf7b95a25f7d6a8cb6 Mon Sep 17 00:00:00 2001 From: Steve Malton Date: Tue, 5 Dec 2023 17:25:21 +0000 Subject: [PATCH 3/6] Throw instead of exit so we can clean up at the top --- dorado/cli/duplex.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/dorado/cli/duplex.cpp b/dorado/cli/duplex.cpp index 0d746269..e5901452 100644 --- a/dorado/cli/duplex.cpp +++ b/dorado/cli/duplex.cpp @@ -100,11 +100,9 @@ DuplexModels load_models(const std::string& model_arg, auto ways = {inferred_selection.has_mods_variant(), !mod_bases.empty(), !mod_bases_models.empty()}; if (std::count(ways.begin(), ways.end(), true) > 1) { - spdlog::error( - "only one of --modified-bases, --modified-bases-models, or modified models set " - "via models argument can be used at once", - model_arg); - std::exit(EXIT_FAILURE); + throw std::runtime_error( + "Only one of --modified-bases, --modified-bases-models, or modified models set " + "via models argument can be used at once"); }; if (inferred_selection.model.variant == ModelVariant::FAST) { From fb9d3a6e22d860be59efd09779b737bdf9513ea4 Mon Sep 17 00:00:00 2001 From: Steve Malton Date: Tue, 5 Dec 2023 17:25:59 +0000 Subject: [PATCH 4/6] Remove duplicated warning --- dorado/cli/duplex.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dorado/cli/duplex.cpp b/dorado/cli/duplex.cpp index e5901452..9c078a97 100644 --- a/dorado/cli/duplex.cpp +++ b/dorado/cli/duplex.cpp @@ -274,16 +274,13 @@ int duplex(int argc, char* argv[]) { cli::add_minimap2_arguments(parser, alignment::dflt_options); cli::add_internal_arguments(parser); + std::set temp_model_paths; try { cli::parse(parser, argc, argv); auto device(parser.visible.get("-x")); auto model(parser.visible.get("model")); - if (model.find("fast") != std::string::npos) { - spdlog::warn("Fast models are currently not recommended for duplex basecalling."); - } - auto reads(parser.visible.get("reads")); std::string pairs_file = parser.visible.get("--pairs"); auto threads = static_cast(parser.visible.get("--threads")); From 85c46e4c0b57bbc0b4c8bd95c7901ac7c8ed2dac Mon Sep 17 00:00:00 2001 From: Steve Malton Date: Tue, 5 Dec 2023 17:26:21 +0000 Subject: [PATCH 5/6] Clean up download models on failure --- dorado/cli/duplex.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dorado/cli/duplex.cpp b/dorado/cli/duplex.cpp index 9c078a97..53a19a26 100644 --- a/dorado/cli/duplex.cpp +++ b/dorado/cli/duplex.cpp @@ -441,6 +441,8 @@ int duplex(int argc, char* argv[]) { load_models(model, mod_bases, mod_bases_models, reads, recursive_file_loading, skip_model_compatibility_check); + temp_model_paths = models.temp_paths; + // create modbase runners first so basecall runners can pick batch sizes based on available memory auto mod_base_runners = create_modbase_runners( models.mods_model_paths, device, default_parameters.mod_base_runners_per_caller, @@ -547,7 +549,7 @@ int duplex(int argc, char* argv[]) { loader.load_reads(reads, parser.visible.get("--recursive"), ReadOrder::BY_CHANNEL); - utils::clean_temporary_models(models.temp_paths); + utils::clean_temporary_models(temp_model_paths); } // Wait for the pipeline to complete. When it does, we collect @@ -567,6 +569,7 @@ int duplex(int argc, char* argv[]) { : std::optional(dump_stats_filter)); } } catch (const std::exception& e) { + utils::clean_temporary_models(temp_model_paths); spdlog::error(e.what()); return 1; } From 80703a388f0dc8c8bf643469e0da5ca6ef33e6c4 Mon Sep 17 00:00:00 2001 From: Steve Malton Date: Tue, 5 Dec 2023 17:27:15 +0000 Subject: [PATCH 6/6] Clean up early if we threw an exception before all the models are downloaded --- dorado/cli/duplex.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/dorado/cli/duplex.cpp b/dorado/cli/duplex.cpp index 53a19a26..ffd14ad4 100644 --- a/dorado/cli/duplex.cpp +++ b/dorado/cli/duplex.cpp @@ -130,21 +130,26 @@ DuplexModels load_models(const std::string& model_arg, if (!mod_bases.empty()) { std::transform( mod_bases.begin(), mod_bases.end(), std::back_inserter(mods_model_paths), - [&model_arg](std::string m) { + [&model_arg](const std::string& m) { return std::filesystem::path(models::get_modification_model(model_arg, m)); }); } else if (!mod_bases_models.empty()) { const auto split = utils::split(mod_bases_models, ','); std::transform(split.begin(), split.end(), std::back_inserter(mods_model_paths), - [&](std::string m) { return std::filesystem::path(m); }); + [&](const std::string& m) { return std::filesystem::path(m); }); } } else { - model_path = model_finder.fetch_simplex_model(); - stereo_model_path = model_finder.fetch_stereo_model(); - mods_model_paths = inferred_selection.has_mods_variant() - ? model_finder.fetch_mods_models() - : std::vector{}; + try { + model_path = model_finder.fetch_simplex_model(); + stereo_model_path = model_finder.fetch_stereo_model(); + mods_model_paths = inferred_selection.has_mods_variant() + ? model_finder.fetch_mods_models() + : std::vector{}; + } catch (const std::exception&) { + utils::clean_temporary_models(model_finder.downloaded_models()); + throw; + } } const auto model_name = model_finder.get_simplex_model_name();