From e95cdb493ba40b374818a67d08ca4221adb107e6 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Thu, 4 Jan 2024 18:16:09 +0100 Subject: [PATCH 01/22] Add find_not_in_parentheses with multiple parens --- libmamba/include/mamba/util/parsers.hpp | 132 ++++++++++++++++++++++- libmamba/src/util/parsers.cpp | 57 +++------- libmamba/tests/src/util/test_parsers.cpp | 39 +++++++ 3 files changed, 180 insertions(+), 48 deletions(-) diff --git a/libmamba/include/mamba/util/parsers.hpp b/libmamba/include/mamba/util/parsers.hpp index 57989d852f..daf1c39ab7 100644 --- a/libmamba/include/mamba/util/parsers.hpp +++ b/libmamba/include/mamba/util/parsers.hpp @@ -12,6 +12,8 @@ #include +#include "mamba/util/conditional.hpp" + namespace mamba::util { @@ -77,20 +79,37 @@ namespace mamba::util char close = ')' ) noexcept -> std::size_t; - auto find_not_in_parentheses( // + [[nodiscard]] auto find_not_in_parentheses( // std::string_view text, - std::string_view val, - ParseError& err, + char c, char open = '(', char close = ')' + ) noexcept -> tl::expected; + + template + auto find_not_in_parentheses( + std::string_view text, + char c, + ParseError& err, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } ) noexcept -> std::size_t; - [[nodiscard]] auto find_not_in_parentheses( // + template + [[nodiscard]] auto find_not_in_parentheses( std::string_view text, char c, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> tl::expected; + + auto find_not_in_parentheses( // + std::string_view text, + std::string_view val, + ParseError& err, char open = '(', char close = ')' - ) noexcept -> tl::expected; + ) noexcept -> std::size_t; [[nodiscard]] auto find_not_in_parentheses( // std::string_view text, @@ -104,5 +123,108 @@ namespace mamba::util */ [[nodiscard]] auto glob_match(std::string_view pattern, std::string_view str, char glob = '*') -> bool; + + /******************** + * Implementation * + ********************/ + + 
namespace detail + { + template + constexpr auto concat_array(const Arr&... arrs) + { + auto out = std::array{}; + std::size_t out_idx = 0; + auto copy_one = [&](const auto& a) + { + for (auto const& x : a) + { + out[out_idx++] = x; + } + }; + (copy_one(arrs), ...); + return out; + } + + template + constexpr auto find(const std::array& arr, const T& val) -> std::size_t + { + auto pos = std::size_t(N); + for (std::size_t i = 0; i < N; ++i) + { + const bool found = arr[i] == val; + pos = static_cast(found) * i + (1 - static_cast(found)) * pos; + } + return pos; + } + } + + template + auto find_not_in_parentheses( + std::string_view text, + char c, + ParseError& err, + const std::array& open, + const std::array& close + ) noexcept -> std::size_t + { + // TODO(C++20): After allocating tokens and depths here, call an impl function using + // std::span defined in .cpp + static constexpr auto npos = std::string_view::npos; + + const auto tokens = detail::concat_array(std::array{ c }, open, close); + const auto tokens_str = std::string_view(tokens.data(), tokens.size()); + + auto depths = std::array{}; // last for easy branchless access + auto first_val_pos = npos; + auto pos = text.find_first_of(tokens_str); + while (pos != npos) + { + const auto open_pos = detail::find(open, text[pos]); + const auto close_pos = detail::find(close, text[pos]); + depths[open_pos] += int(open_pos < open.size()); + depths[close_pos] -= int(close_pos < open.size()); + depths[open_pos] = if_else( + open_pos == close_pos, + if_else(depths[open_pos] > 0, 0, 1), // swap 0 and 1 + depths[open_pos] + ); + depths[P] = 0; + + for (auto d : depths) + { + err = if_else(d < 0, ParseError::InvalidInput, err); + } + first_val_pos = if_else((text[pos] == c) && (pos == npos), pos, first_val_pos); + if ((text[pos] == c) && (depths == decltype(depths){})) + { + return pos; + } + pos = text.find_first_of(tokens_str, pos + 1); + } + err = if_else( + err == ParseError::Ok, + if_else(depths == decltype(depths){}, 
ParseError::NotFound, ParseError::InvalidInput), + err + ); + return first_val_pos; + } + + template + auto find_not_in_parentheses( + std::string_view text, + char c, + const std::array& open, + const std::array& close + ) noexcept -> tl::expected + { + auto err = ParseError::Ok; + const auto pos = find_not_in_parentheses(text, c, err, open, close); + if (err != ParseError::Ok) + { + return tl::make_unexpected(err); + } + return { pos }; + } } #endif diff --git a/libmamba/src/util/parsers.cpp b/libmamba/src/util/parsers.cpp index 1a92ed4c20..a494fb11d9 100644 --- a/libmamba/src/util/parsers.cpp +++ b/libmamba/src/util/parsers.cpp @@ -108,36 +108,23 @@ namespace mamba::util char close ) noexcept -> std::size_t { - static constexpr auto npos = std::string_view::npos; - - const auto tokens = std::array{ c, open, close }; - const auto tokens_str = std::string_view(tokens.data(), tokens.size()); + return find_not_in_parentheses(text, c, err, std::array{ open }, std::array{ close }); + } - int depth = 0; - auto first_val_pos = npos; - auto pos = text.find_first_of(tokens_str); - while (pos != npos) + auto find_not_in_parentheses( // + std::string_view text, + char c, + char open, + char close + ) noexcept -> tl::expected + { + auto err = ParseError::Ok; + const auto pos = find_not_in_parentheses(text, c, err, open, close); + if (err != ParseError::Ok) { - depth = if_else( - (open == close) && (text[pos] == open), - if_else(depth > 0, 0, 1), // swap 0 and 1 - depth + int(text[pos] == open) - int(text[pos] == close) - ); - // Set error but sill try to find the value - err = if_else(depth < 0, ParseError::InvalidInput, err); - first_val_pos = if_else((text[pos] == c) && (pos == npos), pos, first_val_pos); - if ((depth == 0) && (text[pos] == c)) - { - return pos; - } - pos = text.find_first_of(tokens_str, pos + 1); + return tl::make_unexpected(err); } - err = if_else( - err == ParseError::Ok, - if_else(depth == 0, ParseError::NotFound, ParseError::InvalidInput), - err - ); 
- return first_val_pos; + return { pos }; } auto find_not_in_parentheses( // @@ -187,22 +174,6 @@ namespace mamba::util return first_val_pos; } - auto find_not_in_parentheses( // - std::string_view text, - char c, - char open, - char close - ) noexcept -> tl::expected - { - auto err = ParseError::Ok; - const auto pos = find_not_in_parentheses(text, c, err, open, close); - if (err != ParseError::Ok) - { - return tl::make_unexpected(err); - } - return { pos }; - } - auto find_not_in_parentheses( // std::string_view text, std::string_view val, diff --git a/libmamba/tests/src/util/test_parsers.cpp b/libmamba/tests/src/util/test_parsers.cpp index a5df7f3a6e..42920e2b92 100644 --- a/libmamba/tests/src/util/test_parsers.cpp +++ b/libmamba/tests/src/util/test_parsers.cpp @@ -61,6 +61,29 @@ TEST_SUITE("util::parsers") CHECK_EQ(find_not_in_parentheses("(hello, world,", ',').error(), ParseError::InvalidInput); CHECK_EQ(find_not_in_parentheses("(hello", ',').error(), ParseError::InvalidInput); + + static constexpr auto opens = std::array{ '[', '(' }; + static constexpr auto closes = std::array{ ']', ')' }; + CHECK_EQ( + find_not_in_parentheses("(hello, world), [welcome, here],", ',', opens, closes), + 14 + ); + CHECK_EQ( + find_not_in_parentheses("([(hello)], ([world])), [welcome, here],", ',', opens, closes), + 22 + ); + CHECK_EQ(find_not_in_parentheses("[hello, world](welcome, here),", ',', opens, closes), 29); + CHECK_EQ( + find_not_in_parentheses("(hello, ]world,) welcome, here],", ',', opens, closes).error(), + ParseError::InvalidInput + ); + + // The following unfortunaltely does not work as we would need to allocate a stack + // to keep track of the opening and closing of parentheses. 
+ // CHECK_EQ( + // find_not_in_parentheses("(hello, [world, )welcome, here],", ',', opens, + // closes).error(), ParseError::InvalidInput + // ); } SUBCASE("Single char and similar open/close pair") @@ -77,6 +100,22 @@ TEST_SUITE("util::parsers") find_not_in_parentheses(R"("some, csv)", ',', '"', '"').error(), ParseError::InvalidInput ); + + static constexpr auto opens = std::array{ '[', '(', '\'', '"' }; + static constexpr auto closes = std::array{ ']', ')', '\'', '"' }; + CHECK_EQ( + find_not_in_parentheses(R"('("hello", world)', [welcome, here],)", ',', opens, closes), + 18 + ); + CHECK_EQ( + find_not_in_parentheses("('[(hello)], ([world])'), [welcome, here],", ',', opens, closes), + 24 + ); + CHECK_EQ( + find_not_in_parentheses("('hello', ']world,) welcome, here],", ',', opens, closes) + .error(), + ParseError::InvalidInput + ); } SUBCASE("Substring and different open/close pair") From 61c7dfe6f403736022d6b417301370fc83f23f1e Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 5 Jan 2024 10:55:39 +0100 Subject: [PATCH 02/22] Refactor MatchSpec URL with md5 --- libmamba/include/mamba/specs/match_spec.hpp | 9 +++ libmamba/src/specs/match_spec.cpp | 61 +++++++++++++++++--- libmamba/tests/src/specs/test_match_spec.cpp | 15 +++++ 3 files changed, 76 insertions(+), 9 deletions(-) diff --git a/libmamba/include/mamba/specs/match_spec.hpp b/libmamba/include/mamba/specs/match_spec.hpp index 6ad784de9a..17d23c617e 100644 --- a/libmamba/include/mamba/specs/match_spec.hpp +++ b/libmamba/include/mamba/specs/match_spec.hpp @@ -26,6 +26,15 @@ namespace mamba::specs using NameSpec = GlobSpec; using BuildStringSpec = GlobSpec; + inline static constexpr char url_md5_sep = '#'; + inline static constexpr char prefered_list_open = '['; + inline static constexpr char prefered_list_close = ']'; + inline static constexpr char alt_list_open = '('; + inline static constexpr char alt_list_close = ')'; + inline static constexpr char prefered_quote = '"'; + inline static constexpr char 
alt_quote = '\''; + + [[nodiscard]] static auto parse(std::string_view spec) -> MatchSpec; [[nodiscard]] static auto parse_url(std::string_view spec) -> MatchSpec; diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index 171b9a98c3..96744b2e90 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -101,26 +101,69 @@ namespace mamba::specs } } + namespace + { + inline constexpr auto open_or_quote = std::array{ + MatchSpec::prefered_list_open, + MatchSpec::alt_list_open, + MatchSpec::prefered_quote, + MatchSpec::alt_quote, + }; + + inline constexpr auto close_or_quote = std::array{ + MatchSpec::prefered_list_close, + MatchSpec::alt_list_close, + MatchSpec::prefered_quote, + MatchSpec::alt_quote, + }; + + auto is_hash(std::string_view text) -> bool + { + constexpr auto is_hash_char = [](char c) -> bool + { + auto const lower = util::to_lower(c); + return util::is_digit(c) || (lower == 'a') || (lower == 'b') || (lower == 'c') + || (lower == 'd') || (lower == 'e') || (lower == 'f'); + }; + return std::all_of(text.cbegin(), text.cend(), is_hash_char); + } + } + auto MatchSpec::parse(std::string_view spec) -> MatchSpec { - auto spec_str = std::string(spec); - auto out = MatchSpec(); - if (spec_str.empty()) + static constexpr auto npos = std::string_view::npos; + spec = util::strip(spec); + if (spec.empty()) { - return out; + return {}; } - if (std::size_t idx = spec_str.find('#'); idx != std::string::npos) + // A plain URL like https://conda.anaconda.org/conda-forge/linux-64/pkg-6.4-bld.conda + if (has_archive_extension(spec)) { - spec_str = spec_str.substr(0, idx); + return MatchSpec::parse_url(spec); } - spec_str = util::strip(spec_str); - if (has_archive_extension(spec_str)) + // A URL with hash, generated by `mamba env export --explicit` like + // https://conda.anaconda.org/conda-forge/linux-64/pkg-6.4-bld.conda#7dbaa197d7ba6032caf7ae7f32c1efa0 + if (const auto idx = spec.rfind(url_md5_sep); idx != 
npos) { - return MatchSpec::parse_url(spec_str); + auto url = spec.substr(0, idx); + auto hash = spec.substr(idx + 1); + if (has_archive_extension(url)) + { + auto out = MatchSpec::parse_url(url); + if (is_hash(hash)) + { + out.set_md5(std::string(hash)); + } + return out; + } } + auto spec_str = std::string(spec); + auto out = MatchSpec(); + auto extract_kv = [&spec_str](const std::string& kv_string, auto& map) { static const std::regex kv_re("([a-zA-Z0-9_-]+?)=([\"\']?)([^\'\"]*?)(\\2)(?:[\'\", ]|$)"); diff --git a/libmamba/tests/src/specs/test_match_spec.cpp b/libmamba/tests/src/specs/test_match_spec.cpp index df4a04ab84..e117f72ba8 100644 --- a/libmamba/tests/src/specs/test_match_spec.cpp +++ b/libmamba/tests/src/specs/test_match_spec.cpp @@ -81,6 +81,21 @@ TEST_SUITE("specs::match_spec") CHECK_EQ(ms.name().str(), "python"); CHECK_EQ(ms.build_number().str(), "<=3"); } + { + auto ms = MatchSpec::parse( + "https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-h59595ed_2.conda" + "#7dbaa197d7ba6032caf7ae7f32c1efa0" + ); + CHECK_EQ(ms.name().str(), "ncurses"); + CHECK_EQ(ms.version().str(), "==6.4"); + CHECK_EQ(ms.build_string().str(), "h59595ed_2"); + CHECK_EQ( + ms.url(), + "https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-h59595ed_2.conda" + ); + CHECK_EQ(ms.filename(), "ncurses-6.4-h59595ed_2.conda"); + CHECK_EQ(ms.md5(), "7dbaa197d7ba6032caf7ae7f32c1efa0"); + } { auto ms = MatchSpec::parse( "https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2" From 4bd5c0dd6ffeb4eaa26d672b2e85a7fc93baa24c Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 27 Feb 2024 10:37:16 +0100 Subject: [PATCH 03/22] Refactor find_not_in_parentheses --- libmamba/include/mamba/util/parsers.hpp | 172 +++++++++++++++++------ libmamba/src/util/parsers.cpp | 56 +++----- libmamba/tests/src/util/test_parsers.cpp | 46 ++++++ 3 files changed, 196 insertions(+), 78 deletions(-) diff --git a/libmamba/include/mamba/util/parsers.hpp 
b/libmamba/include/mamba/util/parsers.hpp index daf1c39ab7..b136451227 100644 --- a/libmamba/include/mamba/util/parsers.hpp +++ b/libmamba/include/mamba/util/parsers.hpp @@ -13,6 +13,7 @@ #include #include "mamba/util/conditional.hpp" +#include "mamba/util/string.hpp" namespace mamba::util { @@ -65,7 +66,7 @@ namespace mamba::util ) noexcept -> tl::expected; /** - * Find a character, except in mathcing parentheses pairs. + * Find a character or string, except in matching parentheses pairs. * * Find the first occurence of the given character, except if such character is inside a valid * pair of parentheses. @@ -118,6 +119,23 @@ namespace mamba::util char close = ')' ) noexcept -> tl::expected; + template + auto find_not_in_parentheses( + std::string_view text, + std::string_view val, + ParseError& err, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> std::size_t; + + template + [[nodiscard]] auto find_not_in_parentheses( + std::string_view text, + std::string_view val, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> tl::expected; + /** * Test wether the glob pattern @p pattern matches the string @p str. */ @@ -128,7 +146,7 @@ namespace mamba::util * Implementation * ********************/ - namespace detail + namespace detail_parsers { template constexpr auto concat_array(const Arr&... 
arrs) @@ -157,6 +175,88 @@ namespace mamba::util } return pos; } + + inline auto front(char c) -> char + { + return c; + } + + inline auto front(std::string_view str) -> char + { + return str.front(); + } + + inline auto empty(char) -> bool + { + return false; + } + + inline auto empty(std::string_view str) -> bool + { + return str.empty(); + } + + template + auto find_not_in_parentheses_impl( + std::string_view text, + const Str& val, + ParseError& err, + const std::array& open, + const std::array& close + ) noexcept -> std::size_t + { + // TODO(C++20): After allocating tokens and depths here, call an impl function using + // std::span defined in .cpp + static constexpr auto npos = std::string_view::npos; + + if (detail_parsers::empty(val)) + { + err = ParseError::InvalidInput; + return npos; + } + + const auto tokens = detail_parsers::concat_array( + std::array{ detail_parsers::front(val) }, + open, + close + ); + const auto tokens_str = std::string_view(tokens.data(), tokens.size()); + + auto depths = std::array{}; // last for easy branchless access + auto first_val_pos = npos; + auto pos = text.find_first_of(tokens_str); + while (pos != npos) + { + const auto open_pos = detail_parsers::find(open, text[pos]); + const auto close_pos = detail_parsers::find(close, text[pos]); + depths[open_pos] += int(open_pos < open.size()); + depths[close_pos] -= int(close_pos < open.size()); + depths[open_pos] = if_else( + open_pos == close_pos, + if_else(depths[open_pos] > 0, 0, 1), // swap 0 and 1 + depths[open_pos] + ); + depths[P] = 0; + + for (auto d : depths) + { + err = if_else(d < 0, ParseError::InvalidInput, err); + } + const bool match = starts_with(text.substr(pos), val); + first_val_pos = if_else(match && (pos == npos), pos, first_val_pos); + if (match && (depths == decltype(depths){})) + { + return pos; + } + pos = text.find_first_of(tokens_str, pos + 1); + } + err = if_else( + err == ParseError::Ok, + if_else(depths == decltype(depths){}, ParseError::NotFound, 
ParseError::InvalidInput), + err + ); + return first_val_pos; + } } template @@ -168,58 +268,48 @@ namespace mamba::util const std::array& close ) noexcept -> std::size_t { - // TODO(C++20): After allocating tokens and depths here, call an impl function using - // std::span defined in .cpp - static constexpr auto npos = std::string_view::npos; - - const auto tokens = detail::concat_array(std::array{ c }, open, close); - const auto tokens_str = std::string_view(tokens.data(), tokens.size()); + return detail_parsers::find_not_in_parentheses_impl(text, c, err, open, close); + } - auto depths = std::array{}; // last for easy branchless access - auto first_val_pos = npos; - auto pos = text.find_first_of(tokens_str); - while (pos != npos) + template + auto find_not_in_parentheses( + std::string_view text, + char c, + const std::array& open, + const std::array& close + ) noexcept -> tl::expected + { + auto err = ParseError::Ok; + const auto pos = find_not_in_parentheses(text, c, err, open, close); + if (err != ParseError::Ok) { - const auto open_pos = detail::find(open, text[pos]); - const auto close_pos = detail::find(close, text[pos]); - depths[open_pos] += int(open_pos < open.size()); - depths[close_pos] -= int(close_pos < open.size()); - depths[open_pos] = if_else( - open_pos == close_pos, - if_else(depths[open_pos] > 0, 0, 1), // swap 0 and 1 - depths[open_pos] - ); - depths[P] = 0; - - for (auto d : depths) - { - err = if_else(d < 0, ParseError::InvalidInput, err); - } - first_val_pos = if_else((text[pos] == c) && (pos == npos), pos, first_val_pos); - if ((text[pos] == c) && (depths == decltype(depths){})) - { - return pos; - } - pos = text.find_first_of(tokens_str, pos + 1); + return tl::make_unexpected(err); } - err = if_else( - err == ParseError::Ok, - if_else(depths == decltype(depths){}, ParseError::NotFound, ParseError::InvalidInput), - err - ); - return first_val_pos; + return { pos }; } template auto find_not_in_parentheses( std::string_view text, - char c, 
+ std::string_view val, + ParseError& err, + const std::array& open, + const std::array& close + ) noexcept -> std::size_t + { + return detail_parsers::find_not_in_parentheses_impl(text, val, err, open, close); + } + + template + [[nodiscard]] auto find_not_in_parentheses( + std::string_view text, + std::string_view val, const std::array& open, const std::array& close ) noexcept -> tl::expected { auto err = ParseError::Ok; - const auto pos = find_not_in_parentheses(text, c, err, open, close); + const auto pos = find_not_in_parentheses(text, val, err, open, close); if (err != ParseError::Ok) { return tl::make_unexpected(err); diff --git a/libmamba/src/util/parsers.cpp b/libmamba/src/util/parsers.cpp index a494fb11d9..1b4a5b4a5c 100644 --- a/libmamba/src/util/parsers.cpp +++ b/libmamba/src/util/parsers.cpp @@ -7,12 +7,16 @@ #include #include -#include "mamba/util/conditional.hpp" #include "mamba/util/parsers.hpp" #include "mamba/util/string.hpp" namespace mamba::util { + + /******************************* + * find_matching_parentheses * + *******************************/ + auto find_matching_parentheses_idx( // std::string_view text, ParseError& err, @@ -100,6 +104,10 @@ namespace mamba::util return { (start == std::string_view::npos) ? 
"" : text.substr(start, end) }; } + /***************************** + * find_not_in_parentheses * + *****************************/ + auto find_not_in_parentheses( // std::string_view text, char c, @@ -135,43 +143,13 @@ namespace mamba::util char close ) noexcept -> std::size_t { - static constexpr auto npos = std::string_view::npos; - - if (val.empty()) - { - err = ParseError::InvalidInput; - return npos; - } - - const auto tokens = std::array{ val.front(), open, close }; - const auto tokens_str = std::string_view(tokens.data(), tokens.size()); - - int depth = 0; - auto first_val_pos = npos; - auto pos = text.find_first_of(tokens_str); - while (pos != npos) - { - depth = if_else( - (open == close) && (text[pos] == open), - if_else(depth > 0, 0, 1), // swap 0 and 1 - depth + int(text[pos] == open) - int(text[pos] == close) - ); - // Set error but sill try to find the value - err = if_else(depth < 0, ParseError::InvalidInput, err); - const bool match = starts_with(text.substr(pos), val); - first_val_pos = if_else(match && (pos == npos), pos, first_val_pos); - if ((depth == 0) && match) - { - return pos; - } - pos = text.find_first_of(tokens_str, pos + 1); - } - err = if_else( - err == ParseError::Ok, - if_else(depth == 0, ParseError::NotFound, ParseError::InvalidInput), - err + return detail_parsers::find_not_in_parentheses_impl( + text, + val, + err, + std::array{ open }, + std::array{ close } ); - return first_val_pos; } auto find_not_in_parentheses( // @@ -190,6 +168,10 @@ namespace mamba::util return { pos }; } + /********** + * glob * + **********/ + namespace { auto glob_match_impl(std::string_view pattern, std::string_view str, char glob) -> bool diff --git a/libmamba/tests/src/util/test_parsers.cpp b/libmamba/tests/src/util/test_parsers.cpp index 42920e2b92..66b54fe104 100644 --- a/libmamba/tests/src/util/test_parsers.cpp +++ b/libmamba/tests/src/util/test_parsers.cpp @@ -132,6 +132,26 @@ TEST_SUITE("util::parsers") // 
CHECK_EQ(find_not_in_parentheses("(hello::world::", "::").error(), ParseError::InvalidInput); CHECK_EQ(find_not_in_parentheses("(hello", "::").error(), ParseError::InvalidInput); + + static constexpr auto opens = std::array{ '[', '(' }; + static constexpr auto closes = std::array{ ']', ')' }; + + CHECK_EQ( + find_not_in_parentheses("(some str)", "", opens, closes).error(), + ParseError::InvalidInput + ); + CHECK_EQ( + find_not_in_parentheses(R"((hello , world), [welcome , here] ,elf)", " ,", opens, closes), + 33 + ); + CHECK_EQ( + find_not_in_parentheses("([(hello)] , ([world])), [welcome , here] ,elf", " ,", opens, closes), + 41 + ); + CHECK_EQ( + find_not_in_parentheses("(hello , ]world,) welcome, here],", ", ", opens, closes).error(), + ParseError::InvalidInput + ); } SUBCASE("Substring and similar open/close pair") @@ -148,6 +168,32 @@ TEST_SUITE("util::parsers") find_not_in_parentheses(R"("some::csv)", "::", '"', '"').error(), ParseError::InvalidInput ); + + static constexpr auto opens = std::array{ '[', '(', '\'', '"' }; + static constexpr auto closes = std::array{ ']', ')', '\'', '"' }; + + CHECK_EQ( + find_not_in_parentheses("(some str)", "", opens, closes).error(), + ParseError::InvalidInput + ); + CHECK_EQ( + find_not_in_parentheses(R"('("hello" , world)', [welcome , here] ,elf)", " ,", opens, closes), + 37 + ); + CHECK_EQ( + find_not_in_parentheses( + "('[(hello)] , ([world])'), [welcome , here] ,elf", + " ,", + opens, + closes + ), + 43 + ); + CHECK_EQ( + find_not_in_parentheses("('hello' , ']world,) welcome, here],", ", ", opens, closes) + .error(), + ParseError::InvalidInput + ); } } From f729cce755557e63444c92fe9bdd8cdccaba2539 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 27 Feb 2024 12:26:46 +0100 Subject: [PATCH 04/22] Refactor find_not_in_parentheses for genericity --- libmamba/include/mamba/util/parsers.hpp | 40 +++++++++++++++++++++---- libmamba/src/util/parsers.cpp | 3 +- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git 
a/libmamba/include/mamba/util/parsers.hpp b/libmamba/include/mamba/util/parsers.hpp index b136451227..0d0bc7813c 100644 --- a/libmamba/include/mamba/util/parsers.hpp +++ b/libmamba/include/mamba/util/parsers.hpp @@ -196,13 +196,27 @@ namespace mamba::util return str.empty(); } - template + struct FindParenthesesSearcher + { + auto find_first(std::string_view text, std::string_view token_str) + { + return text.find_first_of(token_str); + } + + auto find_next(std::string_view text, std::string_view token_str, std::size_t pos) + { + return text.find_first_of(token_str, pos + 1); + } + }; + + template auto find_not_in_parentheses_impl( std::string_view text, const Str& val, ParseError& err, const std::array& open, - const std::array& close + const std::array& close, + Searcher&& searcher ) noexcept -> std::size_t { // TODO(C++20): After allocating tokens and depths here, call an impl function using @@ -224,7 +238,7 @@ namespace mamba::util auto depths = std::array{}; // last for easy branchless access auto first_val_pos = npos; - auto pos = text.find_first_of(tokens_str); + auto pos = searcher.find_first(text, tokens_str); while (pos != npos) { const auto open_pos = detail_parsers::find(open, text[pos]); @@ -248,7 +262,7 @@ namespace mamba::util { return pos; } - pos = text.find_first_of(tokens_str, pos + 1); + pos = searcher.find_next(text, tokens_str, pos); } err = if_else( err == ParseError::Ok, @@ -268,7 +282,14 @@ namespace mamba::util const std::array& close ) noexcept -> std::size_t { - return detail_parsers::find_not_in_parentheses_impl(text, c, err, open, close); + return detail_parsers::find_not_in_parentheses_impl( + text, + c, + err, + open, + close, + detail_parsers::FindParenthesesSearcher() + ); } template @@ -297,7 +318,14 @@ namespace mamba::util const std::array& close ) noexcept -> std::size_t { - return detail_parsers::find_not_in_parentheses_impl(text, val, err, open, close); + return detail_parsers::find_not_in_parentheses_impl( + text, + val, + 
err, + open, + close, + detail_parsers::FindParenthesesSearcher() + ); } template diff --git a/libmamba/src/util/parsers.cpp b/libmamba/src/util/parsers.cpp index 1b4a5b4a5c..d2c69f05f7 100644 --- a/libmamba/src/util/parsers.cpp +++ b/libmamba/src/util/parsers.cpp @@ -148,7 +148,8 @@ namespace mamba::util val, err, std::array{ open }, - std::array{ close } + std::array{ close }, + detail_parsers::FindParenthesesSearcher() ); } From 8e3adc674267062eff17b614943a1f4327ffabff Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 27 Feb 2024 12:40:02 +0100 Subject: [PATCH 05/22] Use npos ins find_not_in_parentheses --- libmamba/include/mamba/util/parsers.hpp | 15 +++++++++------ libmamba/tests/src/util/test_parsers.cpp | 24 ++++++++++-------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/libmamba/include/mamba/util/parsers.hpp b/libmamba/include/mamba/util/parsers.hpp index 0d0bc7813c..31c8b864ab 100644 --- a/libmamba/include/mamba/util/parsers.hpp +++ b/libmamba/include/mamba/util/parsers.hpp @@ -71,6 +71,8 @@ namespace mamba::util * Find the first occurence of the given character, except if such character is inside a valid * pair of parentheses. * Open and closing pairs need not be differents. + * If not found, ``std::string_view::npos`` is returned but no error is set as this is not + * considered an error. 
*/ auto find_not_in_parentheses( // std::string_view text, @@ -264,12 +266,13 @@ namespace mamba::util } pos = searcher.find_next(text, tokens_str, pos); } - err = if_else( - err == ParseError::Ok, - if_else(depths == decltype(depths){}, ParseError::NotFound, ParseError::InvalidInput), - err - ); - return first_val_pos; + // Check if all parentheses are properly closed + if (depths != decltype(depths){}) + { + err = ParseError::InvalidInput; + return first_val_pos; + } + return npos; // not found } } diff --git a/libmamba/tests/src/util/test_parsers.cpp b/libmamba/tests/src/util/test_parsers.cpp index 66b54fe104..6cbcbe56f5 100644 --- a/libmamba/tests/src/util/test_parsers.cpp +++ b/libmamba/tests/src/util/test_parsers.cpp @@ -12,6 +12,8 @@ using namespace mamba::util; TEST_SUITE("util::parsers") { + inline static constexpr auto npos = std::string_view::npos; + TEST_CASE("find_matching_parentheses") { SUBCASE("Different open/close pair") @@ -49,9 +51,9 @@ TEST_SUITE("util::parsers") { SUBCASE("Single char and different open/close pair") { - CHECK_EQ(find_not_in_parentheses("", ',').error(), ParseError::NotFound); - CHECK_EQ(find_not_in_parentheses("Nothing to see here", ',').error(), ParseError::NotFound); - CHECK_EQ(find_not_in_parentheses("(hello, world)", ',').error(), ParseError::NotFound); + CHECK_EQ(find_not_in_parentheses("", ','), npos); + CHECK_EQ(find_not_in_parentheses("Nothing to see here", ','), npos); + CHECK_EQ(find_not_in_parentheses("(hello, world)", ','), npos); CHECK_EQ(find_not_in_parentheses("hello, world", ','), 5); CHECK_EQ(find_not_in_parentheses("hello, world, welcome", ','), 5); @@ -88,10 +90,7 @@ TEST_SUITE("util::parsers") SUBCASE("Single char and similar open/close pair") { - CHECK_EQ( - find_not_in_parentheses(R"("some, csv")", ',', '"', '"').error(), - ParseError::NotFound - ); + CHECK_EQ(find_not_in_parentheses(R"("some, csv")", ',', '"', '"'), npos); CHECK_EQ(find_not_in_parentheses(R"("some, csv",value)", ',', '"', '"'), 11); 
CHECK_EQ(find_not_in_parentheses(R"("some, csv""value","here")", ',', '"', '"'), 18); @@ -120,9 +119,9 @@ TEST_SUITE("util::parsers") SUBCASE("Substring and different open/close pair") { - CHECK_EQ(find_not_in_parentheses("", "::").error(), ParseError::NotFound); - CHECK_EQ(find_not_in_parentheses("Nothing to see here", "::").error(), ParseError::NotFound); - CHECK_EQ(find_not_in_parentheses("(hello::world)", "::").error(), ParseError::NotFound); + CHECK_EQ(find_not_in_parentheses("", "::"), npos); + CHECK_EQ(find_not_in_parentheses("Nothing to see here", "::"), npos); + CHECK_EQ(find_not_in_parentheses("(hello::world)", "::"), npos); CHECK_EQ(find_not_in_parentheses("hello::world", "::"), 5); CHECK_EQ(find_not_in_parentheses("hello::world::welcome", "::"), 5); @@ -156,10 +155,7 @@ TEST_SUITE("util::parsers") SUBCASE("Substring and similar open/close pair") { - CHECK_EQ( - find_not_in_parentheses(R"("some::csv")", "::", '"', '"').error(), - ParseError::NotFound - ); + CHECK_EQ(find_not_in_parentheses(R"("some::csv")", "::", '"', '"'), npos); CHECK_EQ(find_not_in_parentheses(R"("some::csv"::value)", "::", '"', '"'), 11); CHECK_EQ(find_not_in_parentheses(R"("some::csv""value"::"here")", "::", '"', '"'), 18); From 4983cf0aa1981c158197189b7536b80b838cc44b Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 27 Feb 2024 14:33:53 +0100 Subject: [PATCH 06/22] Add rfind_not_in_parentheses --- libmamba/include/mamba/util/parsers.hpp | 162 +++++++++++++++++++++++ libmamba/src/util/parsers.cpp | 65 +++++++++ libmamba/tests/src/util/test_parsers.cpp | 140 ++++++++++++++++++++ 3 files changed, 367 insertions(+) diff --git a/libmamba/include/mamba/util/parsers.hpp b/libmamba/include/mamba/util/parsers.hpp index 31c8b864ab..6ad74c8915 100644 --- a/libmamba/include/mamba/util/parsers.hpp +++ b/libmamba/include/mamba/util/parsers.hpp @@ -73,6 +73,8 @@ namespace mamba::util * Open and closing pairs need not be differents. 
* If not found, ``std::string_view::npos`` is returned but no error is set as this is not * considered an error. + * Due to a greedy approach, the functin may not be able to detect all errors, but will be + * correct when parentheses are correctly matched. */ auto find_not_in_parentheses( // std::string_view text, @@ -138,6 +140,81 @@ namespace mamba::util const std::array& close = { ')', ']' } ) noexcept -> tl::expected; + /** + * Find the last character or string, except in matching parentheses pairs. + * + * Find the last occurence of the given character, except if such character is inside a valid + * pair of parentheses. + * Open and closing pairs need not be differents. + * If not found, ``std::string_view::npos`` is returned but no error is set as this is not + * considered an error. + * Due to a greedy approach, the functin may not be able to detect all errors, but will be + * correct when parentheses are correctly matched. + */ + auto rfind_not_in_parentheses( // + std::string_view text, + char c, + ParseError& err, + char open = '(', + char close = ')' + ) noexcept -> std::size_t; + + [[nodiscard]] auto rfind_not_in_parentheses( // + std::string_view text, + char c, + char open = '(', + char close = ')' + ) noexcept -> tl::expected; + + template + auto rfind_not_in_parentheses( + std::string_view text, + char c, + ParseError& err, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> std::size_t; + + template + [[nodiscard]] auto rfind_not_in_parentheses( + std::string_view text, + char c, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> tl::expected; + + auto rfind_not_in_parentheses( // + std::string_view text, + std::string_view val, + ParseError& err, + char open = '(', + char close = ')' + ) noexcept -> std::size_t; + + [[nodiscard]] auto rfind_not_in_parentheses( // + std::string_view text, + std::string_view val, + char open = '(', + char close = ')' + ) 
noexcept -> tl::expected; + + template + auto rfind_not_in_parentheses( + std::string_view text, + std::string_view val, + ParseError& err, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> std::size_t; + + template + [[nodiscard]] auto rfind_not_in_parentheses( + std::string_view text, + std::string_view val, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> tl::expected; + /** * Test wether the glob pattern @p pattern matches the string @p str. */ @@ -211,6 +288,19 @@ namespace mamba::util } }; + struct RFindParenthesesSearcher + { + auto find_first(std::string_view text, std::string_view token_str) + { + return text.find_last_of(token_str); + } + + auto find_next(std::string_view text, std::string_view token_str, std::size_t pos) + { + return (pos == 0) ? text.npos : text.find_last_of(token_str, pos - 1); + } + }; + template auto find_not_in_parentheses_impl( std::string_view text, @@ -347,5 +437,77 @@ namespace mamba::util } return { pos }; } + + template + auto rfind_not_in_parentheses( + std::string_view text, + char c, + ParseError& err, + const std::array& open, + const std::array& close + ) noexcept -> std::size_t + { + return detail_parsers::find_not_in_parentheses_impl( + text, + c, + err, + close, // swaped + open, + detail_parsers::RFindParenthesesSearcher() + ); + } + + template + auto rfind_not_in_parentheses( + std::string_view text, + char c, + const std::array& open, + const std::array& close + ) noexcept -> tl::expected + { + auto err = ParseError::Ok; + const auto pos = rfind_not_in_parentheses(text, c, err, open, close); + if (err != ParseError::Ok) + { + return tl::make_unexpected(err); + } + return { pos }; + } + + template + auto rfind_not_in_parentheses( + std::string_view text, + std::string_view val, + ParseError& err, + const std::array& open, + const std::array& close + ) noexcept -> std::size_t + { + return 
detail_parsers::find_not_in_parentheses_impl( + text, + val, + err, + close, // swaped + open, + detail_parsers::RFindParenthesesSearcher() + ); + } + + template + [[nodiscard]] auto rfind_not_in_parentheses( + std::string_view text, + std::string_view val, + const std::array& open, + const std::array& close + ) noexcept -> tl::expected + { + auto err = ParseError::Ok; + const auto pos = rfind_not_in_parentheses(text, val, err, open, close); + if (err != ParseError::Ok) + { + return tl::make_unexpected(err); + } + return { pos }; + } } #endif diff --git a/libmamba/src/util/parsers.cpp b/libmamba/src/util/parsers.cpp index d2c69f05f7..420c361fa7 100644 --- a/libmamba/src/util/parsers.cpp +++ b/libmamba/src/util/parsers.cpp @@ -169,6 +169,71 @@ namespace mamba::util return { pos }; } + /****************************** + * rfind_not_in_parentheses * + ******************************/ + + auto rfind_not_in_parentheses( // + std::string_view text, + char c, + ParseError& err, + char open, + char close + ) noexcept -> std::size_t + { + return rfind_not_in_parentheses(text, c, err, std::array{ open }, std::array{ close }); + } + + auto rfind_not_in_parentheses( // + std::string_view text, + char c, + char open, + char close + ) noexcept -> tl::expected + { + auto err = ParseError::Ok; + const auto pos = rfind_not_in_parentheses(text, c, err, open, close); + if (err != ParseError::Ok) + { + return tl::make_unexpected(err); + } + return { pos }; + } + + auto rfind_not_in_parentheses( // + std::string_view text, + std::string_view val, + ParseError& err, + char open, + char close + ) noexcept -> std::size_t + { + return detail_parsers::find_not_in_parentheses_impl( + text, + val, + err, + std::array{ close }, // swaped + std::array{ open }, + detail_parsers::RFindParenthesesSearcher() + ); + } + + auto rfind_not_in_parentheses( // + std::string_view text, + std::string_view val, + char open, + char close + ) noexcept -> tl::expected + { + auto err = ParseError::Ok; + const 
auto pos = rfind_not_in_parentheses(text, val, err, open, close); + if (err != ParseError::Ok) + { + return tl::make_unexpected(err); + } + return { pos }; + } + /********** * glob * **********/ diff --git a/libmamba/tests/src/util/test_parsers.cpp b/libmamba/tests/src/util/test_parsers.cpp index 6cbcbe56f5..75f8e3e27d 100644 --- a/libmamba/tests/src/util/test_parsers.cpp +++ b/libmamba/tests/src/util/test_parsers.cpp @@ -193,6 +193,146 @@ TEST_SUITE("util::parsers") } } + TEST_CASE("rfind_not_in_parentheses") + { + SUBCASE("Single char and different open/close pair") + { + CHECK_EQ(rfind_not_in_parentheses("", ','), npos); + CHECK_EQ(rfind_not_in_parentheses("Nothing to see here", ','), npos); + CHECK_EQ(rfind_not_in_parentheses("(hello, world)", ','), npos); + + CHECK_EQ(rfind_not_in_parentheses("hello, world", ','), 5); + CHECK_EQ(rfind_not_in_parentheses("hello, world, welcome", ','), 12); + CHECK_EQ(rfind_not_in_parentheses("(hello, world), (welcome, here),", ','), 31); + CHECK_EQ(rfind_not_in_parentheses("(hello, world), (welcome, here)", ',', '[', ']'), 24); + CHECK_EQ(rfind_not_in_parentheses("[hello, world](welcome, here)", ',', '(', ')'), 6); + + CHECK_EQ(find_not_in_parentheses("(hello, world,", ',').error(), ParseError::InvalidInput); + CHECK_EQ(find_not_in_parentheses("(hello", ',').error(), ParseError::InvalidInput); + + static constexpr auto opens = std::array{ '[', '(' }; + static constexpr auto closes = std::array{ ']', ')' }; + CHECK_EQ( + rfind_not_in_parentheses(",(hello, world), [welcome, here]", ',', opens, closes), + 15 + ); + CHECK_EQ( + rfind_not_in_parentheses(",[welcome, here], ([(hello)], ([world]))", ',', opens, closes), + 16 + ); + CHECK_EQ(rfind_not_in_parentheses(",[hello, world](welcome, here)", ',', opens, closes), 0); + CHECK_EQ( + rfind_not_in_parentheses(",(hello, ]world,) welcome, here]", ',', opens, closes).error(), + ParseError::InvalidInput + ); + + CHECK_EQ(rfind_not_in_parentheses("this, is, (a, string)", ',', opens, 
closes), 8); + CHECK_EQ(rfind_not_in_parentheses(",this (a, string)", ',', opens, closes), 0); + CHECK_EQ(rfind_not_in_parentheses("this (a, string)", ',', opens, closes), npos); + CHECK_EQ(rfind_not_in_parentheses("(a, string)", ',', opens, closes), npos); + } + + SUBCASE("Single char and similar open/close pair") + { + CHECK_EQ(rfind_not_in_parentheses(R"("some, csv")", ',', '"', '"'), npos); + CHECK_EQ(rfind_not_in_parentheses(R"("some, csv","some, value")", ',', '"', '"'), 11); + CHECK_EQ(rfind_not_in_parentheses(R"("some, csv","value""here")", ',', '"', '"'), 11); + + CHECK_EQ( + find_not_in_parentheses(R"("some, csv)", ',', '"', '"').error(), + ParseError::InvalidInput + ); + + static constexpr auto opens = std::array{ '[', '(', '\'', '"' }; + static constexpr auto closes = std::array{ ']', ')', '\'', '"' }; + CHECK_EQ( + rfind_not_in_parentheses(R"(,[welcome, here], '("hello", world)')", ',', opens, closes), + 16 + ); + CHECK_EQ( + rfind_not_in_parentheses(",[welcome, here], ('[(hello)], ([world])')", ',', opens, closes), + 16 + ); + CHECK_EQ( + rfind_not_in_parentheses(",('hello', ']world,) welcome, here]", ',', opens, closes) + .error(), + ParseError::InvalidInput + ); + } + + SUBCASE("Substring and different open/close pair") + { + CHECK_EQ(rfind_not_in_parentheses("", "::"), npos); + CHECK_EQ(rfind_not_in_parentheses("Nothing to see here", "::"), npos); + CHECK_EQ(rfind_not_in_parentheses("(hello::world)", "::"), npos); + + CHECK_EQ(rfind_not_in_parentheses("hello::world", "::"), 5); + CHECK_EQ(rfind_not_in_parentheses("hello::", "::"), 5); + CHECK_EQ(rfind_not_in_parentheses("hello::world::welcome", "::"), 12); + CHECK_EQ(rfind_not_in_parentheses("::(hello::world)::(welcome::here)", "::"), 16); + CHECK_EQ(rfind_not_in_parentheses("(hello::world)::(welcome::here)", "::", '[', ']'), 24); + CHECK_EQ(rfind_not_in_parentheses(",(welcome::here)[hello::world]", "::", '[', ']'), 9); + + CHECK_EQ(rfind_not_in_parentheses("hello::world::)", "::").error(), 
ParseError::InvalidInput); + CHECK_EQ(rfind_not_in_parentheses("hello)", "::").error(), ParseError::InvalidInput); + CHECK_EQ(rfind_not_in_parentheses("(hello", "::").error(), ParseError::InvalidInput); + + static constexpr auto opens = std::array{ '[', '(' }; + static constexpr auto closes = std::array{ ']', ')' }; + + CHECK_EQ( + rfind_not_in_parentheses("(some str)", "", opens, closes).error(), + ParseError::InvalidInput + ); + CHECK_EQ( + rfind_not_in_parentheses(R"(hoy ,(hello , world), [welcome , here],elf)", " ,", opens, closes), + 3 + ); + CHECK_EQ( + rfind_not_in_parentheses("hey ,([(hello)] , ([world])), [it , here]", " ,", opens, closes), + 3 + ); + CHECK_EQ( + rfind_not_in_parentheses("(hello , ]world,) welcome, here],", ", ", opens, closes) + .error(), + ParseError::InvalidInput + ); + } + + SUBCASE("Substring and similar open/close pair") + { + CHECK_EQ(rfind_not_in_parentheses(R"("some::csv")", "::", '"', '"'), npos); + CHECK_EQ(rfind_not_in_parentheses(R"("some::csv"::"some::value")", "::", '"', '"'), 11); + CHECK_EQ(rfind_not_in_parentheses(R"("some::csv"::"value""here")", "::", '"', '"'), 11); + CHECK_EQ( + rfind_not_in_parentheses(R"(some::csv")", "::", '"', '"').error(), + ParseError::InvalidInput + ); + + static constexpr auto opens = std::array{ '[', '(', '\'', '"' }; + static constexpr auto closes = std::array{ ']', ')', '\'', '"' }; + + CHECK_EQ( + rfind_not_in_parentheses("(some str)", "", opens, closes).error(), + ParseError::InvalidInput + ); + CHECK_EQ( + find_not_in_parentheses( + "hoy , ('[(hello)] , ([world])'), [welcome , here]", + " ,", + opens, + closes + ), + 3 + ); + CHECK_EQ( + rfind_not_in_parentheses("('hello' , ']world,) welcome, here],", ", ", opens, closes) + .error(), + ParseError::InvalidInput + ); + } + } + TEST_CASE("glob_match") { CHECK(glob_match("python", "python")); From 7d08bc179c81964fa90d21650a5781a336e38b9f Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 27 Feb 2024 15:37:38 +0100 Subject: [PATCH 07/22] Use 
rfind_not_in_parentheses in MatchSpec --- libmamba/include/mamba/specs/match_spec.hpp | 1 + libmamba/src/specs/match_spec.cpp | 69 +++++++++++++-------- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/libmamba/include/mamba/specs/match_spec.hpp b/libmamba/include/mamba/specs/match_spec.hpp index 17d23c617e..39b766c8b3 100644 --- a/libmamba/include/mamba/specs/match_spec.hpp +++ b/libmamba/include/mamba/specs/match_spec.hpp @@ -33,6 +33,7 @@ namespace mamba::specs inline static constexpr char alt_list_close = ')'; inline static constexpr char prefered_quote = '"'; inline static constexpr char alt_quote = '\''; + inline static constexpr char channel_namespace_spec_sep = ':'; [[nodiscard]] static auto parse(std::string_view spec) -> MatchSpec; diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index 96744b2e90..ffe0995c1e 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -14,6 +14,7 @@ #include "mamba/specs/archive.hpp" #include "mamba/specs/match_spec.hpp" +#include "mamba/util/parsers.hpp" #include "mamba/util/string.hpp" #include "mamba/util/url_manip.hpp" @@ -127,6 +128,26 @@ namespace mamba::specs }; return std::all_of(text.cbegin(), text.cend(), is_hash_char); } + + auto rfind_chan_ns_split(std::string_view str) + { + return util::rfind_not_in_parentheses( + str, + MatchSpec::channel_namespace_spec_sep, + open_or_quote, + close_or_quote + ) + // FIXME temporary while MatchSpec::parse does not return ``expected``. 
+ .or_else( + [&](const auto&) { + throw std::invalid_argument( + fmt::format(R"(Invalid parenthesis in MatchSpec "{}")", str) + ); + } + ) + .value(); + ; + } } auto MatchSpec::parse(std::string_view spec) -> MatchSpec @@ -164,6 +185,29 @@ namespace mamba::specs auto spec_str = std::string(spec); auto out = MatchSpec(); + const auto spec_pos = rfind_chan_ns_split(spec); + if (spec_pos != std::string_view::npos) + { + spec_str = spec.substr(spec_pos + 1); + const auto ns_pos = rfind_chan_ns_split(spec.substr(0, spec_pos)); + if (ns_pos != std::string_view::npos) + { + out.m_name_space = spec.substr(ns_pos + 1, spec_pos); + out.m_channel = UnresolvedChannel::parse(spec.substr(0, ns_pos)) + .or_else([](specs::ParseError&& error) + { throw std::move(error); }) + .value(); + } + else + { + out.m_name_space = spec.substr(0, spec_pos); + } + } + else + { + spec_str = spec; + } + auto extract_kv = [&spec_str](const std::string& kv_string, auto& map) { static const std::regex kv_re("([a-zA-Z0-9_-]+?)=([\"\']?)([^\'\"]*?)(\\2)(?:[\'\", ]|$)"); @@ -218,31 +262,6 @@ namespace mamba::specs ); } - auto m5 = util::rsplit(spec_str, ":", 2); - auto m5_len = m5.size(); - std::string channel_str; - if (m5_len == 3) - { - out.m_channel = UnresolvedChannel::parse(m5[0]) - .or_else([](specs::ParseError&& error) { throw std::move(error); }) - .value(); - out.m_name_space = m5[1]; - spec_str = m5[2]; - } - else if (m5_len == 2) - { - out.m_name_space = m5[0]; - spec_str = m5[1]; - } - else if (m5_len == 1) - { - spec_str = m5[0]; - } - else - { - throw std::runtime_error("Parsing of channel / namespace / subdir failed."); - } - // support faulty conda matchspecs such as `libblas=[build=*mkl]`, which is // the repr of `libblas=*=*mkl` if (spec_str.back() == '=') From 6c8652cf6ac46aa5aa6d4dec6f623bf6e4dfb890 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 27 Feb 2024 17:18:54 +0100 Subject: [PATCH 08/22] Refactor chan ns spec split --- libmamba/src/specs/match_spec.cpp | 70 
+++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index ffe0995c1e..b62ed73da4 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -129,7 +130,7 @@ namespace mamba::specs return std::all_of(text.cbegin(), text.cend(), is_hash_char); } - auto rfind_chan_ns_split(std::string_view str) + auto rfind_channel_namespace_split(std::string_view str) { return util::rfind_not_in_parentheses( str, @@ -148,6 +149,38 @@ namespace mamba::specs .value(); ; } + + auto split_channel_namespace_spec(std::string_view str) + -> std::tuple + { + const auto spec_pos = rfind_channel_namespace_split(str); + if (spec_pos != std::string_view::npos) + { + const auto spec = str.substr(spec_pos + 1); + const auto ns_pos = rfind_channel_namespace_split(str.substr(0, spec_pos)); + if (ns_pos != std::string_view::npos) + { + return { + /* channel= */ str.substr(0, ns_pos), + /* namespace= */ str.substr(ns_pos + 1, spec_pos), + /* spec= */ str.substr(spec_pos + 1), + }; + } + else + { + return { + /* channel= */ "", + /* namespace= */ spec.substr(0, spec_pos), + /* spec= */ str.substr(spec_pos + 1), + }; + } + } + return { + /* channel= */ "", + /* namespace= */ "", + /* spec= */ str, + }; + } } auto MatchSpec::parse(std::string_view spec) -> MatchSpec @@ -182,32 +215,25 @@ namespace mamba::specs } } - auto spec_str = std::string(spec); auto out = MatchSpec(); - const auto spec_pos = rfind_chan_ns_split(spec); - if (spec_pos != std::string_view::npos) + // Split full matchspec like + // ``https://channel[plat]:namespace:spec >=3 [attr="val", ...]`` + // into: + // - ``https://channel[plat]`` + // - ``namespace`` + // - ``spec >=3 [attr="val", ...]`` + auto chan = std::string_view(); + std::tie(chan, out.m_name_space, spec) = split_channel_namespace_spec(spec); + if 
(!chan.empty()) { - spec_str = spec.substr(spec_pos + 1); - const auto ns_pos = rfind_chan_ns_split(spec.substr(0, spec_pos)); - if (ns_pos != std::string_view::npos) - { - out.m_name_space = spec.substr(ns_pos + 1, spec_pos); - out.m_channel = UnresolvedChannel::parse(spec.substr(0, ns_pos)) - .or_else([](specs::ParseError&& error) - { throw std::move(error); }) - .value(); - } - else - { - out.m_name_space = spec.substr(0, spec_pos); - } - } - else - { - spec_str = spec; + out.m_channel = UnresolvedChannel::parse(chan) + .or_else([](specs::ParseError&& error) { throw std::move(error); }) + .value(); } + auto spec_str = std::string(spec); + auto extract_kv = [&spec_str](const std::string& kv_string, auto& map) { static const std::regex kv_re("([a-zA-Z0-9_-]+?)=([\"\']?)([^\'\"]*?)(\\2)(?:[\'\", ]|$)"); From f12e740e7ffd887088620b7650b402add7db403c Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 27 Feb 2024 18:31:45 +0100 Subject: [PATCH 09/22] Allocation guard --- libmamba/include/mamba/specs/match_spec.hpp | 9 +- libmamba/src/specs/match_spec.cpp | 123 ++++++++++++++------ 2 files changed, 94 insertions(+), 38 deletions(-) diff --git a/libmamba/include/mamba/specs/match_spec.hpp b/libmamba/include/mamba/specs/match_spec.hpp index 39b766c8b3..858a6bed59 100644 --- a/libmamba/include/mamba/specs/match_spec.hpp +++ b/libmamba/include/mamba/specs/match_spec.hpp @@ -34,6 +34,8 @@ namespace mamba::specs inline static constexpr char prefered_quote = '"'; inline static constexpr char alt_quote = '\''; inline static constexpr char channel_namespace_spec_sep = ':'; + inline static constexpr char attribute_sep = ','; + inline static constexpr char attribute_assign = '='; [[nodiscard]] static auto parse(std::string_view spec) -> MatchSpec; @@ -76,11 +78,12 @@ namespace mamba::specs [[nodiscard]] auto track_features() const -> std::string_view; void set_track_features(std::string val); + [[nodiscard]] auto filename() const -> std::string_view; + void 
set_filename(std::string val); + [[nodiscard]] auto optional() const -> bool; void set_optional(bool opt); - [[nodiscard]] auto filename() const -> const std::string&; - [[nodiscard]] auto url() const -> const std::string&; [[nodiscard]] auto conda_build_form() const -> std::string; @@ -100,6 +103,7 @@ namespace mamba::specs std::string license_family = {}; std::string features = {}; std::string track_features = {}; + std::string filename = {}; bool optional = false; }; @@ -111,7 +115,6 @@ namespace mamba::specs BuildNumberSpec m_build_number; util::heap_optional m_extra = {}; // unlikely data // TODO can put inside channel - std::string m_filename; std::string m_url; auto extra() -> ExtraMembers&; diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index b62ed73da4..587f3c8ca1 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -181,6 +181,47 @@ namespace mamba::specs /* spec= */ str, }; } + + auto find_attribute_split(std::string_view str) + { + return util::find_not_in_parentheses(str, MatchSpec::attribute_sep, open_or_quote, close_or_quote) + // FIXME temporary while MatchSpec::parse does not return ``expected``. 
+ .or_else( + [&](const auto&) { + throw std::invalid_argument( + fmt::format(R"(Invalid parenthesis in MatchSpec "{}")", str) + ); + } + ) + .value(); + ; + } + + auto strip_whitespace_quotes(std::string_view str) -> std::string_view + { + return util::strip_if( + str, + [](char c) -> bool { + return !util::is_graphic(c) || (c == MatchSpec::prefered_quote) + || (c == MatchSpec::alt_quote); + } + + ); + } + + template + void extract_kv(std::string_view str, Map& map) + { + const auto next_pos = find_attribute_split(str); + + auto [key, value] = util::split_once(str.substr(0, next_pos), MatchSpec::attribute_assign); + map.emplace(util::strip(key), strip_whitespace_quotes(value.value_or(""))); + + if (next_pos != std::string_view::npos) + { + extract_kv(str.substr(next_pos + 1), map); + } + }; } auto MatchSpec::parse(std::string_view spec) -> MatchSpec @@ -234,27 +275,6 @@ namespace mamba::specs auto spec_str = std::string(spec); - auto extract_kv = [&spec_str](const std::string& kv_string, auto& map) - { - static const std::regex kv_re("([a-zA-Z0-9_-]+?)=([\"\']?)([^\'\"]*?)(\\2)(?:[\'\", ]|$)"); - std::cmatch kv_match; - const char* text_iter = kv_string.c_str(); - - while (std::regex_search(text_iter, kv_match, kv_re)) - { - auto key = kv_match[1].str(); - auto value = kv_match[3].str(); - if (key.size() == 0 || value.size() == 0) - { - throw std::runtime_error( - util::concat(R"(key-value mismatch in brackets ")", spec_str, '"') - ); - } - text_iter += kv_match.position() + kv_match.length(); - map[key] = value; - } - }; - std::smatch match; std::unordered_map extra; @@ -475,6 +495,11 @@ namespace mamba::specs return m_build_string; } + void MatchSpec::set_build_string(BuildStringSpec bs) + { + m_build_string = std::move(bs); + } + auto MatchSpec::md5() const -> std::string_view { if (m_extra.has_value()) @@ -486,7 +511,10 @@ namespace mamba::specs void MatchSpec::set_md5(std::string val) { - extra().md5 = std::move(val); + if (val != md5()) // Avoid allocating 
extra to set the default value + { + extra().md5 = std::move(val); + } } auto MatchSpec::sha256() const -> std::string_view @@ -500,7 +528,10 @@ namespace mamba::specs void MatchSpec::set_sha256(std::string val) { - extra().sha256 = std::move(val); + if (val != sha256()) // Avoid allocating extra to set the default value + { + extra().sha256 = std::move(val); + } } auto MatchSpec::license() const -> std::string_view @@ -514,7 +545,10 @@ namespace mamba::specs void MatchSpec::set_license(std::string val) { - extra().license = std::move(val); + if (val != license()) // Avoid allocating extra to set the default value + { + extra().license = std::move(val); + } } auto MatchSpec::license_family() const -> std::string_view @@ -528,7 +562,10 @@ namespace mamba::specs void MatchSpec::set_license_family(std::string val) { - extra().license_family = std::move(val); + if (val != license_family()) // Avoid allocating extra to set the default value + { + extra().license_family = std::move(val); + } } auto MatchSpec::features() const -> std::string_view @@ -542,7 +579,10 @@ namespace mamba::specs void MatchSpec::set_features(std::string val) { - extra().features = std::move(val); + if (val != features()) // Avoid allocating extra to set the default value + { + extra().features = std::move(val); + } } auto MatchSpec::track_features() const -> std::string_view @@ -556,27 +596,40 @@ namespace mamba::specs void MatchSpec::set_track_features(std::string val) { - extra().track_features = std::move(val); + if (val != track_features()) // Avoid allocating extra to set the default value + { + extra().track_features = std::move(val); + } } - auto MatchSpec::optional() const -> bool + auto MatchSpec::filename() const -> std::string_view { - return m_extra.has_value() && m_extra->optional; + if (m_extra.has_value()) + { + return m_extra->filename; + } + return ""; } - void MatchSpec::set_optional(bool opt) + void MatchSpec::set_filename(std::string val) { - extra().optional = opt; + if (val 
!= filename()) // Avoid allocating extra to set the default value + { + extra().filename = std::move(val); + } } - void MatchSpec::set_build_string(BuildStringSpec bs) + auto MatchSpec::optional() const -> bool { - m_build_string = std::move(bs); + return m_extra.has_value() && m_extra->optional; } - auto MatchSpec::filename() const -> const std::string& + void MatchSpec::set_optional(bool opt) { - return m_filename; + if (opt != optional()) // Avoid allocating extra to set the default value + { + extra().optional = opt; + } } auto MatchSpec::url() const -> const std::string& From d121b0f7be7d6536a77948b7533d4b6e8969a57c Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Tue, 27 Feb 2024 19:25:27 +0100 Subject: [PATCH 10/22] Refactor MatchSpec filename handling --- libmamba/include/mamba/specs/match_spec.hpp | 10 +- libmamba/src/specs/match_spec.cpp | 112 +++++++++++++++----- 2 files changed, 96 insertions(+), 26 deletions(-) diff --git a/libmamba/include/mamba/specs/match_spec.hpp b/libmamba/include/mamba/specs/match_spec.hpp index 858a6bed59..faa83261f6 100644 --- a/libmamba/include/mamba/specs/match_spec.hpp +++ b/libmamba/include/mamba/specs/match_spec.hpp @@ -81,9 +81,12 @@ namespace mamba::specs [[nodiscard]] auto filename() const -> std::string_view; void set_filename(std::string val); + [[nodiscard]] auto is_file() const -> bool; + [[nodiscard]] auto optional() const -> bool; void set_optional(bool opt); + // TODO as string_view conditional on channel type [[nodiscard]] auto url() const -> const std::string&; [[nodiscard]] auto conda_build_form() const -> std::string; @@ -91,8 +94,6 @@ namespace mamba::specs [[nodiscard]] auto is_simple() const -> bool; - [[nodiscard]] auto is_file() const -> bool; - private: struct ExtraMembers @@ -118,6 +119,11 @@ namespace mamba::specs std::string m_url; auto extra() -> ExtraMembers&; + [[nodiscard]] auto channel_is_file() const -> bool; + [[nodiscard]] auto channel_filename() const -> std::string_view; + void 
set_channel_filename(std::string val); + [[nodiscard]] auto extra_filename() const -> std::string_view; + void set_extra_filename(std::string val); }; namespace match_spec_literals diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index 587f3c8ca1..37119838b3 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -31,11 +31,11 @@ namespace mamba::specs }; auto out = MatchSpec(); + // Channel is also read for the filename so no need to set it. out.m_channel = UnresolvedChannel::parse(spec) .or_else([](specs::ParseError&& error) { throw std::move(error); }) .value(); auto [_, pkg] = util::rsplit_once(out.m_channel->location(), '/'); - out.m_filename = std::string(pkg); out.m_url = util::path_or_url_to_url(spec); // Build string @@ -410,7 +410,7 @@ namespace mamba::specs } if (const auto& val = at_or(extra, "fn", ""); !val.empty()) { - out.m_filename = val; + out.set_filename(val); } if (const auto& val = at_or(extra, "md5", ""); !val.empty()) { @@ -440,6 +440,44 @@ namespace mamba::specs return out; } + auto MatchSpec::channel_is_file() const -> bool + { + if (const auto& chan = channel(); chan.has_value()) + { + auto type = chan->type(); + using Type = typename UnresolvedChannel::Type; + return (type == Type::PackageURL) || (type == Type::PackagePath); + } + return false; + } + + auto MatchSpec::channel_filename() const -> std::string_view + { + if (channel_is_file()) + { + assert(channel().has_value()); + auto [_, pkg] = util::rsplit_once(channel()->location(), '/'); + return pkg; + } + return {}; + } + + void MatchSpec::set_channel_filename(std::string val) + { + assert(channel().has_value()); + assert(channel_is_file()); + auto location = m_channel->clear_location(); + auto [base, pkg] = util::rsplit_once(location, '/'); + assert(base.has_value()); + location = base.value_or(""); + location += val; + set_channel({ UnresolvedChannel( + std::move(location), + m_channel->clear_platform_filters(), + 
m_channel->type() + ) }); + } + auto MatchSpec::channel() const -> const std::optional& { return m_channel; @@ -448,6 +486,54 @@ namespace mamba::specs void MatchSpec::set_channel(std::optional chan) { m_channel = std::move(chan); + // Channel filename take precedence + if (channel_is_file() && !extra_filename().empty()) + { + set_extra_filename({}); + } + } + + auto MatchSpec::extra_filename() const -> std::string_view + { + if (m_extra.has_value()) + { + return m_extra->filename; + } + return {}; + } + + void MatchSpec::set_extra_filename(std::string val) + { + if (val != filename()) // Avoid allocating extra to set the default value + { + extra().filename = std::move(val); + } + } + + auto MatchSpec::filename() const -> std::string_view + { + if (channel_is_file()) + { + return channel_filename(); + } + return extra_filename(); + } + + void MatchSpec::set_filename(std::string val) + { + if (channel_is_file()) + { + set_channel_filename(std::move(val)); + } + else + { + set_extra_filename(std::move(val)); + } + } + + auto MatchSpec::is_file() const -> bool + { + return (!filename().empty()) || (!m_url.empty()); } auto MatchSpec::name_space() const -> const std::string& @@ -602,23 +688,6 @@ namespace mamba::specs } } - auto MatchSpec::filename() const -> std::string_view - { - if (m_extra.has_value()) - { - return m_extra->filename; - } - return ""; - } - - void MatchSpec::set_filename(std::string val) - { - if (val != filename()) // Avoid allocating extra to set the default value - { - extra().filename = std::move(val); - } - } - auto MatchSpec::optional() const -> bool { return m_extra.has_value() && m_extra->optional; @@ -792,11 +861,6 @@ namespace mamba::specs && m_build_number.is_explicitly_free(); } - auto MatchSpec::is_file() const -> bool - { - return (!m_filename.empty()) || (!m_url.empty()); - } - auto MatchSpec::extra() -> ExtraMembers& { if (!m_extra.has_value()) From 2e43386e5e369d0390a2bbfd997067e82683906a Mon Sep 17 00:00:00 2001 From: AntoinePrv 
Date: Wed, 28 Feb 2024 09:15:16 +0100 Subject: [PATCH 11/22] Remove MatchSpec::url --- libmamba/include/mamba/specs/match_spec.hpp | 5 --- libmamba/src/specs/match_spec.cpp | 26 +++++-------- libmamba/tests/src/specs/test_match_spec.cpp | 41 ++++++++------------ 3 files changed, 25 insertions(+), 47 deletions(-) diff --git a/libmamba/include/mamba/specs/match_spec.hpp b/libmamba/include/mamba/specs/match_spec.hpp index faa83261f6..c78fecc3fb 100644 --- a/libmamba/include/mamba/specs/match_spec.hpp +++ b/libmamba/include/mamba/specs/match_spec.hpp @@ -86,9 +86,6 @@ namespace mamba::specs [[nodiscard]] auto optional() const -> bool; void set_optional(bool opt); - // TODO as string_view conditional on channel type - [[nodiscard]] auto url() const -> const std::string&; - [[nodiscard]] auto conda_build_form() const -> std::string; [[nodiscard]] auto str() const -> std::string; @@ -115,8 +112,6 @@ namespace mamba::specs std::string m_name_space; BuildNumberSpec m_build_number; util::heap_optional m_extra = {}; // unlikely data - // TODO can put inside channel - std::string m_url; auto extra() -> ExtraMembers&; [[nodiscard]] auto channel_is_file() const -> bool; diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index 37119838b3..544f2f5f99 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -36,7 +36,6 @@ namespace mamba::specs .or_else([](specs::ParseError&& error) { throw std::move(error); }) .value(); auto [_, pkg] = util::rsplit_once(out.m_channel->location(), '/'); - out.m_url = util::path_or_url_to_url(spec); // Build string auto [head, tail] = util::rsplit_once(strip_archive_extension(pkg), '-'); @@ -388,6 +387,12 @@ namespace mamba::specs .or_else([](ParseError&& error) { throw std::move(error); }) .value()); } + if (const auto& val = at_or(extra, "url", ""); !val.empty()) + { + out.set_channel(UnresolvedChannel::parse(val) + .or_else([](ParseError&& error) { throw std::move(error); }) + 
.value()); + } if (const auto& val = at_or(extra, "subdir", ""); !val.empty()) { if (!out.m_channel.has_value()) @@ -404,10 +409,6 @@ namespace mamba::specs ); } } - if (const auto& val = at_or(extra, "url", ""); !val.empty()) - { - out.m_url = val; - } if (const auto& val = at_or(extra, "fn", ""); !val.empty()) { out.set_filename(val); @@ -533,7 +534,7 @@ namespace mamba::specs auto MatchSpec::is_file() const -> bool { - return (!filename().empty()) || (!m_url.empty()); + return !filename().empty(); } auto MatchSpec::name_space() const -> const std::string& @@ -701,11 +702,6 @@ namespace mamba::specs } } - auto MatchSpec::url() const -> const std::string& - { - return m_url; - } - auto MatchSpec::conda_build_form() const -> std::string { const bool has_version = !m_version.is_explicitly_free(); @@ -747,6 +743,7 @@ namespace mamba::specs // else: // brackets.append("subdir=%s" % subdir_matcher) + // TODO change as attribute if complex URL, and has "url" if PackageUrl if (m_channel.has_value()) { res << fmt::format("{}::", *m_channel); @@ -816,12 +813,7 @@ namespace mamba::specs const auto& q = maybe_quote(feats); formatted_brackets.push_back(util::concat("features=", q, feats, q)); } - if (const auto& u = url(); !u.empty()) - { - const auto& q = maybe_quote(u); - formatted_brackets.push_back(util::concat("url=", q, u, q)); - } - else if (const auto& fn = filename(); !fn.empty()) + else if (const auto& fn = filename(); !fn.empty() && !channel_is_file()) { // No "fn" when we have a URL const auto& q = maybe_quote(fn); diff --git a/libmamba/tests/src/specs/test_match_spec.cpp b/libmamba/tests/src/specs/test_match_spec.cpp index e117f72ba8..64906bb796 100644 --- a/libmamba/tests/src/specs/test_match_spec.cpp +++ b/libmamba/tests/src/specs/test_match_spec.cpp @@ -90,7 +90,7 @@ TEST_SUITE("specs::match_spec") CHECK_EQ(ms.version().str(), "==6.4"); CHECK_EQ(ms.build_string().str(), "h59595ed_2"); CHECK_EQ( - ms.url(), + ms.channel().value().str(), 
"https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-h59595ed_2.conda" ); CHECK_EQ(ms.filename(), "ncurses-6.4-h59595ed_2.conda"); @@ -104,7 +104,7 @@ TEST_SUITE("specs::match_spec") CHECK_EQ(ms.version().str(), "==0.1"); CHECK_EQ(ms.build_string().str(), "conda_forge"); CHECK_EQ( - ms.url(), + ms.channel().value().str(), "https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2" ); CHECK_EQ(ms.filename(), "_libgcc_mutex-0.1-conda_forge.tar.bz2"); @@ -117,7 +117,7 @@ TEST_SUITE("specs::match_spec") CHECK_EQ(ms.version().str(), "==11.2.0"); CHECK_EQ(ms.build_string().str(), "h1d223b6_13"); CHECK_EQ( - ms.url(), + ms.channel().value().str(), "https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-11.2.0-h1d223b6_13.tar.bz2" ); CHECK_EQ(ms.filename(), "libgcc-ng-11.2.0-h1d223b6_13.tar.bz2"); @@ -129,26 +129,10 @@ TEST_SUITE("specs::match_spec") CHECK_EQ(ms.name().str(), "_libgcc_mutex"); CHECK_EQ(ms.version().str(), "==0.1"); CHECK_EQ(ms.build_string().str(), "conda_forge"); - if (util::on_win) - { - std::string driveletter = fs::absolute(fs::u8path("/")).string().substr(0, 1); - CHECK_EQ( - ms.url(), - util::concat( - "file://", - driveletter, - ":/home/randomguy/Downloads/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2" - ) - ); - } - else - { - CHECK_EQ( - ms.url(), - "file:///home/randomguy/Downloads/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2" - ); - } - + CHECK_EQ( + ms.channel().value().str(), + "/home/randomguy/Downloads/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2" + ); CHECK_EQ(ms.filename(), "_libgcc_mutex-0.1-conda_forge.tar.bz2"); } { @@ -156,7 +140,10 @@ TEST_SUITE("specs::match_spec") "xtensor[url=file:///home/wolfv/Downloads/xtensor-0.21.4-hc9558a2_0.tar.bz2]" ); CHECK_EQ(ms.name().str(), "xtensor"); - CHECK_EQ(ms.url(), "file:///home/wolfv/Downloads/xtensor-0.21.4-hc9558a2_0.tar.bz2"); + CHECK_EQ( + ms.channel().value().str(), + "file:///home/wolfv/Downloads/xtensor-0.21.4-hc9558a2_0.tar.bz2" + ); } { auto 
ms = MatchSpec::parse("foo=1.0=2"); @@ -173,8 +160,12 @@ TEST_SUITE("specs::match_spec") auto ms = MatchSpec::parse( "foo=1.0=2[md5=123123123, license=BSD-3, fn='test 123.tar.bz2', url='abcdef']" ); + CHECK_EQ(ms.channel().value().str(), "abcdef"); CHECK_EQ(ms.conda_build_form(), "foo 1.0.* 2"); - CHECK_EQ(ms.str(), R"ms(foo=1.0=2[url=abcdef,md5=123123123,license=BSD-3])ms"); + CHECK_EQ( + ms.str(), + R"ms(abcdef::foo=1.0=2[fn="test 123.tar.bz2",md5=123123123,license=BSD-3])ms" + ); } { auto ms = MatchSpec::parse("libblas=*=*mkl"); From 8c9937915f6bad97d3a581c99a04d65e3b42b9d1 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 11:58:26 +0100 Subject: [PATCH 12/22] Refactor MatchSpec::subdirs --- libmamba/include/mamba/specs/match_spec.hpp | 26 ++++++-- .../mamba/specs/unresolved_channel.hpp | 2 + libmamba/src/specs/match_spec.cpp | 62 +++++++++++++++---- libmamba/src/specs/unresolved_channel.cpp | 40 ++++++------ 4 files changed, 92 insertions(+), 38 deletions(-) diff --git a/libmamba/include/mamba/specs/match_spec.hpp b/libmamba/include/mamba/specs/match_spec.hpp index c78fecc3fb..3da295a86d 100644 --- a/libmamba/include/mamba/specs/match_spec.hpp +++ b/libmamba/include/mamba/specs/match_spec.hpp @@ -7,6 +7,7 @@ #ifndef MAMBA_SPECS_MATCH_SPEC #define MAMBA_SPECS_MATCH_SPEC +#include #include #include #include @@ -25,6 +26,8 @@ namespace mamba::specs using NameSpec = GlobSpec; using BuildStringSpec = GlobSpec; + using subdir_list = typename UnresolvedChannel::dynamic_platform_set; + using subdir_list_const_ref = std::reference_wrapper; inline static constexpr char url_md5_sep = '#'; inline static constexpr char prefered_list_open = '['; @@ -45,6 +48,14 @@ namespace mamba::specs [[nodiscard]] auto channel() const -> const std::optional&; void set_channel(std::optional chan); + [[nodiscard]] auto filename() const -> std::string_view; + void set_filename(std::string val); + + [[nodiscard]] auto is_file() const -> bool; + + [[nodiscard]] auto subdirs() 
const -> std::optional; + void set_subdirs(subdir_list val); + [[nodiscard]] auto name_space() const -> const std::string&; void set_name_space(std::string ns); @@ -78,11 +89,6 @@ namespace mamba::specs [[nodiscard]] auto track_features() const -> std::string_view; void set_track_features(std::string val); - [[nodiscard]] auto filename() const -> std::string_view; - void set_filename(std::string val); - - [[nodiscard]] auto is_file() const -> bool; - [[nodiscard]] auto optional() const -> bool; void set_optional(bool opt); @@ -95,13 +101,16 @@ namespace mamba::specs struct ExtraMembers { + // The filename is stored as part of the channel when it is a full Package URL + std::string filename = {}; + // The filename is stored as part of the channel when it is available + subdir_list subdirs = {}; std::string md5 = {}; std::string sha256 = {}; std::string license = {}; std::string license_family = {}; std::string features = {}; std::string track_features = {}; - std::string filename = {}; bool optional = false; }; @@ -114,11 +123,16 @@ namespace mamba::specs util::heap_optional m_extra = {}; // unlikely data auto extra() -> ExtraMembers&; + [[nodiscard]] auto channel_is_file() const -> bool; [[nodiscard]] auto channel_filename() const -> std::string_view; void set_channel_filename(std::string val); + [[nodiscard]] auto extra_filename() const -> std::string_view; void set_extra_filename(std::string val); + + [[nodiscard]] auto extra_subdirs() const -> std::optional; + void set_extra_subdirs(subdir_list val); }; namespace match_spec_literals diff --git a/libmamba/include/mamba/specs/unresolved_channel.hpp b/libmamba/include/mamba/specs/unresolved_channel.hpp index 9f610b3253..0239fbf7ba 100644 --- a/libmamba/include/mamba/specs/unresolved_channel.hpp +++ b/libmamba/include/mamba/specs/unresolved_channel.hpp @@ -89,6 +89,8 @@ namespace mamba::specs using dynamic_platform_set = util::flat_set; + [[nodiscard]] static auto parse_platform_list(std::string_view plats) -> 
dynamic_platform_set; + [[nodiscard]] static auto parse(std::string_view str) -> expected_parse_t; UnresolvedChannel() = default; diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index 544f2f5f99..d4a35794b0 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -17,7 +17,6 @@ #include "mamba/specs/match_spec.hpp" #include "mamba/util/parsers.hpp" #include "mamba/util/string.hpp" -#include "mamba/util/url_manip.hpp" namespace mamba::specs { @@ -395,18 +394,10 @@ namespace mamba::specs } if (const auto& val = at_or(extra, "subdir", ""); !val.empty()) { - if (!out.m_channel.has_value()) + // Channel part of the matchspec have priority + if (auto chan = out.channel(); !chan.has_value() || chan->platform_filters().empty()) { - out.m_channel = UnresolvedChannel("", { val }, UnresolvedChannel::Type::Unknown); - } - // Subdirs specified in the channel part have higher precedence - else if (out.m_channel->platform_filters().empty()) - { - out.m_channel = UnresolvedChannel( - out.m_channel->clear_location(), - { val }, - out.m_channel->type() - ); + out.set_subdirs({ UnresolvedChannel::parse_platform_list(val) }); } } if (const auto& val = at_or(extra, "fn", ""); !val.empty()) @@ -505,7 +496,7 @@ namespace mamba::specs void MatchSpec::set_extra_filename(std::string val) { - if (val != filename()) // Avoid allocating extra to set the default value + if (val != extra_filename()) // Avoid allocating extra to set the default value { extra().filename = std::move(val); } @@ -525,6 +516,7 @@ namespace mamba::specs if (channel_is_file()) { set_channel_filename(std::move(val)); + set_extra_filename(""); } else { @@ -537,6 +529,50 @@ namespace mamba::specs return !filename().empty(); } + auto MatchSpec::extra_subdirs() const -> std::optional + { + if (m_extra.has_value() && !m_extra->subdirs.empty()) + { + return { std::cref(m_extra->subdirs) }; + } + return {}; + } + + void MatchSpec::set_extra_subdirs(subdir_list 
val) + { + // Avoid allocating extra to set the default value + if (m_extra.has_value() || !val.empty()) + { + extra().subdirs = std::move(val); + } + } + + auto MatchSpec::subdirs() const -> std::optional + { + if (m_channel.has_value() && !m_channel->platform_filters().empty()) + { + return { std::cref(m_channel->platform_filters()) }; + } + return extra_subdirs(); + } + + void MatchSpec::set_subdirs(subdir_list val) + { + if (m_channel.has_value()) + { + m_channel = UnresolvedChannel( + m_channel->clear_location(), + std::move(val), + m_channel->type() + ); + set_extra_subdirs({}); + } + else + { + extra().subdirs = std::move(val); + } + } + auto MatchSpec::name_space() const -> const std::string& { return m_name_space; diff --git a/libmamba/src/specs/unresolved_channel.cpp b/libmamba/src/specs/unresolved_channel.cpp index 5f8aed253b..d9201a93e5 100644 --- a/libmamba/src/specs/unresolved_channel.cpp +++ b/libmamba/src/specs/unresolved_channel.cpp @@ -28,28 +28,28 @@ namespace mamba::specs [[nodiscard]] auto find_slash_and_platform(std::string_view path) -> std::tuple>; - namespace + auto UnresolvedChannel::parse_platform_list(std::string_view plats) -> dynamic_platform_set { - using dynamic_platform_set = UnresolvedChannel::dynamic_platform_set; + static constexpr auto is_not_sep = [](char c) -> bool + { return !util::contains(UnresolvedChannel::platform_separators, c); }; - auto parse_platform_list(std::string_view plats) -> dynamic_platform_set + auto out = dynamic_platform_set{}; + auto head_rest = util::lstrip_if_parts(plats, is_not_sep); + while (!head_rest.front().empty()) { - static constexpr auto is_not_sep = [](char c) -> bool - { return !util::contains(UnresolvedChannel::platform_separators, c); }; - - auto out = dynamic_platform_set{}; - auto head_rest = util::lstrip_if_parts(plats, is_not_sep); - while (!head_rest.front().empty()) - { - // Accepting all strings, so that user can dynamically register new platforms - 
out.insert(util::to_lower(util::strip(head_rest.front()))); - head_rest = util::lstrip_if_parts( - util::lstrip(head_rest.back(), UnresolvedChannel::platform_separators), - is_not_sep - ); - } - return out; + // Accepting all strings, so that user can dynamically register new platforms + out.insert(util::to_lower(util::strip(head_rest.front()))); + head_rest = util::lstrip_if_parts( + util::lstrip(head_rest.back(), UnresolvedChannel::platform_separators), + is_not_sep + ); } + return out; + } + + namespace + { + using dynamic_platform_set = UnresolvedChannel::dynamic_platform_set; auto parse_platform_path(std::string_view str) -> std::pair { @@ -78,7 +78,9 @@ namespace mamba::specs { return { { std::string(util::rstrip(str.substr(0, start_pos))), - parse_platform_list(str.substr(start_pos + 1, str.size() - start_pos - 2)), + UnresolvedChannel::parse_platform_list( + str.substr(start_pos + 1, str.size() - start_pos - 2) + ), } }; } else From c9e41963bdf8aa4cde7bff8164badfbe553b9a4f Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 14:09:01 +0100 Subject: [PATCH 13/22] Refactor MatchSpec attribute parsing --- libmamba/src/specs/match_spec.cpp | 168 +++++++++++++++--------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index d4a35794b0..4542d23cd4 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -207,17 +207,94 @@ namespace mamba::specs ); } + auto is_true_string(std::string_view str) -> bool + { + return util::starts_with_any(str, std::array{ 'y', 'Y', 't', 'T', '1' }); + } + + // TODO remove subdir condition oif called first + void set_matchspec_attribute(MatchSpec& out, std::string_view attr, std::string_view val) + { + if (attr == "build_number") + { + out.set_build_number(BuildNumberSpec::parse(val) + .or_else([](ParseError&& error) { throw std::move(error); }) + .value() + + ); + } + else if ((attr == "build") || 
(attr == "build_strig")) + { + out.set_build_string(MatchSpec::BuildStringSpec(std::string(val))); + } + else if (attr == "version") + { + out.set_version(VersionSpec::parse(val) + .or_else([](ParseError&& error) { throw std::move(error); }) + .value()); + } + else if ((attr == "channel") || (attr == "url")) + { + out.set_channel(UnresolvedChannel::parse(val) + .or_else([](ParseError&& error) { throw std::move(error); }) + .value()); + } + else if (attr == "subdir") + { + // Channel part of the matchspec have priority + if (auto chan = out.channel(); !chan.has_value() || chan->platform_filters().empty()) + { + out.set_subdirs({ UnresolvedChannel::parse_platform_list(val) }); + } + } + else if ((attr == "fn") || (attr == "filename")) + { + out.set_filename(std::string(val)); + } + else if (attr == "md5") + { + out.set_md5(std::string(val)); + } + else if (attr == "sha256") + { + out.set_sha256(std::string(val)); + } + else if (attr == "license") + { + out.set_license(std::string(val)); + } + else if (attr == "license_family") + { + out.set_license_family(std::string(val)); + } + else if (attr == "features") + { + out.set_features(std::string(val)); + } + else if (attr == "track_features") + { + out.set_track_features(std::string(val)); + } + else if (attr == "optional") + { + out.set_optional(is_true_string(val)); + } + } + template - void extract_kv(std::string_view str, Map& map) + void extract_attributes(std::string_view str, Map& map) { const auto next_pos = find_attribute_split(str); auto [key, value] = util::split_once(str.substr(0, next_pos), MatchSpec::attribute_assign); - map.emplace(util::strip(key), strip_whitespace_quotes(value.value_or(""))); + map.emplace( + util::to_lower(util::strip(key)), + strip_whitespace_quotes(value.value_or("true")) + ); if (next_pos != std::string_view::npos) { - extract_kv(str.substr(next_pos + 1), map); + extract_attributes(str.substr(next_pos + 1), map); } }; } @@ -282,7 +359,7 @@ namespace mamba::specs { auto brackets_str 
= match[1].str(); brackets_str = brackets_str.substr(1, brackets_str.size() - 2); - extract_kv(brackets_str, extra); + extract_attributes(brackets_str, extra); spec_str.erase( static_cast(match.position(1)), static_cast(match.length(1)) @@ -295,7 +372,7 @@ namespace mamba::specs { auto parens_str = match[1].str(); parens_str = parens_str.substr(1, parens_str.size() - 2); - extract_kv(parens_str, extra); + extract_attributes(parens_str, extra); if (parens_str.find("optional") != parens_str.npos) { out.extra().optional = true; @@ -348,87 +425,10 @@ namespace mamba::specs out.m_build_string = {}; } - // TODO think about using a hash function here, (and elsewhere), like: - // https://hbfs.wordpress.com/2017/01/10/strings-in-c-switchcase-statements/ - - auto at_or = [](const auto& map, const auto& key, const auto& def) - { - using Val = typename std::decay_t::mapped_type; - if (auto it = map.find(key); it != map.cend()) - { - return Val(it->second); - } - return Val(def); - }; - - if (const auto& val = at_or(extra, "build_number", ""); !val.empty()) - { - out.set_build_number(BuildNumberSpec::parse(val) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value() - - ); - } - if (const auto& val = at_or(extra, "build", ""); !val.empty()) - { - out.set_build_string(MatchSpec::BuildStringSpec(std::string(val))); - } - if (const auto& val = at_or(extra, "version", ""); !val.empty()) - { - out.set_version( - VersionSpec::parse(val).or_else([](ParseError&& error) { throw std::move(error); } - ).value() - ); - } - if (const auto& val = at_or(extra, "channel", ""); !val.empty()) - { - out.set_channel(UnresolvedChannel::parse(val) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value()); - } - if (const auto& val = at_or(extra, "url", ""); !val.empty()) - { - out.set_channel(UnresolvedChannel::parse(val) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value()); - } - if (const auto& val = at_or(extra, "subdir", ""); !val.empty()) 
- { - // Channel part of the matchspec have priority - if (auto chan = out.channel(); !chan.has_value() || chan->platform_filters().empty()) - { - out.set_subdirs({ UnresolvedChannel::parse_platform_list(val) }); - } - } - if (const auto& val = at_or(extra, "fn", ""); !val.empty()) - { - out.set_filename(val); - } - if (const auto& val = at_or(extra, "md5", ""); !val.empty()) - { - out.set_md5(val); - } - if (const auto& val = at_or(extra, "sha256", ""); !val.empty()) + for (const auto& [key, val] : extra) { - out.set_sha256(val); + set_matchspec_attribute(out, key, val); } - if (const auto& val = at_or(extra, "license", ""); !val.empty()) - { - out.set_license(val); - } - if (const auto& val = at_or(extra, "license_family", ""); !val.empty()) - { - out.set_license_family(val); - } - if (const auto& val = at_or(extra, "features", ""); !val.empty()) - { - out.set_features(val); - } - if (const auto& val = at_or(extra, "track_features", ""); !val.empty()) - { - out.set_track_features(val); - } - return out; } From 1582cf413323fedc92cf3163d40f0e2058a5c2dc Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 14:27:13 +0100 Subject: [PATCH 14/22] Flexible build and version setting --- libmamba/src/specs/match_spec.cpp | 110 +++++++++++++----------------- 1 file changed, 47 insertions(+), 63 deletions(-) diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index 4542d23cd4..d4d6d25413 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include @@ -63,17 +62,12 @@ namespace mamba::specs namespace { auto parse_version_and_build(std::string_view s) - -> std::pair + -> std::pair { const std::size_t pos = s.find_last_of(" ="); if (pos == s.npos || pos == 0) { - return { - VersionSpec::parse(s) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value(), - MatchSpec::BuildStringSpec(), - }; + return { s, {} }; } if (char c = s[pos]; c 
== '=') @@ -82,22 +76,12 @@ namespace mamba::specs char d = s[pm1]; if (d == '=' || d == '!' || d == '|' || d == ',' || d == '<' || d == '>' || d == '~') { - return { - VersionSpec::parse(s) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value(), - MatchSpec::BuildStringSpec(), - }; + return { s, {} }; } } // c is either ' ' or pm1 is none of the forbidden chars - return { - VersionSpec::parse(s.substr(0, pos)) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value(), - MatchSpec::BuildStringSpec(std::string(s.substr(pos + 1))), - }; + return { s.substr(0, pos), s.substr(pos + 1) }; } } @@ -117,6 +101,7 @@ namespace mamba::specs MatchSpec::alt_quote, }; + /** Return true if the string is a valid hash hex representation. */ auto is_hash(std::string_view text) -> bool { constexpr auto is_hash_char = [](char c) -> bool @@ -212,89 +197,88 @@ namespace mamba::specs return util::starts_with_any(str, std::array{ 'y', 'Y', 't', 'T', '1' }); } - // TODO remove subdir condition oif called first - void set_matchspec_attribute(MatchSpec& out, std::string_view attr, std::string_view val) + void + set_single_matchspec_attribute(MatchSpec& spec, std::string_view attr, std::string_view val) { if (attr == "build_number") { - out.set_build_number(BuildNumberSpec::parse(val) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value() + spec.set_build_number(BuildNumberSpec::parse(val) + .or_else([](ParseError&& error) { throw std::move(error); }) + .value() ); } else if ((attr == "build") || (attr == "build_strig")) { - out.set_build_string(MatchSpec::BuildStringSpec(std::string(val))); + spec.set_build_string(MatchSpec::BuildStringSpec(std::string(val))); } else if (attr == "version") { - out.set_version(VersionSpec::parse(val) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value()); + spec.set_version(VersionSpec::parse(val) + .or_else([](ParseError&& error) { throw std::move(error); }) + .value()); } else if 
((attr == "channel") || (attr == "url")) { - out.set_channel(UnresolvedChannel::parse(val) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value()); + spec.set_channel(UnresolvedChannel::parse(val) + .or_else([](ParseError&& error) { throw std::move(error); }) + .value()); } else if (attr == "subdir") { - // Channel part of the matchspec have priority - if (auto chan = out.channel(); !chan.has_value() || chan->platform_filters().empty()) + if (auto chan = spec.channel(); !chan.has_value() || chan->platform_filters().empty()) { - out.set_subdirs({ UnresolvedChannel::parse_platform_list(val) }); + spec.set_subdirs({ UnresolvedChannel::parse_platform_list(val) }); } } else if ((attr == "fn") || (attr == "filename")) { - out.set_filename(std::string(val)); + spec.set_filename(std::string(val)); } else if (attr == "md5") { - out.set_md5(std::string(val)); + spec.set_md5(std::string(val)); } else if (attr == "sha256") { - out.set_sha256(std::string(val)); + spec.set_sha256(std::string(val)); } else if (attr == "license") { - out.set_license(std::string(val)); + spec.set_license(std::string(val)); } else if (attr == "license_family") { - out.set_license_family(std::string(val)); + spec.set_license_family(std::string(val)); } else if (attr == "features") { - out.set_features(std::string(val)); + spec.set_features(std::string(val)); } else if (attr == "track_features") { - out.set_track_features(std::string(val)); + spec.set_track_features(std::string(val)); } else if (attr == "optional") { - out.set_optional(is_true_string(val)); + spec.set_optional(is_true_string(val)); } } - template - void extract_attributes(std::string_view str, Map& map) + void set_matchspec_attributes(MatchSpec& spec, std::string_view attrs) { - const auto next_pos = find_attribute_split(str); + const auto next_pos = find_attribute_split(attrs); - auto [key, value] = util::split_once(str.substr(0, next_pos), MatchSpec::attribute_assign); - map.emplace( + auto [key, value] = 
util::split_once(attrs.substr(0, next_pos), MatchSpec::attribute_assign); + set_single_matchspec_attribute( + spec, util::to_lower(util::strip(key)), strip_whitespace_quotes(value.value_or("true")) ); if (next_pos != std::string_view::npos) { - extract_attributes(str.substr(next_pos + 1), map); + set_matchspec_attributes(spec, attrs.substr(next_pos + 1)); } }; } @@ -351,7 +335,6 @@ namespace mamba::specs auto spec_str = std::string(spec); std::smatch match; - std::unordered_map extra; // Step 3. strip off brackets portion static std::regex brackets_re(".*(?:(\\[.*\\]))"); @@ -359,7 +342,7 @@ namespace mamba::specs { auto brackets_str = match[1].str(); brackets_str = brackets_str.substr(1, brackets_str.size() - 2); - extract_attributes(brackets_str, extra); + set_matchspec_attributes(out, brackets_str); spec_str.erase( static_cast(match.position(1)), static_cast(match.length(1)) @@ -372,7 +355,7 @@ namespace mamba::specs { auto parens_str = match[1].str(); parens_str = parens_str.substr(1, parens_str.size() - 2); - extract_attributes(parens_str, extra); + set_matchspec_attributes(out, parens_str); if (parens_str.find("optional") != parens_str.npos) { out.extra().optional = true; @@ -387,7 +370,7 @@ namespace mamba::specs // the repr of `libblas=*=*mkl` if (spec_str.back() == '=') { - spec_str.push_back('*'); + spec_str.erase(spec_str.end() - 1); } // This is #6 of the spec parsing // Look for version *and* build string and separator @@ -417,18 +400,19 @@ namespace mamba::specs )); } - std::tie(out.m_version, out.m_build_string) = parse_version_and_build(version_and_build); - } - else // no-op - { - out.m_version = {}; - out.m_build_string = {}; + auto [version_str, build_string_str] = parse_version_and_build(version_and_build); + if (!version_str.empty()) + { + out.m_version = VersionSpec::parse(version_str) + .or_else([](ParseError&& error) { throw std::move(error); }) + .value(); + } + if (!build_string_str.empty()) + { + out.m_build_string = 
MatchSpec::BuildStringSpec(std::string(build_string_str)); + } } - for (const auto& [key, val] : extra) - { - set_matchspec_attribute(out, key, val); - } return out; } From 57d9b0836e48db641317d9b5cbc394ecda10672f Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 15:24:21 +0100 Subject: [PATCH 15/22] Refactor find_matching_parenthesis --- libmamba/include/mamba/util/parsers.hpp | 7 +++---- libmamba/src/util/parsers.cpp | 22 ++++++++++------------ libmamba/tests/src/util/test_parsers.cpp | 6 +++--- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/libmamba/include/mamba/util/parsers.hpp b/libmamba/include/mamba/util/parsers.hpp index 6ad74c8915..c8815bd86a 100644 --- a/libmamba/include/mamba/util/parsers.hpp +++ b/libmamba/include/mamba/util/parsers.hpp @@ -18,11 +18,10 @@ namespace mamba::util { - enum struct ParseError + enum struct ParseError : bool { - Ok, - InvalidInput, - NotFound, + Ok = true, + InvalidInput = false, }; /** diff --git a/libmamba/src/util/parsers.cpp b/libmamba/src/util/parsers.cpp index 420c361fa7..dd86cf91b7 100644 --- a/libmamba/src/util/parsers.cpp +++ b/libmamba/src/util/parsers.cpp @@ -34,31 +34,29 @@ namespace mamba::util const auto start = text.find_first_of(open_or_close_str); if (start == npos) { - err = ParseError::NotFound; return { npos, npos }; } - depth += int(text[start] == open) - int(text[start] == close); - if (depth < 0) - { - err = ParseError::InvalidInput; - return {}; - } - auto end = text.find_first_of(open_or_close_str, start + 1); - while (end != npos) + auto pos = start; + while (pos != npos) { - depth += int(text[end] == open) - int(text[end] == close); + depth += if_else( + open == close, + if_else(depth > 0, -1, 1), // Open or close same parentheses + int(text[pos] == open) - int(text[pos] == close) + ); if (depth == 0) { - return { start, end + 1 }; + return { start, pos + 1 }; } if (depth < 0) { err = ParseError::InvalidInput; return {}; } - end = 
text.find_first_of(open_or_close_str, end + 1); + pos = text.find_first_of(open_or_close_str, pos + 1); } + err = ParseError::InvalidInput; return {}; } diff --git a/libmamba/tests/src/util/test_parsers.cpp b/libmamba/tests/src/util/test_parsers.cpp index 75f8e3e27d..db34ab2594 100644 --- a/libmamba/tests/src/util/test_parsers.cpp +++ b/libmamba/tests/src/util/test_parsers.cpp @@ -18,9 +18,9 @@ TEST_SUITE("util::parsers") { SUBCASE("Different open/close pair") { - CHECK_EQ(find_matching_parentheses_str("").error(), ParseError::NotFound); - CHECK_EQ(find_matching_parentheses_str("Nothing to see here").error(), ParseError::NotFound); - CHECK_EQ(find_matching_parentheses_str("(hello)", '[', ']').error(), ParseError::NotFound); + CHECK_EQ(find_matching_parentheses_str(""), ""); + CHECK_EQ(find_matching_parentheses_str("Nothing to see here"), ""); + CHECK_EQ(find_matching_parentheses_str("(hello)", '[', ']'), ""); CHECK_EQ(find_matching_parentheses_str("()"), "()"); CHECK_EQ(find_matching_parentheses_str("hello()"), "()"); From fb8e67ebd85fd56d188e574cf7d1cfa292c48924 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 15:47:42 +0100 Subject: [PATCH 16/22] Refactor find_matching_parenthesis --- libmamba/include/mamba/util/parsers.hpp | 23 ++------------- libmamba/src/util/parsers.cpp | 34 +++------------------- libmamba/tests/src/util/test_parsers.cpp | 37 ++++++++++++------------ 3 files changed, 24 insertions(+), 70 deletions(-) diff --git a/libmamba/include/mamba/util/parsers.hpp b/libmamba/include/mamba/util/parsers.hpp index c8815bd86a..5ea5d92fa1 100644 --- a/libmamba/include/mamba/util/parsers.hpp +++ b/libmamba/include/mamba/util/parsers.hpp @@ -32,38 +32,19 @@ namespace mamba::util * If an error is encountered, @p err is modified to contain the error, otherwise it is left * as it is. 
*/ - auto find_matching_parentheses_idx( // + auto find_matching_parentheses( // std::string_view text, ParseError& err, char open = '(', char close = ')' ) noexcept -> std::pair; - /** - * Find the next matching parenthesese pair. - * - * Correctly matches parentheses together so that inner parentheses pairs are skipped. - * Open and closing pairs need not be differents. - */ - [[nodiscard]] auto find_matching_parentheses_idx( // + [[nodiscard]] auto find_matching_parentheses( // std::string_view text, char open = '(', char close = ')' ) noexcept -> tl::expected, ParseError>; - auto find_matching_parentheses_str( // - std::string_view text, - ParseError& err, - char open = '(', - char close = ')' - ) noexcept -> std::string_view; - - [[nodiscard]] auto find_matching_parentheses_str( // - std::string_view text, - char open = '(', - char close = ')' - ) noexcept -> tl::expected; - /** * Find a character or string, except in matching parentheses pairs. * diff --git a/libmamba/src/util/parsers.cpp b/libmamba/src/util/parsers.cpp index dd86cf91b7..8c5d599535 100644 --- a/libmamba/src/util/parsers.cpp +++ b/libmamba/src/util/parsers.cpp @@ -17,7 +17,7 @@ namespace mamba::util * find_matching_parentheses * *******************************/ - auto find_matching_parentheses_idx( // + auto find_matching_parentheses( // std::string_view text, ParseError& err, char open, @@ -47,7 +47,7 @@ namespace mamba::util ); if (depth == 0) { - return { start, pos + 1 }; + return { start, pos }; } if (depth < 0) { @@ -61,14 +61,14 @@ namespace mamba::util return {}; } - auto find_matching_parentheses_idx( // + auto find_matching_parentheses( // std::string_view text, char open, char close ) noexcept -> tl::expected, ParseError> { auto err = ParseError::Ok; - auto out = find_matching_parentheses_idx(text, err, open, close); + auto out = find_matching_parentheses(text, err, open, close); if (err != ParseError::Ok) { return tl::make_unexpected(err); @@ -76,32 +76,6 @@ namespace mamba::util 
return { out }; } - auto find_matching_parentheses_str( // - std::string_view text, - ParseError& err, - char open, - char close - ) noexcept -> std::string_view - { - const auto [start, end] = find_matching_parentheses_idx(text, err, open, close); - return (start == std::string_view::npos) ? "" : text.substr(start, end); - } - - auto find_matching_parentheses_str( // - std::string_view text, - char open, - char close - ) noexcept -> tl::expected - { - auto err = ParseError::Ok; - const auto [start, end] = find_matching_parentheses_idx(text, err, open, close); - if (err != ParseError::Ok) - { - return tl::make_unexpected(err); - } - return { (start == std::string_view::npos) ? "" : text.substr(start, end) }; - } - /***************************** * find_not_in_parentheses * *****************************/ diff --git a/libmamba/tests/src/util/test_parsers.cpp b/libmamba/tests/src/util/test_parsers.cpp index db34ab2594..7f6bcc361e 100644 --- a/libmamba/tests/src/util/test_parsers.cpp +++ b/libmamba/tests/src/util/test_parsers.cpp @@ -16,32 +16,31 @@ TEST_SUITE("util::parsers") TEST_CASE("find_matching_parentheses") { + using Slice = std::pair; + SUBCASE("Different open/close pair") { - CHECK_EQ(find_matching_parentheses_str(""), ""); - CHECK_EQ(find_matching_parentheses_str("Nothing to see here"), ""); - CHECK_EQ(find_matching_parentheses_str("(hello)", '[', ']'), ""); - - CHECK_EQ(find_matching_parentheses_str("()"), "()"); - CHECK_EQ(find_matching_parentheses_str("hello()"), "()"); - CHECK_EQ(find_matching_parentheses_str("(hello)"), "(hello)"); - CHECK_EQ( - find_matching_parentheses_str("(hello (dear (sir))(!))(how(are(you)))"), - "(hello (dear (sir))(!))" - ); - CHECK_EQ(find_matching_parentheses_str("[hello]", '[', ']'), "[hello]"); - - CHECK_EQ(find_matching_parentheses_str(")(").error(), ParseError::InvalidInput); - CHECK_EQ(find_matching_parentheses_str("((hello)").error(), ParseError::InvalidInput); + CHECK_EQ(find_matching_parentheses(""), Slice(npos, npos)); 
+ CHECK_EQ(find_matching_parentheses("Nothing to see here"), Slice(npos, npos)); + CHECK_EQ(find_matching_parentheses("(hello)", '[', ']'), Slice(npos, npos)); + + CHECK_EQ(find_matching_parentheses("()"), Slice(0, 1)); + CHECK_EQ(find_matching_parentheses("hello()"), Slice(5, 6)); + CHECK_EQ(find_matching_parentheses("(hello)"), Slice(0, 6)); + CHECK_EQ(find_matching_parentheses("(hello (dear (sir))(!))(how(are(you)))"), Slice(0, 22)); + CHECK_EQ(find_matching_parentheses("[hello]", '[', ']'), Slice(0, 6)); + + CHECK_EQ(find_matching_parentheses(")(").error(), ParseError::InvalidInput); + CHECK_EQ(find_matching_parentheses("((hello)").error(), ParseError::InvalidInput); } SUBCASE("Similar open/close pair") { - CHECK_EQ(find_matching_parentheses_str(R"("")", '"', '"'), R"("")"); - CHECK_EQ(find_matching_parentheses_str(R"("hello")", '"', '"'), R"("hello")"); - CHECK_EQ(find_matching_parentheses_str(R"("some","csv","value")", '"', '"'), R"("some")"); + CHECK_EQ(find_matching_parentheses(R"("")", '"', '"'), Slice(0, 1)); + CHECK_EQ(find_matching_parentheses(R"("hello")", '"', '"'), Slice(0, 6)); + CHECK_EQ(find_matching_parentheses(R"("some","csv","value")", '"', '"'), Slice(0, 5)); CHECK_EQ( - find_matching_parentheses_str(R"(Here is "some)", '"', '"').error(), + find_matching_parentheses(R"(Here is "some)", '"', '"').error(), ParseError::InvalidInput ); } From cb244aa890ee7faeb170dafb4d6a221c539f6212 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 15:58:44 +0100 Subject: [PATCH 17/22] Simplify parsers --- libmamba/src/util/parsers.cpp | 50 +++++------------------------------ 1 file changed, 6 insertions(+), 44 deletions(-) diff --git a/libmamba/src/util/parsers.cpp b/libmamba/src/util/parsers.cpp index 8c5d599535..3d075554ee 100644 --- a/libmamba/src/util/parsers.cpp +++ b/libmamba/src/util/parsers.cpp @@ -98,13 +98,7 @@ namespace mamba::util char close ) noexcept -> tl::expected { - auto err = ParseError::Ok; - const auto pos = 
find_not_in_parentheses(text, c, err, open, close); - if (err != ParseError::Ok) - { - return tl::make_unexpected(err); - } - return { pos }; + return find_not_in_parentheses(text, c, std::array{ open }, std::array{ close }); } auto find_not_in_parentheses( // @@ -115,14 +109,7 @@ namespace mamba::util char close ) noexcept -> std::size_t { - return detail_parsers::find_not_in_parentheses_impl( - text, - val, - err, - std::array{ open }, - std::array{ close }, - detail_parsers::FindParenthesesSearcher() - ); + return find_not_in_parentheses(text, val, err, std::array{ open }, std::array{ close }); } auto find_not_in_parentheses( // @@ -132,13 +119,7 @@ namespace mamba::util char close ) noexcept -> tl::expected { - auto err = ParseError::Ok; - const auto pos = find_not_in_parentheses(text, val, err, open, close); - if (err != ParseError::Ok) - { - return tl::make_unexpected(err); - } - return { pos }; + return find_not_in_parentheses(text, val, std::array{ open }, std::array{ close }); } /****************************** @@ -163,13 +144,7 @@ namespace mamba::util char close ) noexcept -> tl::expected { - auto err = ParseError::Ok; - const auto pos = rfind_not_in_parentheses(text, c, err, open, close); - if (err != ParseError::Ok) - { - return tl::make_unexpected(err); - } - return { pos }; + return rfind_not_in_parentheses(text, c, std::array{ open }, std::array{ close }); } auto rfind_not_in_parentheses( // @@ -180,14 +155,7 @@ namespace mamba::util char close ) noexcept -> std::size_t { - return detail_parsers::find_not_in_parentheses_impl( - text, - val, - err, - std::array{ close }, // swaped - std::array{ open }, - detail_parsers::RFindParenthesesSearcher() - ); + return rfind_not_in_parentheses(text, val, err, std::array{ open }, std::array{ close }); } auto rfind_not_in_parentheses( // @@ -197,13 +165,7 @@ namespace mamba::util char close ) noexcept -> tl::expected { - auto err = ParseError::Ok; - const auto pos = rfind_not_in_parentheses(text, val, err, open, 
close); - if (err != ParseError::Ok) - { - return tl::make_unexpected(err); - } - return { pos }; + return rfind_not_in_parentheses(text, val, std::array{ open }, std::array{ close }); } /********** From 87fec9ecc00112b1992fb215a14108b1e2e81a15 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 16:06:45 +0100 Subject: [PATCH 18/22] Add find_matching_parentheses with multiple parenthesese --- libmamba/include/mamba/util/parsers.hpp | 121 +++++++++++++++++++++++ libmamba/src/util/parsers.cpp | 44 +-------- libmamba/tests/src/util/test_parsers.cpp | 10 ++ 3 files changed, 133 insertions(+), 42 deletions(-) diff --git a/libmamba/include/mamba/util/parsers.hpp b/libmamba/include/mamba/util/parsers.hpp index 5ea5d92fa1..e1e92dd29b 100644 --- a/libmamba/include/mamba/util/parsers.hpp +++ b/libmamba/include/mamba/util/parsers.hpp @@ -45,6 +45,21 @@ namespace mamba::util char close = ')' ) noexcept -> tl::expected, ParseError>; + template + auto find_matching_parentheses( // + std::string_view text, + ParseError& err, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> std::pair; + + template + [[nodiscard]] auto find_matching_parentheses( // + std::string_view text, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> tl::expected, ParseError>; + /** * Find a character or string, except in matching parentheses pairs. 
* @@ -281,6 +296,67 @@ namespace mamba::util } }; + template + auto find_matching_parentheses_impl( + std::string_view text, + ParseError& err, + const std::array& open, + const std::array& close, + Searcher&& searcher + ) noexcept -> std::pair + { + // TODO(C++20): After allocating tokens and depths here, call an impl function using + // std::span defined in .cpp + static constexpr auto npos = std::string_view::npos; + + const auto tokens = detail_parsers::concat_array(open, close); + const auto tokens_str = std::string_view(tokens.data(), tokens.size()); + + auto depths = std::array{}; // Plus one for branchless depths code + + const auto start = searcher.find_first(text, tokens_str); + if (start == npos) + { + return { npos, npos }; + } + + auto pos = start; + while (pos != npos) + { + // Change depth of corresponding open/close pair, writting in index P for + // the one not matching. + const auto open_depth_idx = detail_parsers::find(open, text[pos]); + const auto close_depth_idx = detail_parsers::find(close, text[pos]); + depths[open_depth_idx] += int(open_depth_idx < open.size()); + depths[close_depth_idx] -= int(close_depth_idx < open.size()); + // When open and close are the same character, depth did not change so we make + // a swap operation + depths[open_depth_idx] = if_else( + open_depth_idx == close_depth_idx, + if_else(depths[open_depth_idx] > 0, 0, 1), // swap 0 and 1 + depths[open_depth_idx] + ); + depths[P] = 0; + + // All parentheses are properly closed, we found the matching one. 
+ if (depths == decltype(depths){}) + { + return { start, pos }; + } + + // Any negative depth means mismatched parentheses + for (auto d : depths) + { + err = if_else(d < 0, ParseError::InvalidInput, err); + } + + pos = searcher.find_next(text, tokens_str, pos); + } + + err = ParseError::InvalidInput; + return { start, npos }; + } + template auto find_not_in_parentheses_impl( std::string_view text, @@ -346,6 +422,47 @@ namespace mamba::util } } + /******************************* + * find_matching_parentheses * + *******************************/ + + template + auto find_matching_parentheses( // + std::string_view text, + ParseError& err, + const std::array& open, + const std::array& close + ) noexcept -> std::pair + { + return detail_parsers::find_matching_parentheses_impl( + text, + err, + open, + close, + detail_parsers::FindParenthesesSearcher() + ); + } + + template + [[nodiscard]] auto find_matching_parentheses( // + std::string_view text, + const std::array& open, + const std::array& close + ) noexcept -> tl::expected, ParseError> + { + auto err = ParseError::Ok; + auto out = find_matching_parentheses(text, err, open, close); + if (err != ParseError::Ok) + { + return tl::make_unexpected(err); + } + return { out }; + } + + /***************************** + * find_not_in_parentheses * + *****************************/ + template auto find_not_in_parentheses( std::string_view text, @@ -418,6 +535,10 @@ namespace mamba::util return { pos }; } + /****************************** + * rfind_not_in_parentheses * + ******************************/ + template auto rfind_not_in_parentheses( std::string_view text, diff --git a/libmamba/src/util/parsers.cpp b/libmamba/src/util/parsers.cpp index 3d075554ee..aca08d981a 100644 --- a/libmamba/src/util/parsers.cpp +++ b/libmamba/src/util/parsers.cpp @@ -24,41 +24,7 @@ namespace mamba::util char close ) noexcept -> std::pair { - static constexpr auto npos = std::string_view::npos; - - const auto open_or_close = std::array{ open, 
close }; - const auto open_or_close_str = std::string_view(open_or_close.data(), open_or_close.size()); - - int depth = 0; - - const auto start = text.find_first_of(open_or_close_str); - if (start == npos) - { - return { npos, npos }; - } - - auto pos = start; - while (pos != npos) - { - depth += if_else( - open == close, - if_else(depth > 0, -1, 1), // Open or close same parentheses - int(text[pos] == open) - int(text[pos] == close) - ); - if (depth == 0) - { - return { start, pos }; - } - if (depth < 0) - { - err = ParseError::InvalidInput; - return {}; - } - pos = text.find_first_of(open_or_close_str, pos + 1); - } - - err = ParseError::InvalidInput; - return {}; + return find_matching_parentheses(text, err, std::array{ open }, std::array{ close }); } auto find_matching_parentheses( // @@ -67,13 +33,7 @@ namespace mamba::util char close ) noexcept -> tl::expected, ParseError> { - auto err = ParseError::Ok; - auto out = find_matching_parentheses(text, err, open, close); - if (err != ParseError::Ok) - { - return tl::make_unexpected(err); - } - return { out }; + return find_matching_parentheses(text, std::array{ open }, std::array{ close }); } /***************************** diff --git a/libmamba/tests/src/util/test_parsers.cpp b/libmamba/tests/src/util/test_parsers.cpp index 7f6bcc361e..aa13e32bcd 100644 --- a/libmamba/tests/src/util/test_parsers.cpp +++ b/libmamba/tests/src/util/test_parsers.cpp @@ -32,6 +32,11 @@ TEST_SUITE("util::parsers") CHECK_EQ(find_matching_parentheses(")(").error(), ParseError::InvalidInput); CHECK_EQ(find_matching_parentheses("((hello)").error(), ParseError::InvalidInput); + + static constexpr auto opens = std::array{ '[', '(' }; + static constexpr auto closes = std::array{ ']', ')' }; + CHECK_EQ(find_matching_parentheses("([hello])", opens, closes), Slice(0, 8)); + CHECK_EQ(find_matching_parentheses("(hello)[hello]", opens, closes), Slice(0, 6)); } SUBCASE("Similar open/close pair") @@ -43,6 +48,11 @@ TEST_SUITE("util::parsers") 
find_matching_parentheses(R"(Here is "some)", '"', '"').error(), ParseError::InvalidInput ); + + static constexpr auto opens = std::array{ '[', '(', '\'' }; + static constexpr auto closes = std::array{ ']', ')', '\'' }; + CHECK_EQ(find_matching_parentheses("'[hello]'", opens, closes), Slice(0, 8)); + CHECK_EQ(find_matching_parentheses("hello['hello', 'world']", opens, closes), Slice(5, 22)); } } From 2ef46dfed35f6b8dc09577a359e91872a4dfc487 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 16:19:16 +0100 Subject: [PATCH 19/22] Add rfind_matching_parentheses --- libmamba/include/mamba/util/parsers.hpp | 76 +++++++++++++++++++++++- libmamba/src/util/parsers.cpp | 23 +++++++ libmamba/tests/src/util/test_parsers.cpp | 45 ++++++++++++++ 3 files changed, 143 insertions(+), 1 deletion(-) diff --git a/libmamba/include/mamba/util/parsers.hpp b/libmamba/include/mamba/util/parsers.hpp index e1e92dd29b..3f457665b9 100644 --- a/libmamba/include/mamba/util/parsers.hpp +++ b/libmamba/include/mamba/util/parsers.hpp @@ -25,7 +25,7 @@ namespace mamba::util }; /** - * Find the next matching parenthesese pair. + * Find the first opening parenthesis and its matching pair. * * Correctly matches parenteses together so that inner parentheses pairs are skipped. * Open and closing pairs need not be differents. @@ -60,6 +60,42 @@ namespace mamba::util const std::array& close = { ')', ']' } ) noexcept -> tl::expected, ParseError>; + /** + * Find the last closing parenthesese and its matching pair. + * + * Correctly matches parenteses together so that inner parentheses pairs are skipped. + * Open and closing pairs need not be differents. + * If an error is encountered, @p err is modified to contain the error, otherwise it is left + * as it is. 
+ */ + auto rfind_matching_parentheses( // + std::string_view text, + ParseError& err, + char open = '(', + char close = ')' + ) noexcept -> std::pair; + + [[nodiscard]] auto rfind_matching_parentheses( // + std::string_view text, + char open = '(', + char close = ')' + ) noexcept -> tl::expected, ParseError>; + + template + auto rfind_matching_parentheses( // + std::string_view text, + ParseError& err, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> std::pair; + + template + [[nodiscard]] auto rfind_matching_parentheses( // + std::string_view text, + const std::array& open = { '(', '[' }, + const std::array& close = { ')', ']' } + ) noexcept -> tl::expected, ParseError>; + /** * Find a character or string, except in matching parentheses pairs. * @@ -459,6 +495,44 @@ namespace mamba::util return { out }; } + /******************************** + * rfind_matching_parentheses * + ********************************/ + + template + auto rfind_matching_parentheses( // + std::string_view text, + ParseError& err, + const std::array& open, + const std::array& close + ) noexcept -> std::pair + { + auto [last, first] = detail_parsers::find_matching_parentheses_impl( + text, + err, + close, // swaped + open, + detail_parsers::RFindParenthesesSearcher() + ); + return { first, last }; + } + + template + [[nodiscard]] auto rfind_matching_parentheses( // + std::string_view text, + const std::array& open, + const std::array& close + ) noexcept -> tl::expected, ParseError> + { + auto err = ParseError::Ok; + auto out = rfind_matching_parentheses(text, err, open, close); + if (err != ParseError::Ok) + { + return tl::make_unexpected(err); + } + return { out }; + } + /***************************** * find_not_in_parentheses * *****************************/ diff --git a/libmamba/src/util/parsers.cpp b/libmamba/src/util/parsers.cpp index aca08d981a..df0544e81d 100644 --- a/libmamba/src/util/parsers.cpp +++ b/libmamba/src/util/parsers.cpp @@ 
-36,6 +36,29 @@ namespace mamba::util return find_matching_parentheses(text, std::array{ open }, std::array{ close }); } + /******************************** + * rfind_matching_parentheses * + ********************************/ + + auto rfind_matching_parentheses( // + std::string_view text, + ParseError& err, + char open, + char close + ) noexcept -> std::pair + { + return rfind_matching_parentheses(text, err, std::array{ open }, std::array{ close }); + } + + auto rfind_matching_parentheses( // + std::string_view text, + char open, + char close + ) noexcept -> tl::expected, ParseError> + { + return rfind_matching_parentheses(text, std::array{ open }, std::array{ close }); + } + /***************************** * find_not_in_parentheses * *****************************/ diff --git a/libmamba/tests/src/util/test_parsers.cpp b/libmamba/tests/src/util/test_parsers.cpp index aa13e32bcd..4d34d16def 100644 --- a/libmamba/tests/src/util/test_parsers.cpp +++ b/libmamba/tests/src/util/test_parsers.cpp @@ -56,6 +56,51 @@ TEST_SUITE("util::parsers") } } + TEST_CASE("rfind_matching_parentheses") + { + using Slice = std::pair; + + SUBCASE("Different open/close pair") + { + CHECK_EQ(rfind_matching_parentheses(""), Slice(npos, npos)); + CHECK_EQ(rfind_matching_parentheses("Nothing to see here"), Slice(npos, npos)); + CHECK_EQ(rfind_matching_parentheses("(hello)", '[', ']'), Slice(npos, npos)); + + CHECK_EQ(rfind_matching_parentheses("()"), Slice(0, 1)); + CHECK_EQ(rfind_matching_parentheses("hello()"), Slice(5, 6)); + CHECK_EQ(rfind_matching_parentheses("(hello)dear"), Slice(0, 6)); + CHECK_EQ( + rfind_matching_parentheses("(hello (dear (sir))(!))(how(are(you)))"), + Slice(23, 37) + ); + CHECK_EQ(rfind_matching_parentheses("[hello]", '[', ']'), Slice(0, 6)); + + CHECK_EQ(rfind_matching_parentheses(")(").error(), ParseError::InvalidInput); + CHECK_EQ(rfind_matching_parentheses("(hello))").error(), ParseError::InvalidInput); + + static constexpr auto opens = std::array{ '[', '(' }; + 
static constexpr auto closes = std::array{ ']', ')' }; + CHECK_EQ(rfind_matching_parentheses("([hello])", opens, closes), Slice(0, 8)); + CHECK_EQ(rfind_matching_parentheses("(hello)[hello]", opens, closes), Slice(7, 13)); + } + + SUBCASE("Similar open/close pair") + { + CHECK_EQ(rfind_matching_parentheses(R"("")", '"', '"'), Slice(0, 1)); + CHECK_EQ(rfind_matching_parentheses(R"("hello")", '"', '"'), Slice(0, 6)); + CHECK_EQ(rfind_matching_parentheses(R"("some","csv","value")", '"', '"'), Slice(13, 19)); + CHECK_EQ( + rfind_matching_parentheses(R"(Here is "some)", '"', '"').error(), + ParseError::InvalidInput + ); + + static constexpr auto opens = std::array{ '[', '(', '\'' }; + static constexpr auto closes = std::array{ ']', ')', '\'' }; + CHECK_EQ(rfind_matching_parentheses("'[hello]'", opens, closes), Slice(0, 8)); + CHECK_EQ(rfind_matching_parentheses("['hello', 'world']dear", opens, closes), Slice(0, 17)); + } + } + TEST_CASE("find_not_in_parentheses") { SUBCASE("Single char and different open/close pair") From 274d3505f633e8dcc4e7ddd433913d2efccc1b09 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 17:17:09 +0100 Subject: [PATCH 20/22] Refactor MatchSpec section parsing --- libmamba/src/specs/match_spec.cpp | 74 ++++++++++++++++--------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index d4d6d25413..5d429a03f0 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -87,14 +87,14 @@ namespace mamba::specs namespace { - inline constexpr auto open_or_quote = std::array{ + inline constexpr auto open_or_quote_tokens = std::array{ MatchSpec::prefered_list_open, MatchSpec::alt_list_open, MatchSpec::prefered_quote, MatchSpec::alt_quote, }; - inline constexpr auto close_or_quote = std::array{ + inline constexpr auto close_or_quote_tokens = std::array{ MatchSpec::prefered_list_close, MatchSpec::alt_list_close, 
MatchSpec::prefered_quote, @@ -118,8 +118,8 @@ namespace mamba::specs return util::rfind_not_in_parentheses( str, MatchSpec::channel_namespace_spec_sep, - open_or_quote, - close_or_quote + open_or_quote_tokens, + close_or_quote_tokens ) // FIXME temporary while MatchSpec::parse does not return ``exepted``. .or_else( @@ -133,6 +133,21 @@ namespace mamba::specs ; } + auto rfind_attribute_section(std::string_view str) + { + return util::rfind_matching_parentheses(str, open_or_quote_tokens, close_or_quote_tokens) + // FIXME temporary while MatchSpec::parse does not return ``exepted``. + .or_else( + [&](const auto&) { + throw std::invalid_argument( + fmt::format(R"(Invalid parenthesis in MatchSpec "{}")", str) + ); + } + ) + .value(); + ; + } + auto split_channel_namespace_spec(std::string_view str) -> std::tuple { @@ -167,7 +182,12 @@ namespace mamba::specs auto find_attribute_split(std::string_view str) { - return util::find_not_in_parentheses(str, MatchSpec::attribute_sep, open_or_quote, close_or_quote) + return util::find_not_in_parentheses( + str, + MatchSpec::attribute_sep, + open_or_quote_tokens, + close_or_quote_tokens + ) // FIXME temporary while MatchSpec::parse does not return ``exepted``. .or_else( [&](const auto&) { @@ -332,39 +352,23 @@ namespace mamba::specs .value(); } - auto spec_str = std::string(spec); - - std::smatch match; - - // Step 3. 
strip off brackets portion - static std::regex brackets_re(".*(?:(\\[.*\\]))"); - if (std::regex_search(spec_str, match, brackets_re)) + // Parsing all attributes sections backwards, for instance in + // ``conda-forge::foo[build=3](target=blarg,optional)`` + // this results in: + // - ``target=blarg,optional`` + // - ``build=3`` + spec = util::rstrip(spec); + while (util::ends_with(spec, prefered_list_close) || util::ends_with(spec, alt_list_close)) { - auto brackets_str = match[1].str(); - brackets_str = brackets_str.substr(1, brackets_str.size() - 2); - set_matchspec_attributes(out, brackets_str); - spec_str.erase( - static_cast(match.position(1)), - static_cast(match.length(1)) - ); + auto [start, end] = rfind_attribute_section(spec); + assert(start != npos); + assert(end != npos); + assert(start < end); + set_matchspec_attributes(out, spec.substr(start + 1, end - start - 1)); + spec = util::rstrip(spec.substr(0, start)); } - // Step 4. strip off parens portion - static std::regex parens_re(".*(?:(\\(.*\\)))"); - if (std::regex_search(spec_str, match, parens_re)) - { - auto parens_str = match[1].str(); - parens_str = parens_str.substr(1, parens_str.size() - 2); - set_matchspec_attributes(out, parens_str); - if (parens_str.find("optional") != parens_str.npos) - { - out.extra().optional = true; - } - spec_str.erase( - static_cast(match.position(1)), - static_cast(match.length(1)) - ); - } + auto spec_str = std::string(spec); // support faulty conda matchspecs such as `libblas=[build=*mkl]`, which is // the repr of `libblas=*=*mkl` From 3190f1b353396e9f83ca3242195675d41a87f534 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 28 Feb 2024 17:49:50 +0100 Subject: [PATCH 21/22] Refacator MatchSpec name parsing --- libmamba/include/mamba/specs/match_spec.hpp | 1 + libmamba/src/specs/match_spec.cpp | 48 ++++++++------------- 2 files changed, 20 insertions(+), 29 deletions(-) diff --git a/libmamba/include/mamba/specs/match_spec.hpp 
b/libmamba/include/mamba/specs/match_spec.hpp index 3da295a86d..1556e6284e 100644 --- a/libmamba/include/mamba/specs/match_spec.hpp +++ b/libmamba/include/mamba/specs/match_spec.hpp @@ -39,6 +39,7 @@ namespace mamba::specs inline static constexpr char channel_namespace_spec_sep = ':'; inline static constexpr char attribute_sep = ','; inline static constexpr char attribute_assign = '='; + inline static constexpr auto package_version_sep = std::array{ ' ', '=', '<', '>', '~', '!' }; [[nodiscard]] static auto parse(std::string_view spec) -> MatchSpec; diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index 5d429a03f0..24a547e518 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -4,7 +4,6 @@ // // The full license is in the file LICENSE, distributed with this software. -#include #include #include #include @@ -101,6 +100,12 @@ namespace mamba::specs MatchSpec::alt_quote, }; + template + [[nodiscard]] constexpr auto contains(const Range& range, T elem) -> bool + { + return std::find(range.cbegin(), range.cend(), elem) != range.cend(); + } + /** Return true if the string is a valid hash hex representation. */ auto is_hash(std::string_view text) -> bool { @@ -368,42 +373,27 @@ namespace mamba::specs spec = util::rstrip(spec.substr(0, start)); } - auto spec_str = std::string(spec); + // Split the package name and version in ``pkg 1.5`` or ``pkg>=1.3=bld``. 
+ auto [pkg_name, version_and_build] = util::lstrip_if_parts( + util::lstrip(spec), + [](char c) -> bool { return !contains(MatchSpec::package_version_sep, c); } + ); - // support faulty conda matchspecs such as `libblas=[build=*mkl]`, which is - // the repr of `libblas=*=*mkl` - if (spec_str.back() == '=') - { - spec_str.erase(spec_str.end() - 1); - } - // This is #6 of the spec parsing - // Look for version *and* build string and separator - auto version_and_build = std::string(); - static std::regex version_build_re("([^ =<>!~]+)?([> Date: Wed, 28 Feb 2024 17:55:08 +0100 Subject: [PATCH 22/22] Simplify version and build parsing --- libmamba/src/specs/match_spec.cpp | 168 ++++++++++++++++-------------- 1 file changed, 89 insertions(+), 79 deletions(-) diff --git a/libmamba/src/specs/match_spec.cpp b/libmamba/src/specs/match_spec.cpp index 24a547e518..1cce9c5aa4 100644 --- a/libmamba/src/specs/match_spec.cpp +++ b/libmamba/src/specs/match_spec.cpp @@ -58,32 +58,6 @@ namespace mamba::specs return out; } - namespace - { - auto parse_version_and_build(std::string_view s) - -> std::pair - { - const std::size_t pos = s.find_last_of(" ="); - if (pos == s.npos || pos == 0) - { - return { s, {} }; - } - - if (char c = s[pos]; c == '=') - { - std::size_t pm1 = pos - 1; - char d = s[pm1]; - if (d == '=' || d == '!' 
|| d == '|' || d == ',' || d == '<' || d == '>' || d == '~') - { - return { s, {} }; - } - } - // c is either ' ' or pm1 is none of the forbidden chars - - return { s.substr(0, pos), s.substr(pos + 1) }; - } - } - namespace { inline constexpr auto open_or_quote_tokens = std::array{ @@ -305,30 +279,97 @@ namespace mamba::specs { set_matchspec_attributes(spec, attrs.substr(next_pos + 1)); } - }; + } + + auto rparse_and_set_matchspec_attributes(MatchSpec& spec, std::string_view str) + -> std::string_view + { + // Parsing all attributes sections backwards, for instance in + // ``conda-forge::foo[build=3](target=blarg,optional)`` + // this results in: + // - ``target=blarg,optional`` + // - ``build=3`` + str = util::rstrip(str); + while (util::ends_with(str, MatchSpec::prefered_list_close) + || util::ends_with(str, MatchSpec::alt_list_close)) + { + auto [start, end] = rfind_attribute_section(str); + assert(start != std::string::npos); + assert(end != std::string::npos); + assert(start < end); + set_matchspec_attributes(spec, str.substr(start + 1, end - start - 1)); + str = util::rstrip(str.substr(0, start)); + } + return util::lstrip(str); + } + + auto split_version_and_build(std::string_view str) + -> std::pair + { + str = util::strip(str); + + // Support faulty conda matchspecs such as `libblas=[build=*mkl]`, which is + // the repr of `libblas=*=*mkl` + str = util::rstrip(str, '='); + + const auto pos = str.find_last_of(" ="); + if (pos == str.npos || pos == 0) + { + return { str, {} }; + } + + if (char c = str[pos]; c == '=') + { + char d = str[pos - 1]; + if (d == '=' || d == '!' || d == '|' || d == ',' || d == '<' || d == '>' || d == '~') + { + return { str, {} }; + } + } + + // c is either ' ' or d is none of the forbidden chars + return { str.substr(0, pos), str.substr(pos + 1) }; + } + + auto split_name_version_and_build(std::string_view str) + { + // Split the package name and version in ``pkg 1.5`` or ``pkg>=1.3=bld``. 
+ auto [pkg_name, version_and_build] = util::lstrip_if_parts( + str, + [](char c) -> bool { return !contains(MatchSpec::package_version_sep, c); } + ); + + if (pkg_name.empty()) + { + throw std::invalid_argument("Empty package name"); + } + + auto [version_str, build_string_str] = split_version_and_build(version_and_build); + return std::tuple(pkg_name, version_str, build_string_str); + } } - auto MatchSpec::parse(std::string_view spec) -> MatchSpec + auto MatchSpec::parse(std::string_view str) -> MatchSpec { static constexpr auto npos = std::string_view::npos; - spec = util::strip(spec); - if (spec.empty()) + str = util::strip(str); + if (str.empty()) { return {}; } // A plain URL like https://conda.anaconda.org/conda-forge/linux-64/pkg-6.4-bld.conda - if (has_archive_extension(spec)) + if (has_archive_extension(str)) { - return MatchSpec::parse_url(spec); + return MatchSpec::parse_url(str); } // A URL with hash, generated by `mamba env export --explicit` like // https://conda.anaconda.org/conda-forge/linux-64/pkg-6.4-bld.conda#7dbaa197d7ba6032caf7ae7f32c1efa0 - if (const auto idx = spec.rfind(url_md5_sep); idx != npos) + if (const auto idx = str.rfind(url_md5_sep); idx != npos) { - auto url = spec.substr(0, idx); - auto hash = spec.substr(idx + 1); + auto url = str.substr(0, idx); + auto hash = str.substr(idx + 1); if (has_archive_extension(url)) { auto out = MatchSpec::parse_url(url); @@ -349,7 +390,7 @@ namespace mamba::specs // - ``namespace`` // - ``spec >=3 [attr="val", ...]`` auto chan = std::string_view(); - std::tie(chan, out.m_name_space, spec) = split_channel_namespace_spec(spec); + std::tie(chan, out.m_name_space, str) = split_channel_namespace_spec(str); if (!chan.empty()) { out.m_channel = UnresolvedChannel::parse(chan) @@ -357,54 +398,23 @@ namespace mamba::specs .value(); } - // Parsing all attributes sections backwards, for instance in - // ``conda-forge::foo[build=3](target=blarg,optional)`` - // this results in: - // - ``target=blarg,optional`` 
- // - ``build=3`` - spec = util::rstrip(spec); - while (util::ends_with(spec, prefered_list_close) || util::ends_with(spec, alt_list_close)) - { - auto [start, end] = rfind_attribute_section(spec); - assert(start != npos); - assert(end != npos); - assert(start < end); - set_matchspec_attributes(out, spec.substr(start + 1, end - start - 1)); - spec = util::rstrip(spec.substr(0, start)); - } + auto pkg_ver_bld = rparse_and_set_matchspec_attributes(out, str); + + auto [name, ver, bld] = split_name_version_and_build(pkg_ver_bld); - // Split the package name and version in ``pkg 1.5`` or ``pkg>=1.3=bld``. - auto [pkg_name, version_and_build] = util::lstrip_if_parts( - util::lstrip(spec), - [](char c) -> bool { return !contains(MatchSpec::package_version_sep, c); } - ); + out.m_name = NameSpec(std::string(name)); - if (pkg_name.empty()) + // Set the version and build string, but avoid overriding in case nothing is specified + // as it may already be set in attribute as in ``numpy[version=1.12]``. + if (!ver.empty()) { - throw std::invalid_argument("Empty package name"); + out.m_version = VersionSpec::parse(ver) + .or_else([](ParseError&& error) { throw std::move(error); }) + .value(); } - out.m_name = NameSpec(std::string(pkg_name)); - - // Support faulty conda matchspecs such as `libblas=[build=*mkl]`, which is - // the repr of `libblas=*=*mkl` - version_and_build = util::strip(version_and_build); - version_and_build = util::rstrip(version_and_build, '='); - - // # Step 7. 
otherwise sort out version + build - // spec_str = spec_str and spec_str.strip() - if (!version_and_build.empty()) + if (!bld.empty()) { - auto [version_str, build_string_str] = parse_version_and_build(version_and_build); - if (!version_str.empty()) - { - out.m_version = VersionSpec::parse(version_str) - .or_else([](ParseError&& error) { throw std::move(error); }) - .value(); - } - if (!build_string_str.empty()) - { - out.m_build_string = MatchSpec::BuildStringSpec(std::string(build_string_str)); - } + out.m_build_string = MatchSpec::BuildStringSpec(std::string(bld)); } return out;