Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 47 additions & 10 deletions src/core/uri/uri.cc
Original file line number Diff line number Diff line change
Expand Up @@ -464,21 +464,26 @@ auto URI::recompose_without_fragment() const -> std::optional<std::string> {
// Scheme
const auto result_scheme{this->scheme()};
if (result_scheme.has_value()) {
result << result_scheme.value();
if (this->is_urn() || this->is_tag() || this->is_mailto()) {
result << ":";
} else {
result << "://";
}
result << result_scheme.value() << ":";
}

// Authority
const auto user_info{this->userinfo()};
const auto result_host{this->host()};
const auto result_port{this->port()};
const bool has_authority{user_info.has_value() || result_host.has_value() ||
result_port.has_value()};

// Add "//" prefix when we have authority (with or without scheme)
if (has_authority) {
result << "//";
}

if (user_info.has_value()) {
result << user_info.value() << "@";
}

// Host
const auto result_host{this->host()};
if (result_host.has_value()) {
if (this->is_ipv6()) {
// By default uriparser will parse the IPv6 address without brackets
Expand All @@ -493,15 +498,24 @@ auto URI::recompose_without_fragment() const -> std::optional<std::string> {
}

// Port
const auto result_port{this->port()};
if (result_port.has_value()) {
result << ':' << result_port.value();
}

// Path
const auto result_path{this->path()};
if (result_path.has_value()) {
result << result_path.value();
std::string path_str{result_path.value()};
// RFC 3986: If there's a scheme but no authority, the path cannot start
// with "//" to avoid confusion with network-path references. Also,
// uriparser sometimes adds a leading "/" to paths when normalizing URIs
// like "g:h", which should have path "h" not "/h". We strip the leading "/"
// in this case.
if (result_scheme.has_value() && !has_authority &&
path_str.starts_with("/") && !path_str.starts_with("//")) {
path_str = path_str.substr(1);
}
result << path_str;
}

// Query
Expand Down Expand Up @@ -581,6 +595,12 @@ auto URI::canonicalize() -> URI & {
}

auto URI::resolve_from(const URI &base) -> URI & {
// RFC 3986 Section 5.2.2: If the reference has a scheme, it's already
// absolute and should be used as-is (just normalize it)
if (this->is_absolute()) {
return *this;
}

// Handle special case: fragment-only URI with a base that has no fragment
if (this->is_fragment_only() && !base.fragment().has_value()) {
this->data = base.data;
Expand All @@ -602,31 +622,48 @@ auto URI::resolve_from(const URI &base) -> URI & {
copy.host_ = "placeholder";
}

// IMPORTANT: We need to parse the reference WITHOUT normalization
// because normalization removes dot segments ("." and "./") which
// should only be removed AFTER resolution, not before.
// The issue is that uri_parse() calls uri_normalize(), so we need to
// parse the original data again without normalization.
UriUriA unnormalized_reference;
const char *error_position = nullptr;
if (uriParseSingleUriA(&unnormalized_reference, this->data.c_str(),
&error_position) != URI_SUCCESS) {
throw URIParseError{
static_cast<std::uint64_t>(error_position - this->data.c_str() + 1)};
}

UriUriA absoluteDest;
// Looks like this function allocates to the output variable
// even on failure.
// See https://uriparser.github.io/doc/api/latest/
switch (uriAddBaseUriExA(&absoluteDest, &this->internal->uri,
switch (uriAddBaseUriExA(&absoluteDest, &unnormalized_reference,
&copy.internal->uri, URI_RESOLVE_STRICTLY)) {
case URI_SUCCESS:
break;
case URI_ERROR_ADDBASE_REL_BASE:
uriFreeUriMembersA(&absoluteDest);
uriFreeUriMembersA(&unnormalized_reference);
assert(!copy.is_absolute());
throw URIError{"Base URI is not absolute"};
default:
uriFreeUriMembersA(&absoluteDest);
uriFreeUriMembersA(&unnormalized_reference);
throw URIError{"Could not resolve URI"};
}

try {
uri_normalize(&absoluteDest);
this->data = uri_to_string(&absoluteDest);
uriFreeUriMembersA(&absoluteDest);
uriFreeUriMembersA(&unnormalized_reference);
this->parse();
return *this;
} catch (...) {
uriFreeUriMembersA(&absoluteDest);
uriFreeUriMembersA(&unnormalized_reference);
throw;
}
}
Expand Down
74 changes: 74 additions & 0 deletions test/uri/uri_host_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,77 @@ TEST(URI_host, urn) {
const sourcemeta::core::URI uri{"urn:example:schema"};
EXPECT_FALSE(uri.host().has_value());
}

// An IPv4 literal in the authority is expected back verbatim as the host.
TEST(URI_host, rfc3986_ipv4_address) {
  const sourcemeta::core::URI uri{"http://192.168.1.1/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "192.168.1.1");
}

// The port suffix must not leak into the host of an IPv4 authority.
TEST(URI_host, rfc3986_ipv4_address_with_port) {
  const sourcemeta::core::URI uri{"http://192.168.1.1:8080/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "192.168.1.1");
}

// A bracketed IPv6 literal is expected back without the enclosing brackets.
TEST(URI_host, rfc3986_ipv6_address) {
  const sourcemeta::core::URI uri{"http://[2001:db8::1]/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "2001:db8::1");
}

// A bracketed IPv6 literal followed by a port: the host is expected back
// without the brackets and without the port.
TEST(URI_host, rfc3986_ipv6_address_with_port) {
  const sourcemeta::core::URI uri{"http://[2001:db8::1]:8080/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "2001:db8::1");
}

// The compressed IPv6 loopback literal is expected back as "::1".
TEST(URI_host, rfc3986_ipv6_localhost) {
  const sourcemeta::core::URI uri{"http://[::1]/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "::1");
}

// Two URIs whose hosts differ only in letter case are expected to expose
// the same lowercase host.
TEST(URI_host, rfc3986_host_case_insensitive) {
  const sourcemeta::core::URI uri1{"http://EXAMPLE.COM/path"};
  const sourcemeta::core::URI uri2{"http://example.com/path"};
  const auto host1{uri1.host()};
  const auto host2{uri2.host()};
  // Guard both optionals before dereferencing them below
  ASSERT_TRUE(host1.has_value());
  ASSERT_TRUE(host2.has_value());
  EXPECT_EQ(host1.value(), "example.com");
  EXPECT_EQ(host2.value(), "example.com");
  EXPECT_EQ(host1.value(), host2.value());
}

// Hyphens inside a registered name are expected to be preserved in the host.
TEST(URI_host, rfc3986_host_with_hyphen) {
  const sourcemeta::core::URI uri{"http://my-example-host.com/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "my-example-host.com");
}

// Digits inside a registered name are expected to be preserved in the host.
TEST(URI_host, rfc3986_host_with_numbers) {
  const sourcemeta::core::URI uri{"http://example123.com/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "example123.com");
}

// A multi-label name is expected back in full, including every subdomain.
TEST(URI_host, rfc3986_subdomain) {
  const sourcemeta::core::URI uri{"http://www.sub.example.com/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "www.sub.example.com");
}

// A single-label name such as "localhost" is expected back as the host.
TEST(URI_host, rfc3986_localhost) {
  const sourcemeta::core::URI uri{"http://localhost/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "localhost");
}

// "file:///..." has an authority with nothing in it: the host is expected to
// be present but empty, rather than absent.
TEST(URI_host, rfc3986_empty_host_with_authority) {
  const sourcemeta::core::URI uri{"file:///path/to/file"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "");
}

// A percent-encoded "." (%2E) in the host is expected to come back decoded.
TEST(URI_host, rfc3986_percent_encoded_host) {
  const sourcemeta::core::URI uri{"http://example%2Ecom/path"};
  const auto host{uri.host()};
  // Fatal assertion: never call value() on an empty optional
  ASSERT_TRUE(host.has_value());
  EXPECT_EQ(host.value(), "example.com");
}
80 changes: 80 additions & 0 deletions test/uri/uri_is_absolute_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,83 @@ TEST(URI_is_absolute, slash) {
const sourcemeta::core::URI uri{"/foo"};
EXPECT_FALSE(uri.is_absolute());
}

// A URI with the "http" scheme is expected to be reported as absolute.
TEST(URI_is_absolute, rfc3986_http_scheme) {
  const sourcemeta::core::URI input{"http://example.com/path"};
  EXPECT_TRUE(input.is_absolute());
}

// A URI with the "https" scheme is expected to be reported as absolute.
TEST(URI_is_absolute, rfc3986_https_scheme) {
  const sourcemeta::core::URI input{"https://example.com/path"};
  EXPECT_TRUE(input.is_absolute());
}

// A URI with the "ftp" scheme is expected to be reported as absolute.
TEST(URI_is_absolute, rfc3986_ftp_scheme) {
  const sourcemeta::core::URI input{"ftp://ftp.example.com/file"};
  EXPECT_TRUE(input.is_absolute());
}

// A "file" URI with an empty authority is expected to be reported as
// absolute.
TEST(URI_is_absolute, rfc3986_file_scheme) {
  const sourcemeta::core::URI input{"file:///path/to/file"};
  EXPECT_TRUE(input.is_absolute());
}

// A "mailto" URI (scheme, no "//") is expected to be reported as absolute.
TEST(URI_is_absolute, rfc3986_mailto_scheme) {
  const sourcemeta::core::URI input{"mailto:user@example.com"};
  EXPECT_TRUE(input.is_absolute());
}

// A "tel" URI (scheme, no "//") is expected to be reported as absolute.
TEST(URI_is_absolute, rfc3986_tel_scheme) {
  const sourcemeta::core::URI input{"tel:+1-555-1212"};
  EXPECT_TRUE(input.is_absolute());
}

// A "data" URI with an inline payload is expected to be reported as absolute.
TEST(URI_is_absolute, rfc3986_data_scheme) {
  const sourcemeta::core::URI input{"data:text/plain;base64,SGVsbG8="};
  EXPECT_TRUE(input.is_absolute());
}

// A bare relative path is expected not to be reported as absolute.
TEST(URI_is_absolute, rfc3986_relative_path) {
  const sourcemeta::core::URI input{"relative/path"};
  EXPECT_FALSE(input.is_absolute());
}

// A path starting with "/" but lacking a scheme is expected not to be
// reported as absolute.
TEST(URI_is_absolute, rfc3986_absolute_path) {
  const sourcemeta::core::URI input{"/absolute/path"};
  EXPECT_FALSE(input.is_absolute());
}

// A reference starting with "//" (authority, no scheme) is expected not to
// be reported as absolute.
TEST(URI_is_absolute, rfc3986_network_path) {
  const sourcemeta::core::URI input{"//example.com/path"};
  EXPECT_FALSE(input.is_absolute());
}

// A reference made up of just a query is expected not to be reported as
// absolute.
TEST(URI_is_absolute, rfc3986_query_only) {
  const sourcemeta::core::URI input{"?query=value"};
  EXPECT_FALSE(input.is_absolute());
}

// A reference made up of just a fragment is expected not to be reported as
// absolute.
TEST(URI_is_absolute, rfc3986_fragment_only) {
  const sourcemeta::core::URI input{"#fragment"};
  EXPECT_FALSE(input.is_absolute());
}

// The empty string is expected not to be reported as absolute.
TEST(URI_is_absolute, rfc3986_empty_uri) {
  const sourcemeta::core::URI input{""};
  EXPECT_FALSE(input.is_absolute());
}

// The single-dot reference "." is expected not to be reported as absolute.
TEST(URI_is_absolute, rfc3986_dot_relative) {
  const sourcemeta::core::URI input{"."};
  EXPECT_FALSE(input.is_absolute());
}

// The double-dot reference ".." is expected not to be reported as absolute.
TEST(URI_is_absolute, rfc3986_dotdot_relative) {
  const sourcemeta::core::URI input{".."};
  EXPECT_FALSE(input.is_absolute());
}

// A URI that has a scheme and also carries a fragment is expected to be
// reported as absolute.
TEST(URI_is_absolute, rfc3986_scheme_with_fragment) {
  const sourcemeta::core::URI input{"http://example.com/path#fragment"};
  EXPECT_TRUE(input.is_absolute());
}
Loading