Skip to content
This repository has been archived by the owner on Jun 17, 2024. It is now read-only.

Bug 1803465 - extend isURLLenient to match IPv6 literals #4090

Merged
merged 18 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,20 @@ object URLStringUtils {

private val isURLLenient by lazy {
// Be lenient about what is classified as potentially a URL.
// (\w+-+)*\w+(://[/]*|:|\.)(\w+-+)*\w+([\S&&[^\w-]]\S*)?
// ------- -------
// (\w+-+)*[\w\[]+(://[/]*|:|\.)(\w+-+)*[\w\[:]+([\S&&[^\w-]]\S*)?
// -------- --------
// 0 or more pairs of consecutive word letters or dashes
// --- ---
// followed by at least a single word letter.
// ----------- ----------
// ------- --------
// followed by at least a single word letter or [ipv6::] character.
// --------------- ----------------
// Combined, that means "w", "w-w", "w-w-w", etc match, but "w-", "w-w-", "w-w-w-" do not.
// --------------
// --------------
// That surrounds :, :// or .
// -
// -
// At the end, there may be an optional
// ------------
// ------------
// non-word, non-- but still non-space character (e.g., ':', '/', '.', '?' but not 'a', '-', '\t')
// ---
// ---
// and 0 or more non-space characters.
//
// These are some (odd) examples of valid urls according to this pattern:
Expand All @@ -77,7 +77,7 @@ object URLStringUtils {
// www.c-c-
// 3-3
Pattern.compile(
"^\\s*(\\w+-+)*\\w+(://[/]*|:|\\.)(\\w+-+)*\\w+([\\S&&[^\\w-]]\\S*)?\\s*$",
"^\\s*(\\w+-+)*[\\w\\[]+(://[/]*|:|\\.)(\\w+-+)*[\\w\\[:]+([\\S&&[^\\w-]]\\S*)?\\s*$",
flags,
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ class WebURLFinder {
companion object {
// Taken from mozilla.components.support.ktx.util.URLStringUtils. See documentation
// there for a complete description.
private const val autolinkWebUrlPattern = "(\\w+-)*\\w+(://[/]*|:|\\.)(\\w+-)*\\w+([\\S&&[^\\w-]]\\S*)?"
private const val autolinkWebUrlPattern =
"(\\w+-+)*[\\w\\[]+(://[/]*|:|\\.)(\\w+-+)*[\\w\\[:]+([\\S&&[^\\w-]]\\S*)?"

private val autolinkWebUrl by lazy {
Pattern.compile(autolinkWebUrlPattern, 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,56 @@ class URLStringUtilsTest {
assertFalse(isURLLike("www.c-c- "))
assertFalse(isURLLike("3-3 "))

// Valid IPv6 literals correctly recognized as valid.
val validIPv6Literals = listOf(
"[::]",
"[::1]",
"[1::]",
"[1:2:3:4:5:6:7:8]",
"[2001:db8::1.2.3.4]",
"[::1]:8080",
)

validIPv6Literals.forEach { url ->
assertTrue(isURLLike(url))
assertTrue(isURLLike("$url/"))
assertTrue(isURLLike("https://$url"))
assertTrue(isURLLike("https://$url/"))
assertTrue(isURLLike("https:$url"))
assertTrue(isURLLike("https:$url/"))
assertTrue(isURLLike("http://$url"))
assertTrue(isURLLike("http://$url/"))
assertTrue(isURLLike("http:$url"))
pmarks-net marked this conversation as resolved.
Show resolved Hide resolved
assertTrue(isURLLike("http:$url/"))
}

// Invalid IPv6 literals correctly recognized as invalid.
assertFalse(isURLLike("::1"))
assertFalse(isURLLike(":::"))
assertFalse(isURLLike("[[http://]]"))
assertFalse(isURLLike("[[["))
assertFalse(isURLLike("[[[:"))
assertFalse(isURLLike("[[[:/"))
assertFalse(isURLLike("http://]]]"))

// Invalid IPv6 literals correctly recognized as something else.
assertTrue(isURLLike("fe80::"))
assertTrue(isURLLike("x:["))

// Invalid IPv6 literals incorrectly recognized as valid.
// We allow these for now, until bug 1685152 is fixed.
assertTrue(isURLLike("[:::"))
pmarks-net marked this conversation as resolved.
Show resolved Hide resolved
assertTrue(isURLLike("http://[::"))
assertTrue(isURLLike("http://[::/path"))
assertTrue(isURLLike("http://[::?query"))
assertTrue(isURLLike("[[http://banana]]"))
assertTrue(isURLLike("http://[[["))
assertTrue(isURLLike("[[[::"))
assertTrue(isURLLike("[[[::/"))
assertTrue(isURLLike("http://[1.2.3]"))
assertTrue(isURLLike("https://[1:2:3:4:5:6:7]/"))
assertTrue(isURLLike("https://[1:2:3:4:5:6:7:8:9]/"))

// Examples from issues
assertTrue(isURLLike("https://abc--cba.com/")) // #7096
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ class WebURLFinderTest {
)
assertEquals("http://ß.de/", find("http://ß.de/ çnn.çơḿ"))
assertEquals("htt-p://ß.de/", find("çnn.çơḿ htt-p://ß.de/"))
assertEquals(
"http://[2001:db8::1.2.3.4]:8080/inner#anchor&arg=1",
find("test.com http://[2001:db8::1.2.3.4]:8080/inner#anchor&arg=1"),
)
assertEquals("http://[::]", find("test.com http://[::]"))
pmarks-net marked this conversation as resolved.
Show resolved Hide resolved
}

@Test
Expand All @@ -68,6 +73,9 @@ class WebURLFinderTest {
assertEquals("n-oscheme.com", find("n-oscheme.com example.com"))
assertEquals("n-oscheme.com", find("----------n-oscheme.com "))
assertEquals("n-oscheme.ç", find("----------n-oscheme.ç-----------------------"))

// We would ideally test "[::] example.com" here, but java.net.URI
// doesn't seem to accept IPv6 literals without a scheme.
}

@Test
Expand Down Expand Up @@ -118,4 +126,101 @@ class WebURLFinderTest {
assertFalse("content://com.test.app/test".isValidWebURL())
assertFalse("coNTent://com.test.app/test".isValidWebURL())
}

@Test
fun isUrlLikeEmulated() {
// autolinkWebUrlPattern uses a copy of the regex from URLStringUtils,
// so here we emulate isURLLike() and copy its tests.
val isURLLike: (String) -> Boolean = {
find("random_text $it other_random_text") == it.trim()
}

// All cases that behave differently are annotated with INVERT().
val INVERT: (Boolean) -> Boolean = { !it }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I forgot to specify that in my previous comment: we are adding complexity with this annotation-like function, that I think we could replace with clear comments around the tests.

We could replace the use of assertTrue(INVERT(isURLLike(xxx))) by assertFalse(isURLLike(xxx)). Those asserts could be gathered with a comment above explaining that the ideal regex should have the same result as the one in UrlStringUtils.kt, but the current one gives a different result. Also explaining that we allow it for now, until bug 1685152 is fixed.

What do you think? 

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

replace the use of assertTrue(INVERT(isURLLike(xxx))) by assertFalse(isURLLike(xxx)). Those asserts could be gathered with a comment

I intentionally kept assertTrue/assertFalse the same as the original tests, and replaced the comments with a programmatic annotation, in order to reduce the cognitive effort required to maintain a second copy. Your suggestion would increase complexity when both functions are considered.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do understand the idea and the value of trying to keep both tests as similar as possible.
Nevertheless, this is starting a new pattern in the codebase, and maybe we should try to follow the existing conventions instead?

Maybe we could get a second opinion from someone else? cc: @jonalmeida

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey! I agree with @titooan, I too would prefer avoiding new patterns. Adding these cases that should pass/fail can be in a separate section with a well-worded comment to explain that these should behave differently when the linked bug is fixed.

I intentionally kept assertTrue/assertFalse the same as the original tests, and replaced the comments with a programmatic annotation, in order to reduce the cognitive effort required to maintain a second copy. Your suggestion would increase complexity when both functions are considered.

The felt complexity of both solutions is indeed subjective. In cases like this, I would recommend the advice of the code maintainers to decide what is the preferred way, especially in terms of conventions the codebase follows. 🙂

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.


assertFalse(isURLLike("inurl:mozilla.org advanced search"))
assertFalse(isURLLike("sf: help"))
assertFalse(isURLLike("mozilla./~"))
assertFalse(isURLLike("cnn.com politics"))

assertTrue(isURLLike("about:config"))
assertTrue(isURLLike("about:config:8000"))
assertTrue(INVERT(isURLLike("file:///home/user/myfile.html")))
assertTrue(INVERT(isURLLike("file://////////////home//user/myfile.html")))
assertTrue(INVERT(isURLLike("file://C:\\Users\\user\\myfile.html")))
assertTrue(isURLLike("http://192.168.255.255"))
assertTrue(isURLLike("link.unknown"))
assertTrue(isURLLike("3.14.2019"))
assertTrue(isURLLike("3-four.14.2019"))
assertTrue(isURLLike(" cnn.com "))
assertTrue(isURLLike(" cnn.com"))
assertTrue(isURLLike("cnn.com "))
assertTrue(isURLLike("mozilla.com/~userdir"))
assertTrue(isURLLike("my-domain.com"))
assertTrue(isURLLike("http://faß.de//"))
assertTrue(isURLLike("cnn.cơḿ"))
assertTrue(isURLLike("cnn.çơḿ"))

assertTrue(isURLLike("c-c.com"))
assertTrue(isURLLike("c-c-c-c.c-c-c"))
assertTrue(isURLLike("c-http://c.com"))
assertTrue(isURLLike("about-mozilla:mozilla"))
assertTrue(isURLLike("c-http.d-x"))
assertTrue(isURLLike("www.c.-"))
assertTrue(isURLLike("3-3.3"))
assertTrue(isURLLike("www.c-c.-"))

assertFalse(isURLLike(" -://x.com "))
assertFalse(isURLLike(" -x.com"))
assertFalse(isURLLike("http://www-.com"))
assertFalse(isURLLike("www.c-c- "))
assertFalse(isURLLike("3-3 "))

val validIPv6Literals = listOf(
"[::]",
"[::1]",
"[1::]",
"[1:2:3:4:5:6:7:8]",
"[2001:db8::1.2.3.4]",
"[::1]:8080",
)

validIPv6Literals.forEach { url ->
assertTrue(INVERT(isURLLike(url)))
assertTrue(INVERT(isURLLike("$url/")))
assertTrue(isURLLike("https://$url"))
assertTrue(isURLLike("https://$url/"))
assertTrue(isURLLike("https:$url"))
assertTrue(isURLLike("https:$url/"))
assertTrue(isURLLike("http://$url"))
assertTrue(isURLLike("http://$url/"))
assertTrue(isURLLike("http:$url"))
assertTrue(isURLLike("http:$url/"))
}

assertFalse(isURLLike("::1"))
assertFalse(isURLLike(":::"))
assertFalse(isURLLike("[[http://]]"))
assertFalse(isURLLike("[[["))
assertFalse(isURLLike("[[[:"))
assertFalse(isURLLike("[[[:/"))
assertFalse(isURLLike("http://]]]"))

assertTrue(isURLLike("fe80::"))
assertTrue(isURLLike("x:["))

assertTrue(INVERT(isURLLike("[:::")))
assertTrue(INVERT(isURLLike("http://[::")))
assertTrue(INVERT(isURLLike("http://[::/path")))
assertTrue(INVERT(isURLLike("http://[::?query")))
assertTrue(INVERT(isURLLike("[[http://banana]]")))
assertTrue(INVERT(isURLLike("http://[[[")))
assertTrue(INVERT(isURLLike("[[[::")))
assertTrue(INVERT(isURLLike("[[[::/")))
assertTrue(INVERT(isURLLike("http://[1.2.3]")))
assertTrue(INVERT(isURLLike("https://[1:2:3:4:5:6:7]/")))
assertTrue(INVERT(isURLLike("https://[1:2:3:4:5:6:7:8:9]/")))

assertTrue(isURLLike("https://abc--cba.com/"))
}
}
3 changes: 3 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ permalink: /changelog/
* **browser-engine-gecko**:
* Added support for translating and restoring a translated page on the engine. [Bug 1844523](https://bugzilla.mozilla.org/show_bug.cgi?id=1844523)

* **support-utils**
* Recognize IPv6 literals in the address bar. [Bug 1803465](https://bugzilla.mozilla.org/show_bug.cgi?id=1803465)

# 120.0
* [Commits](https://github.com/mozilla-mobile/firefox-android/compare/releases_v119..releases_v120)
* [Dependencies](https://github.com/mozilla-mobile/firefox-android/blob/releases_v120/android-components/plugins/dependencies/src/main/java/DependenciesPlugin.kt)
Expand Down