Skip to content
This repository has been archived by the owner on Jun 17, 2024. It is now read-only.

Bug 1803465 - extend isURLLenient to match IPv6 literals #4090

Merged
merged 18 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,20 @@ object URLStringUtils {

private val isURLLenient by lazy {
// Be lenient about what is classified as potentially a URL.
// (\w+-+)*\w+(://[/]*|:|\.)(\w+-+)*\w+([\S&&[^\w-]]\S*)?
// ------- -------
// (\w+-+)*[\w\[]+(://[/]*|:|\.)(\w+-+)*[\w\[:]+([\S&&[^\w-]]\S*)?
// -------- --------
// 0 or more pairs of consecutive word letters or dashes
// --- ---
// followed by at least a single word letter.
// ----------- ----------
// ------- --------
// followed by at least a single word letter or [ipv6::] character.
// --------------- ----------------
// Combined, that means "w", "w-w", "w-w-w", etc match, but "w-", "w-w-", "w-w-w-" do not.
// --------------
// --------------
// That surrounds :, :// or .
// -
// -
// At the end, there may be an optional
// ------------
// ------------
// non-word, non-- but still non-space character (e.g., ':', '/', '.', '?' but not 'a', '-', '\t')
// ---
// ---
// and 0 or more non-space characters.
//
// These are some (odd) examples of valid urls according to this pattern:
Expand All @@ -77,7 +77,7 @@ object URLStringUtils {
// www.c-c-
// 3-3
Pattern.compile(
"^\\s*(\\w+-+)*\\w+(://[/]*|:|\\.)(\\w+-+)*\\w+([\\S&&[^\\w-]]\\S*)?\\s*$",
"^\\s*(\\w+-+)*[\\w\\[]+(://[/]*|:|\\.)(\\w+-+)*[\\w\\[:]+([\\S&&[^\\w-]]\\S*)?\\s*$",
flags,
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ class WebURLFinder {
companion object {
// Taken from mozilla.components.support.ktx.util.URLStringUtils. See documentation
// there for a complete description.
private const val autolinkWebUrlPattern = "(\\w+-)*\\w+(://[/]*|:|\\.)(\\w+-)*\\w+([\\S&&[^\\w-]]\\S*)?"
private const val autolinkWebUrlPattern =
"(\\w+-+)*[\\w\\[]+(://[/]*|:|\\.)(\\w+-+)*[\\w\\[:]+([\\S&&[^\\w-]]\\S*)?"

private val autolinkWebUrl by lazy {
Pattern.compile(autolinkWebUrlPattern, 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,56 @@ class URLStringUtilsTest {
assertFalse(isURLLike("www.c-c- "))
assertFalse(isURLLike("3-3 "))

// Valid IPv6 literals correctly recognized as valid.
val validIPv6Literals = listOf(
"[::]",
"[::1]",
"[1::]",
"[1:2:3:4:5:6:7:8]",
"[2001:db8::1.2.3.4]",
"[::1]:8080",
)

validIPv6Literals.forEach { url ->
assertTrue(isURLLike(url))
assertTrue(isURLLike("$url/"))
assertTrue(isURLLike("https://$url"))
assertTrue(isURLLike("https://$url/"))
assertTrue(isURLLike("https:$url"))
assertTrue(isURLLike("https:$url/"))
assertTrue(isURLLike("http://$url"))
assertTrue(isURLLike("http://$url/"))
assertTrue(isURLLike("http:$url"))
pmarks-net marked this conversation as resolved.
Show resolved Hide resolved
assertTrue(isURLLike("http:$url/"))
}

// Invalid IPv6 literals correctly recognized as invalid.
assertFalse(isURLLike("::1"))
assertFalse(isURLLike(":::"))
assertFalse(isURLLike("[[http://]]"))
assertFalse(isURLLike("[[["))
assertFalse(isURLLike("[[[:"))
assertFalse(isURLLike("[[[:/"))
assertFalse(isURLLike("http://]]]"))

// Invalid IPv6 literals correctly recognized as something else.
assertTrue(isURLLike("fe80::"))
assertTrue(isURLLike("x:["))

// Invalid IPv6 literals incorrectly recognized as valid.
// We allow these for now, until bug 1685152 is fixed.
assertTrue(isURLLike("[:::"))
pmarks-net marked this conversation as resolved.
Show resolved Hide resolved
assertTrue(isURLLike("http://[::"))
assertTrue(isURLLike("http://[::/path"))
assertTrue(isURLLike("http://[::?query"))
assertTrue(isURLLike("[[http://banana]]"))
assertTrue(isURLLike("http://[[["))
assertTrue(isURLLike("[[[::"))
assertTrue(isURLLike("[[[::/"))
assertTrue(isURLLike("http://[1.2.3]"))
assertTrue(isURLLike("https://[1:2:3:4:5:6:7]/"))
assertTrue(isURLLike("https://[1:2:3:4:5:6:7:8:9]/"))

// Examples from issues
assertTrue(isURLLike("https://abc--cba.com/")) // #7096
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ class WebURLFinderTest {
)
assertEquals("http://ß.de/", find("http://ß.de/ çnn.çơḿ"))
assertEquals("htt-p://ß.de/", find("çnn.çơḿ htt-p://ß.de/"))
assertEquals(
"http://[2001:db8::1.2.3.4]:8080/inner#anchor&arg=1",
find("test.com http://[2001:db8::1.2.3.4]:8080/inner#anchor&arg=1"),
)
assertEquals("http://[::]", find("test.com http://[::]"))
pmarks-net marked this conversation as resolved.
Show resolved Hide resolved
}

@Test
Expand All @@ -68,6 +73,9 @@ class WebURLFinderTest {
assertEquals("n-oscheme.com", find("n-oscheme.com example.com"))
assertEquals("n-oscheme.com", find("----------n-oscheme.com "))
assertEquals("n-oscheme.ç", find("----------n-oscheme.ç-----------------------"))

// We would ideally test "[::] example.com" here, but java.net.URI
// doesn't seem to accept IPv6 literals without a scheme.
}

@Test
Expand Down Expand Up @@ -118,4 +126,108 @@ class WebURLFinderTest {
assertFalse("content://com.test.app/test".isValidWebURL())
assertFalse("coNTent://com.test.app/test".isValidWebURL())
}

@Test
fun isUrlLikeEmulated() {
    // autolinkWebUrlPattern is a copy of the regex used by URLStringUtils, so
    // this test emulates isURLLike() on top of find() and replays its cases:
    // a candidate counts as URL-like when find() returns exactly the trimmed
    // candidate after it has been embedded in unrelated text.
    fun isURLLike(candidate: String): Boolean =
        find("random_text $candidate other_random_text") == candidate.trim()

    // Asserts, in order, that every candidate is recognized.
    fun expectUrlLike(vararg candidates: String) =
        candidates.forEach { assertTrue(isURLLike(it)) }

    // Asserts, in order, that every candidate is rejected.
    fun expectNotUrlLike(vararg candidates: String) =
        candidates.forEach { assertFalse(isURLLike(it)) }

    expectNotUrlLike(
        "inurl:mozilla.org advanced search",
        "sf: help",
        "mozilla./~",
        "cnn.com politics",
    )

    expectUrlLike(
        "about:config",
        "about:config:8000",
    )

    // Deviation from the original isUrlLike test: file:// is rejected by
    // isInvalidUriScheme, so these are expected to fail here.
    expectNotUrlLike(
        "file:///home/user/myfile.html",
        "file://////////////home//user/myfile.html",
        "file://C:\\Users\\user\\myfile.html",
    )

    expectUrlLike(
        "http://192.168.255.255",
        "link.unknown",
        "3.14.2019",
        "3-four.14.2019",
        " cnn.com ",
        " cnn.com",
        "cnn.com ",
        "mozilla.com/~userdir",
        "my-domain.com",
        "http://faß.de//",
        "cnn.cơḿ",
        "cnn.çơḿ",
    )

    expectUrlLike(
        "c-c.com",
        "c-c-c-c.c-c-c",
        "c-http://c.com",
        "about-mozilla:mozilla",
        "c-http.d-x",
        "www.c.-",
        "3-3.3",
        "www.c-c.-",
    )

    expectNotUrlLike(
        " -://x.com ",
        " -x.com",
        "http://www-.com",
        "www.c-c- ",
        "3-3 ",
    )

    val bracketedHosts = listOf(
        "[::]",
        "[::1]",
        "[1::]",
        "[1:2:3:4:5:6:7:8]",
        "[2001:db8::1.2.3.4]",
        "[::1]:8080",
    )

    for (host in bracketedHosts) {
        // Deviation from the original isUrlLike test: java.net.URI doesn't
        // recognize bare IPv6 literals, so scheme-less forms are rejected.
        expectNotUrlLike(host, "$host/")

        expectUrlLike(
            "https://$host",
            "https://$host/",
            "https:$host",
            "https:$host/",
            "http://$host",
            "http://$host/",
            "http:$host",
            "http:$host/",
        )
    }

    expectNotUrlLike(
        "::1",
        ":::",
        "[[http://]]",
        "[[[",
        "[[[:",
        "[[[:/",
        "http://]]]",
    )

    expectUrlLike(
        "fe80::",
        "x:[",
    )

    // Deviation from the original isUrlLike test: the regex is only an
    // approximation. Once bug 1685152 is fixed, the original isURLLike
    // will return false for these inputs as well.
    expectNotUrlLike(
        "[:::",
        "http://[::",
        "http://[::/path",
        "http://[::?query",
        "[[http://banana]]",
        "http://[[[",
        "[[[::",
        "[[[::/",
        "http://[1.2.3]",
        "https://[1:2:3:4:5:6:7]/",
        "https://[1:2:3:4:5:6:7:8:9]/",
    )

    expectUrlLike("https://abc--cba.com/")
}
}
5 changes: 4 additions & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ permalink: /changelog/
* [Gecko](https://github.com/mozilla-mobile/firefox-android/blob/main/android-components/plugins/dependencies/src/main/java/Gecko.kt)
* [Configuration](https://github.com/mozilla-mobile/firefox-android/blob/main/android-components/.config.yml)

* **support-utils**
* Recognize IPv6 literals in the address bar. [Bug 1803465](https://bugzilla.mozilla.org/show_bug.cgi?id=1803465)

# 121.0
* [Commits](https://github.com/mozilla-mobile/firefox-android/compare/releases_v120..releases_v121)
* [Dependencies](https://github.com/mozilla-mobile/firefox-android/blob/releases_v121/android-components/plugins/dependencies/src/main/java/DependenciesPlugin.kt)
Expand Down Expand Up @@ -53,7 +56,7 @@ permalink: /changelog/

* **feature-tabs**
* Removed deprecated `TabsUseCases.AddNewPrivateTabUseCase`. [Bug 1853070](https://bugzilla.mozilla.org/show_bug.cgi?id=1853070)

* **lib-crash-sentry**
* `SentryService.initIfNeeded` is now public. [bug #1851676](https://bugzilla.mozilla.org/show_bug.cgi?id=1851676)

Expand Down