diff --git a/components/support/ktx/src/main/java/mozilla/components/support/ktx/kotlin/String.kt b/components/support/ktx/src/main/java/mozilla/components/support/ktx/kotlin/String.kt index f6839310b07..b558d5f7070 100644 --- a/components/support/ktx/src/main/java/mozilla/components/support/ktx/kotlin/String.kt +++ b/components/support/ktx/src/main/java/mozilla/components/support/ktx/kotlin/String.kt @@ -28,6 +28,15 @@ private val re = object { */ fun String.isUrl() = URLStringUtils.isURLLike(this) +/** + * Checks if this string is a URL. + * + * This method performs a strict check to determine whether the string is a URL. It takes longer + * to execute than isUrl() but checks whether, e.g., the TLD is ICANN-recognized. Consider + * using isUrl() unless these guarantees are required. + */ +fun String.isUrlStrict() = URLStringUtils.isURLLikeStrict(this) + fun String.toNormalizedUrl() = URLStringUtils.toNormalizedURL(this) fun String.isPhone() = re.phoneish.matches(this) diff --git a/components/support/ktx/src/test/java/mozilla/components/support/ktx/kotlin/StringTest.kt b/components/support/ktx/src/test/java/mozilla/components/support/ktx/kotlin/StringTest.kt index 1b385c9e53a..1bb24e80cf0 100644 --- a/components/support/ktx/src/test/java/mozilla/components/support/ktx/kotlin/StringTest.kt +++ b/components/support/ktx/src/test/java/mozilla/components/support/ktx/kotlin/StringTest.kt @@ -43,6 +43,34 @@ class StringTest { assertNotEquals("Tweet:", url) } + @Test + fun isUrlStrict() { + assertTrue("mozilla.org".isUrlStrict()) + assertTrue(" mozilla.org ".isUrlStrict()) + assertTrue("http://mozilla.org".isUrlStrict()) + assertTrue("https://mozilla.org".isUrlStrict()) + assertTrue("file://somefile.txt".isUrlStrict()) + assertTrue("http://mozilla".isUrlStrict()) + assertTrue("http://192.168.255.255".isUrlStrict()) + assertTrue("about:crashcontent".isUrlStrict()) + assertTrue(" about:crashcontent ".isUrlStrict()) + assertTrue("sample:about ".isUrlStrict()) + + assertFalse("sample.notatld".isUrlStrict()) + assertTrue("sample.rocks".isUrlStrict()) + + assertFalse("mozilla".isUrlStrict()) + assertFalse("mozilla android".isUrlStrict()) + assertFalse(" mozilla android ".isUrlStrict()) + assertFalse("Tweet:".isUrlStrict()) + assertFalse("inurl:mozilla.org advanced search".isUrlStrict()) + assertFalse("what is about:crashes".isUrlStrict()) + + val extraText = "Check out @asa’s Tweet: https://twitter.com/asa/status/123456789?s=09" + val url = extraText.split(" ").find { it.isUrlStrict() } + assertNotEquals("Tweet:", url) + } + @Test fun toNormalizedUrl() { val expectedUrl = "http://mozilla.org" diff --git a/components/support/utils/src/main/java/mozilla/components/support/utils/URLStringUtils.kt b/components/support/utils/src/main/java/mozilla/components/support/utils/URLStringUtils.kt index 87ef1161290..b73a184e012 100644 --- a/components/support/utils/src/main/java/mozilla/components/support/utils/URLStringUtils.kt +++ b/components/support/utils/src/main/java/mozilla/components/support/utils/URLStringUtils.kt @@ -6,16 +6,39 @@ package mozilla.components.support.utils import android.net.Uri import android.text.TextUtils +import androidx.annotation.VisibleForTesting +import java.util.regex.Pattern object URLStringUtils { - fun isURLLike(string: String, safe: Boolean = false) = + /** + * Determine whether a string is a URL. + * + * This method performs a strict check to determine whether a string is a URL. It takes longer + * to execute than isURLLike() but checks whether, e.g., the TLD is ICANN-recognized. Consider + * using isURLLike() unless these guarantees are required. + */ + fun isURLLikeStrict(string: String, safe: Boolean = false) = if (safe) { string.matches(WebURLFinder.fuzzyUrlRegex) } else { string.matches(WebURLFinder.fuzzyUrlNonWebRegex) } - fun isSearchTerm(string: String) = !isURLLike(string, false) + /** + * Determine whether a string is a URL. + * + * This method performs a lenient check to determine whether a string is a URL. Anything that + * contains a :, ://, or . and has no internal spaces is potentially a URL. If you need a + * stricter check, consider using isURLLikeStrict(). + */ + fun isURLLike(string: String) = isURLLenient.matcher(string).matches() + + /** + * Determine whether a string is a search term. + * + * This method recognizes a string as a search term as anything other than a URL. + */ + fun isSearchTerm(string: String) = !isURLLike(string) /** * Normalizes a URL String. @@ -28,4 +51,39 @@ object URLStringUtils { } return uri.toString() } + + private val isURLLenient by lazy { + // Be lenient about what is classified as potentially a URL. Anything that contains a :, + // ://, or . and has no internal spaces is potentially a URL. + // + // Use java.util.regex because it is always unicode aware on Android. + // https://developer.android.com/reference/java/util/regex/Pattern.html + + // Use both the \w+ and \S* after the punctuation because they seem to match slightly + // different things. The \S matches any non-whitespace character (e.g., '~') and \w + // matches only word characters. In other words, the regex is requiring that there be a + // non-symbol character somewhere after the ., : or :// and before any other character + // or the end of the string. For example, match + // mozilla.com/~userdir + // and not + // mozilla./~ or mozilla:/ + // Without the [/]* after the :// in the alternation of the characters required to be a + // valid URL, + // file:///home/user/myfile.html + // is considered a search term; it is clearly a URL. + Pattern.compile("^\\s*\\w+(://[/]*|:|\\.)\\w+\\S*\\s*$", flags) + } + + @VisibleForTesting(otherwise = VisibleForTesting.PRIVATE) + internal const val UNICODE_CHARACTER_CLASS: Int = 0x100 + + // To run tests on a non-Android device (like a computer), Pattern.compile + // requires a flag to enable unicode support. Set a value like flags here with a local + // copy of UNICODE_CHARACTER_CLASS. Use a local copy because that constant is not + // available on Android platforms < 24 (Fenix targets 21). At runtime this is not an issue + // because, again, Android REs are always unicode compliant. + // NB: The value has to go through an intermediate variable; otherwise, the linter will + // complain that this value is not one of the predefined enums that are allowed. + @VisibleForTesting(otherwise = VisibleForTesting.PRIVATE) + internal var flags = 0 } diff --git a/components/support/utils/src/test/java/mozilla/components/support/utils/URLStringUtilsTest.kt b/components/support/utils/src/test/java/mozilla/components/support/utils/URLStringUtilsTest.kt index b412d491e09..e4b342897bf 100644 --- a/components/support/utils/src/test/java/mozilla/components/support/utils/URLStringUtilsTest.kt +++ b/components/support/utils/src/test/java/mozilla/components/support/utils/URLStringUtilsTest.kt @@ -7,17 +7,24 @@ package mozilla.components.support.utils import androidx.test.ext.junit.runners.AndroidJUnit4 import mozilla.components.support.utils.URLStringUtils.isSearchTerm import mozilla.components.support.utils.URLStringUtils.isURLLike +import mozilla.components.support.utils.URLStringUtils.isURLLikeStrict import mozilla.components.support.utils.URLStringUtils.toNormalizedURL import org.junit.Assert.assertEquals import org.junit.Assert.assertFalse import org.junit.Assert.assertNotEquals import org.junit.Assert.assertTrue +import org.junit.Before import org.junit.Test import org.junit.runner.RunWith @RunWith(AndroidJUnit4::class) class URLStringUtilsTest { + @Before + fun configurePatternFlags() { + URLStringUtils.flags = URLStringUtils.UNICODE_CHARACTER_CLASS + } + @Test fun toNormalizedURL() { val expectedUrl = "http://mozilla.org" @@ -27,49 +34,94 @@ class URLStringUtilsTest { } @Test - fun isURLLike() { - assertTrue(isURLLike("mozilla.org")) - assertTrue(isURLLike(" mozilla.org ")) - assertTrue(isURLLike("http://mozilla.org")) - assertTrue(isURLLike("https://mozilla.org")) - assertTrue(isURLLike("file://somefile.txt")) - assertFalse(isURLLike("file://somefile.txt", true)) - - assertTrue(isURLLike("http://mozilla")) - assertTrue(isURLLike("http://192.168.255.255")) - assertTrue(isURLLike("192.167.255.255")) - assertTrue(isURLLike("about:crashcontent")) - assertTrue(isURLLike(" about:crashcontent ")) - assertTrue(isURLLike("sample:about ")) - assertTrue(isURLLike("https://mozilla-mobile.com")) - - assertTrue(isURLLike("link.info")) - assertFalse(isURLLike("link.unknown")) - - assertFalse(isURLLike("mozilla")) - assertFalse(isURLLike("mozilla android")) - assertFalse(isURLLike(" mozilla android ")) - assertFalse(isURLLike("Tweet:")) - assertFalse(isURLLike("inurl:mozilla.org advanced search")) - assertFalse(isURLLike("what is about:crashes")) + fun isURLLikeStrict() { + assertTrue(isURLLikeStrict("mozilla.org")) + assertTrue(isURLLikeStrict(" mozilla.org ")) + assertTrue(isURLLikeStrict("http://mozilla.org")) + assertTrue(isURLLikeStrict("https://mozilla.org")) + assertTrue(isURLLikeStrict("file://somefile.txt")) + assertFalse(isURLLikeStrict("file://somefile.txt", true)) + + assertTrue(isURLLikeStrict("http://mozilla")) + assertTrue(isURLLikeStrict("http://192.168.255.255")) + assertTrue(isURLLikeStrict("192.167.255.255")) + assertTrue(isURLLikeStrict("about:crashcontent")) + assertTrue(isURLLikeStrict(" about:crashcontent ")) + assertTrue(isURLLikeStrict("sample:about ")) + assertTrue(isURLLikeStrict("https://mozilla-mobile.com")) + + assertTrue(isURLLikeStrict("link.info")) + assertFalse(isURLLikeStrict("link.unknown")) + + assertFalse(isURLLikeStrict("mozilla")) + assertFalse(isURLLikeStrict("mozilla android")) + assertFalse(isURLLikeStrict(" mozilla android ")) + assertFalse(isURLLikeStrict("Tweet:")) + assertFalse(isURLLikeStrict("inurl:mozilla.org advanced search")) + assertFalse(isURLLikeStrict("what is about:crashes")) val extraText = "Check out @asa’s Tweet: https://twitter.com/asa/status/123456789?s=09" - val url = extraText.split(" ").find { isURLLike(it) } + val url = extraText.split(" ").find { isURLLikeStrict(it) } assertNotEquals("Tweet:", url) - assertFalse(isURLLike("3.14")) - assertFalse(isURLLike("3.14.2019")) + assertFalse(isURLLikeStrict("3.14")) + assertFalse(isURLLikeStrict("3.14.2019")) + + assertFalse(isURLLikeStrict("file://somefile.txt", true)) + assertFalse(isURLLikeStrict("about:config", true)) + } + + @Test + fun isUrlLike() { + assertFalse(isURLLike("inurl:mozilla.org advanced search")) + assertFalse(isURLLike("sf: help")) + assertFalse(isURLLike("mozilla./~")) + assertFalse(isURLLike("cnn.com politics")) + + assertTrue(isURLLike("about:config")) + assertTrue(isURLLike("about:config:8000")) + assertTrue(isURLLike("file:///home/user/myfile.html")) + assertTrue(isURLLike("file://////////////home//user/myfile.html")) + assertTrue(isURLLike("file://C:\\Users\\user\\myfile.html")) + assertTrue(isURLLike("http://192.168.255.255")) + + assertTrue(isURLLike("link.unknown")) + + // Per https://bugs.chromium.org/p/chromium/issues/detail?id=31405, ICANN will accept + // purely numeric gTLDs. + assertTrue(isURLLike("3.14.2019")) + assertTrue(isURLLike(" cnn.com ")) + assertTrue(isURLLike(" cnn.com")) + assertTrue(isURLLike("cnn.com ")) + assertTrue(isURLLike("mozilla.com/~userdir")) - assertFalse(isURLLike("file://somefile.txt", true)) - assertFalse(isURLLike("about:config", true)) + assertTrue(isURLLike("http://faß.de//")) + assertTrue(isURLLike("cnn.cơḿ")) + assertTrue(isURLLike("cnn.çơḿ")) } @Test fun isSearchTerm() { assertTrue(isSearchTerm("inurl:mozilla.org advanced search")) - assertTrue(isSearchTerm("3.14.2019")) + assertTrue(isSearchTerm("sf: help")) + assertTrue(isSearchTerm("mozilla./~")) + assertTrue(isSearchTerm("cnn.com politics")) assertFalse(isSearchTerm("about:config")) + assertFalse(isSearchTerm("about:config:8000")) + assertFalse(isSearchTerm("file:///home/user/myfile.html")) + assertFalse(isSearchTerm("file://////////////home//user/myfile.html")) + assertFalse(isSearchTerm("file://C:\\Users\\user\\myfile.html")) assertFalse(isSearchTerm("http://192.168.255.255")) + // Per https://bugs.chromium.org/p/chromium/issues/detail?id=31405, ICANN will accept + // purely numeric gTLDs. + assertFalse(isSearchTerm("3.14.2019")) + assertFalse(isSearchTerm(" cnn.com ")) + assertFalse(isSearchTerm(" cnn.com")) + assertFalse(isSearchTerm("cnn.com ")) + assertFalse(isSearchTerm("mozilla.com/~userdir")) + assertFalse(isSearchTerm("http://faß.de//")) + assertFalse(isSearchTerm("cnn.cơḿ")) + assertFalse(isSearchTerm("cnn.çơḿ")) } } diff --git a/docs/changelog.md b/docs/changelog.md index 2ad64a57bc8..102334c3061 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -45,6 +45,7 @@ permalink: /changelog/ * **support-ktx** * Added `Context.getDrawableWithTint` extension method to get a drawable resource with a tint applied. + * `String.isUrl` is now using a more lenient check for improved performance. Strictly checking whether a string is a URL or not is supported through the new `String.isUrlStrict` method. * **support-base** * ⚠️ **This is a breaking change**: