Skip to content
This repository was archived by the owner on Nov 1, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ private val re = object {
*/
fun String.isUrl() = URLStringUtils.isURLLike(this)

/**
* Checks if this string is a URL.
*
* This method performs a strict check to determine whether the string is a URL. It takes longer
* to execute than isUrl() but checks whether, e.g., the TLD is ICANN-recognized. Consider
* using isUrl() unless these guarantees are required.
*/
fun String.isUrlStrict() = URLStringUtils.isURLLikeStrict(this)

fun String.toNormalizedUrl() = URLStringUtils.toNormalizedURL(this)

fun String.isPhone() = re.phoneish.matches(this)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,34 @@ class StringTest {
assertNotEquals("Tweet:", url)
}

@Test
fun isUrlStrict() {
assertTrue("mozilla.org".isUrlStrict())
assertTrue(" mozilla.org ".isUrlStrict())
assertTrue("http://mozilla.org".isUrlStrict())
assertTrue("https://mozilla.org".isUrlStrict())
assertTrue("file://somefile.txt".isUrlStrict())
assertTrue("http://mozilla".isUrlStrict())
assertTrue("http://192.168.255.255".isUrlStrict())
assertTrue("about:crashcontent".isUrlStrict())
assertTrue(" about:crashcontent ".isUrlStrict())
assertTrue("sample:about ".isUrlStrict())

assertFalse("sample.notatld".isUrlStrict())
assertTrue("sample.rocks".isUrlStrict())

assertFalse("mozilla".isUrlStrict())
assertFalse("mozilla android".isUrlStrict())
assertFalse(" mozilla android ".isUrlStrict())
assertFalse("Tweet:".isUrlStrict())
assertFalse("inurl:mozilla.org advanced search".isUrlStrict())
assertFalse("what is about:crashes".isUrlStrict())

val extraText = "Check out @asa’s Tweet: https://twitter.com/asa/status/123456789?s=09"
val url = extraText.split(" ").find { it.isUrlStrict() }
assertNotEquals("Tweet:", url)
}

@Test
fun toNormalizedUrl() {
val expectedUrl = "http://mozilla.org"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,39 @@ package mozilla.components.support.utils

import android.net.Uri
import android.text.TextUtils
import androidx.annotation.VisibleForTesting
import java.util.regex.Pattern

object URLStringUtils {
fun isURLLike(string: String, safe: Boolean = false) =
/**
* Determine whether a string is a URL.
*
* This method performs a strict check to determine whether a string is a URL. It takes longer
* to execute than isURLLike() but checks whether, e.g., the TLD is ICANN-recognized. Consider
* using isURLLike() unless these guarantees are required.
*/
fun isURLLikeStrict(string: String, safe: Boolean = false) =
if (safe) {
string.matches(WebURLFinder.fuzzyUrlRegex)
} else {
string.matches(WebURLFinder.fuzzyUrlNonWebRegex)
}

fun isSearchTerm(string: String) = !isURLLike(string, false)
/**
* Determine whether a string is a URL.
*
* This method performs a lenient check to determine whether a string is a URL. Anything that
* contains a :, ://, or . and has no internal spaces is potentially a URL. If you need a
* stricter check, consider using isURLLikeStrict().
*/
fun isURLLike(string: String) = isURLLenient.matcher(string).matches()

/**
* Determine whether a string is a search term.
*
* This method recognizes a string as a search term as anything other than a URL.
*/
fun isSearchTerm(string: String) = !isURLLike(string)

/**
* Normalizes a URL String.
Expand All @@ -28,4 +51,39 @@ object URLStringUtils {
}
return uri.toString()
}

private val isURLLenient by lazy {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll leave this up to the lib owners but I think we should remove isURLLike or at least have it share the isSearchTerm impl because:

  • isURLLike seems redundant to isSearchTerm
  • What is isURLLike supposed to mean? Without further details, how are consumers supposed to use it confidently? Note that it doesn't look like our code uses it (query)

// Be lenient about what is classified as potentially a URL. Anything that contains a :,
// ://, or . and has no internal spaces is potentially a URL.
//
// Use java.util.regex because it is always unicode aware on Android.
// https://developer.android.com/reference/java/util/regex/Pattern.html

// Use both the \w+ and \S* after the punctuation because they seem to match slightly
// different things. The \S matches any non-whitespace character (e.g., '~') and \w
// matches only word characters. In other words, the regex is requiring that there be a
// non-symbol character somewhere after the ., : or :// and before any other character
// or the end of the string. For example, match
// mozilla.com/~userdir
// and not
// mozilla./~ or mozilla:/
// Without the [/]* after the :// in the alternation of the characters required to be a
// valid URL,
// file:///home/user/myfile.html
// is considered a search term; it is clearly a URL.
Pattern.compile("^\\s*\\w+(://[/]*|:|\\.)\\w+\\S*\\s*$", flags)
}

@VisibleForTesting(otherwise = VisibleForTesting.PRIVATE)
internal const val UNICODE_CHARACTER_CLASS: Int = 0x100

// To run tests on a non-Android device (like a computer), Pattern.compile
// requires a flag to enable unicode support. Set a value like flags here with a local
// copy of UNICODE_CHARACTER_CLASS. Use a local copy because that constant is not
// available on Android platforms < 24 (Fenix targets 21). At runtime this is not an issue
// because, again, Android REs are always unicode compliant.
// NB: The value has to go through an intermediate variable; otherwise, the linter will
// complain that this value is not one of the predefined enums that are allowed.
@VisibleForTesting(otherwise = VisibleForTesting.PRIVATE)
internal var flags = 0
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,24 @@ package mozilla.components.support.utils
import androidx.test.ext.junit.runners.AndroidJUnit4
import mozilla.components.support.utils.URLStringUtils.isSearchTerm
import mozilla.components.support.utils.URLStringUtils.isURLLike
import mozilla.components.support.utils.URLStringUtils.isURLLikeStrict
import mozilla.components.support.utils.URLStringUtils.toNormalizedURL
import org.junit.Assert.assertEquals
import org.junit.Assert.assertFalse
import org.junit.Assert.assertNotEquals
import org.junit.Assert.assertTrue
import org.junit.Before
import org.junit.Test
import org.junit.runner.RunWith

@RunWith(AndroidJUnit4::class)
class URLStringUtilsTest {

@Before
fun configurePatternFlags() {
URLStringUtils.flags = URLStringUtils.UNICODE_CHARACTER_CLASS
}

@Test
fun toNormalizedURL() {
val expectedUrl = "http://mozilla.org"
Expand All @@ -27,49 +34,94 @@ class URLStringUtilsTest {
}

@Test
fun isURLLike() {
assertTrue(isURLLike("mozilla.org"))
assertTrue(isURLLike(" mozilla.org "))
assertTrue(isURLLike("http://mozilla.org"))
assertTrue(isURLLike("https://mozilla.org"))
assertTrue(isURLLike("file://somefile.txt"))
assertFalse(isURLLike("file://somefile.txt", true))

assertTrue(isURLLike("http://mozilla"))
assertTrue(isURLLike("http://192.168.255.255"))
assertTrue(isURLLike("192.167.255.255"))
assertTrue(isURLLike("about:crashcontent"))
assertTrue(isURLLike(" about:crashcontent "))
assertTrue(isURLLike("sample:about "))
assertTrue(isURLLike("https://mozilla-mobile.com"))

assertTrue(isURLLike("link.info"))
assertFalse(isURLLike("link.unknown"))

assertFalse(isURLLike("mozilla"))
assertFalse(isURLLike("mozilla android"))
assertFalse(isURLLike(" mozilla android "))
assertFalse(isURLLike("Tweet:"))
assertFalse(isURLLike("inurl:mozilla.org advanced search"))
assertFalse(isURLLike("what is about:crashes"))
fun isURLLikeStrict() {
assertTrue(isURLLikeStrict("mozilla.org"))
assertTrue(isURLLikeStrict(" mozilla.org "))
assertTrue(isURLLikeStrict("http://mozilla.org"))
assertTrue(isURLLikeStrict("https://mozilla.org"))
assertTrue(isURLLikeStrict("file://somefile.txt"))
assertFalse(isURLLikeStrict("file://somefile.txt", true))

assertTrue(isURLLikeStrict("http://mozilla"))
assertTrue(isURLLikeStrict("http://192.168.255.255"))
assertTrue(isURLLikeStrict("192.167.255.255"))
assertTrue(isURLLikeStrict("about:crashcontent"))
assertTrue(isURLLikeStrict(" about:crashcontent "))
assertTrue(isURLLikeStrict("sample:about "))
assertTrue(isURLLikeStrict("https://mozilla-mobile.com"))

assertTrue(isURLLikeStrict("link.info"))
assertFalse(isURLLikeStrict("link.unknown"))

assertFalse(isURLLikeStrict("mozilla"))
assertFalse(isURLLikeStrict("mozilla android"))
assertFalse(isURLLikeStrict(" mozilla android "))
assertFalse(isURLLikeStrict("Tweet:"))
assertFalse(isURLLikeStrict("inurl:mozilla.org advanced search"))
assertFalse(isURLLikeStrict("what is about:crashes"))

val extraText = "Check out @asa’s Tweet: https://twitter.com/asa/status/123456789?s=09"
val url = extraText.split(" ").find { isURLLike(it) }
val url = extraText.split(" ").find { isURLLikeStrict(it) }
assertNotEquals("Tweet:", url)

assertFalse(isURLLike("3.14"))
assertFalse(isURLLike("3.14.2019"))
assertFalse(isURLLikeStrict("3.14"))
assertFalse(isURLLikeStrict("3.14.2019"))

assertFalse(isURLLikeStrict("file://somefile.txt", true))
assertFalse(isURLLikeStrict("about:config", true))
}

@Test
fun isUrlLike() {
assertFalse(isURLLike("inurl:mozilla.org advanced search"))
assertFalse(isURLLike("sf: help"))
assertFalse(isURLLike("mozilla./~"))
assertFalse(isURLLike("cnn.com politics"))

assertTrue(isURLLike("about:config"))
assertTrue(isURLLike("about:config:8000"))
assertTrue(isURLLike("file:///home/user/myfile.html"))
assertTrue(isURLLike("file://////////////home//user/myfile.html"))
assertTrue(isURLLike("file://C:\\Users\\user\\myfile.html"))
assertTrue(isURLLike("http://192.168.255.255"))

assertTrue(isURLLike("link.unknown"))

// Per https://bugs.chromium.org/p/chromium/issues/detail?id=31405, ICANN will accept
// purely numeric gTLDs.
assertTrue(isURLLike("3.14.2019"))
assertTrue(isURLLike(" cnn.com "))
assertTrue(isURLLike(" cnn.com"))
assertTrue(isURLLike("cnn.com "))
assertTrue(isURLLike("mozilla.com/~userdir"))

assertFalse(isURLLike("file://somefile.txt", true))
assertFalse(isURLLike("about:config", true))
assertTrue(isURLLike("http://faß.de//"))
assertTrue(isURLLike("cnn.cơḿ"))
assertTrue(isURLLike("cnn.çơḿ"))
}

@Test
fun isSearchTerm() {
assertTrue(isSearchTerm("inurl:mozilla.org advanced search"))
assertTrue(isSearchTerm("3.14.2019"))
assertTrue(isSearchTerm("sf: help"))
assertTrue(isSearchTerm("mozilla./~"))
assertTrue(isSearchTerm("cnn.com politics"))

assertFalse(isSearchTerm("about:config"))
assertFalse(isSearchTerm("about:config:8000"))
assertFalse(isSearchTerm("file:///home/user/myfile.html"))
assertFalse(isSearchTerm("file://////////////home//user/myfile.html"))
assertFalse(isSearchTerm("file://C:\\Users\\user\\myfile.html"))
assertFalse(isSearchTerm("http://192.168.255.255"))
// Per https://bugs.chromium.org/p/chromium/issues/detail?id=31405, ICANN will accept
// purely numeric gTLDs.
assertFalse(isSearchTerm("3.14.2019"))
assertFalse(isSearchTerm(" cnn.com "))
assertFalse(isSearchTerm(" cnn.com"))
assertFalse(isSearchTerm("cnn.com "))
assertFalse(isSearchTerm("mozilla.com/~userdir"))
assertFalse(isSearchTerm("http://faß.de//"))
assertFalse(isSearchTerm("cnn.cơḿ"))
assertFalse(isSearchTerm("cnn.çơḿ"))
}
}
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ permalink: /changelog/

* **support-ktx**
* Added `Context.getDrawableWithTint` extension method to get a drawable resource with a tint applied.
* `String.isUrl` is now using a more lenient check for improved performance. Strictly checking whether a string is a URL or not is supported through the new `String.isUrlStrict` method.

* **support-base**
* ⚠️ **This is a breaking change**:
Expand Down