Skip to content
Permalink
Browse files

Closes #1647: Improve URL toolbar autocompletion matching

It still isn't great, but it has broader coverage than before.
  • Loading branch information...
grigoryk committed Jan 4, 2019
1 parent f0e4567 commit ff25ec3e6646736e2b4ba3ee1d9fdd9a8412ce8c
@@ -202,8 +202,8 @@ class InMemoryHistoryStorageTest {

history.recordVisit("https://en.wikipedia.org/wiki/Mozilla", VisitType.LINK)
res = history.getAutocompleteSuggestion("en")!!
assertEquals("en.wikipedia.org/wiki/Mozilla", res.text)
assertEquals("https://en.wikipedia.org/wiki/Mozilla", res.url)
assertEquals("en.wikipedia.org/wiki/mozilla", res.text)
assertEquals("https://en.wikipedia.org/wiki/mozilla", res.url)
assertEquals("memoryHistory", res.source)
assertEquals(3, res.totalItems)

@@ -8,50 +8,86 @@ package mozilla.components.support.utils
import java.net.MalformedURLException
import java.net.URL

// Number of characters in the "www." prefix; used as the substring offset when that prefix is
// stripped from a host.
const val WWW_PREFIX_OFFSET = 4

/**
 * Result of a successful autocomplete lookup.
 *
 * @property url the URL that was matched against the query.
 * @property matchedSegment the part of [url] that should be presented as the completion
 * (for example the URL without its protocol, or without a common subdomain).
 */
data class DomainMatch(
    val url: String,
    val matchedSegment: String
)

// FIXME implement Fennec-style segment matching logic
// See https://github.com/mozilla-mobile/android-components/issues/1279
/**
 * Finds the first URL in [urls] that can be used to autocomplete [query].
 *
 * Matching is case-insensitive: [query] and every candidate URL are lower-cased before they are
 * compared, so the returned [DomainMatch] carries the lower-cased form of the matched URL.
 *
 * @param query the text to autocomplete.
 * @param urls candidate URLs, examined in iteration order.
 * @return a [DomainMatch] for the first URL that matches, or null if no URL matches or no
 * segment could be derived from the matched URL.
 */
fun segmentAwareDomainMatch(query: String, urls: Iterable<String>): DomainMatch? {
    // Lower-case with a fixed locale: the default-locale variant would map 'I' to a dotless
    // 'ı' under e.g. a Turkish locale and silently break matching of ASCII hosts.
    val caseInsensitiveQuery = query.toLowerCase(java.util.Locale.ROOT)
    // Process input 'urls' lazily, as the list could be very large and likely we'll find a match
    // by going through just a small subset.
    val caseInsensitiveUrls = urls.asSequence().map { it.toLowerCase(java.util.Locale.ROOT) }

    return basicMatch(caseInsensitiveQuery, caseInsensitiveUrls)?.let { matchedUrl ->
        matchSegment(caseInsensitiveQuery, matchedUrl)?.let { DomainMatch(matchedUrl, it) }
    }
}

/**
 * Returns the first entry of [urls] that [query] is a prefix of, trying progressively looser
 * forms of each URL:
 *
 * 1. the raw URL string;
 * 2. the URL without its protocol (host, optional `:port`, path);
 * 3. the host with a common subdomain removed.
 *
 * Entries that cannot be parsed as a URL are only compared as raw strings.
 *
 * @param query the (already normalised) text to match.
 * @param urls candidate URLs; consumed lazily and only as far as the first match.
 * @return the matching entry exactly as it appeared in [urls], or null if none matched.
 */
@SuppressWarnings("ReturnCount")
private fun basicMatch(query: String, urls: Sequence<String>): String? {
    for (rawUrl in urls) {
        if (rawUrl.startsWith(query)) {
            return rawUrl
        }

        val url = try {
            URL(rawUrl)
        } catch (e: MalformedURLException) {
            // Nothing more can be derived from an unparseable entry. Skipping it here avoids
            // building the literal text "null" from missing URL parts and matching against it.
            continue
        }

        val host = url.host

        // Host + port + path, i.e. the URL with its protocol removed. A query that is a prefix
        // of the host alone is also a prefix of this string, so one check covers both cases.
        val urlSansProtocol = host + url.port.orEmpty() + url.path
        if (urlSansProtocol.startsWith(query)) {
            return rawUrl
        }

        if (host.noCommonSubdomains().startsWith(query)) {
            return rawUrl
        }
    }
    return null
}

/**
 * Produces the text that should be shown as the completion of [query] for a matched [rawUrl].
 *
 * - If [query] is a prefix of the raw URL, the raw URL itself is returned.
 * - If [query] is a prefix of the host, the result is host, optional `:port`, and path.
 * - Otherwise the same is returned with a common subdomain removed from the host.
 *
 * @param query the (already normalised) text that was matched.
 * @param rawUrl the URL that was matched.
 * @return the segment to present, or null if [rawUrl] does not start with [query] and cannot be
 * parsed as a URL.
 */
private fun matchSegment(query: String, rawUrl: String): String? {
    if (rawUrl.startsWith(query)) {
        return rawUrl
    }

    val url = try {
        URL(rawUrl)
    } catch (e: MalformedURLException) {
        // The return type is already nullable; report "no segment" instead of crashing the caller.
        return null
    }

    val host = if (url.host.startsWith(query)) {
        url.host
    } else {
        url.host.noCommonSubdomains()
    }

    // The port belongs between the host and the path ("host:8000/admin").
    return host + url.port.orEmpty() + url.path
}

/**
 * Returns this host name with a leading common subdomain label ("www", "mobile" or "m") removed.
 *
 * Only a complete leading label is stripped, i.e. the label must be followed by a dot. Hosts that
 * merely begin with the same letters (such as "mozilla.org") and hosts that consist of nothing but
 * the label are returned unchanged.
 */
private fun String.noCommonSubdomains(): String {
    // This kind of stripping allows us to match "twitter" to "mobile.twitter.com".
    val domainsToStrip = listOf("www", "mobile", "m")

    for (domain in domainsToStrip) {
        val prefix = "$domain."
        if (startsWith(prefix)) {
            return removePrefix(prefix)
        }
    }
    return this
}

/**
 * Formats this port number as a URL suffix.
 *
 * @return an empty string when the receiver is null or -1 (the value `java.net.URL` reports for
 * a URL without an explicit port), otherwise the number prefixed with a colon, e.g. ":8000".
 */
private fun Int?.orEmpty(): String = when (this) {
    null, -1 -> ""
    else -> ":$this"
}
@@ -13,26 +13,57 @@ class DomainMatcherTest {
fun `should perform basic domain matching for a given query`() {
assertNull(segmentAwareDomainMatch("moz", listOf()))

val urls = listOf("http://www.mozilla.org", "http://firefox.com", "https://en.wikipedia.org/wiki/Mozilla", "about:config")
val urls = listOf(
"http://www.mozilla.org", "http://Firefox.com",
"https://mobile.twitter.com", "https://m.youtube.com",
"https://en.Wikipedia.org/Wiki/Mozilla",
"http://192.168.254.254:8000", "http://192.168.254.254:8000/admin",
"about:config"
)
// Full url matching.
assertEquals(
DomainMatch("http://www.mozilla.org", "mozilla.org"),
segmentAwareDomainMatch("moz", urls)
DomainMatch("http://www.mozilla.org", "http://www.mozilla.org"),
segmentAwareDomainMatch("http://www.m", urls)
)
// Protocol stripping.
assertEquals(
DomainMatch("http://www.mozilla.org", "www.mozilla.org"),
segmentAwareDomainMatch("www.moz", urls)
)
// Subdomain stripping.
assertEquals(
DomainMatch("https://en.wikipedia.org/wiki/Mozilla", "en.wikipedia.org/wiki/Mozilla"),
DomainMatch("http://www.mozilla.org", "mozilla.org"),
segmentAwareDomainMatch("moz", urls)
)
assertEquals(
DomainMatch("https://mobile.twitter.com", "twitter.com"),
segmentAwareDomainMatch("twit", urls)
)
assertEquals(
DomainMatch("https://m.youtube.com", "youtube.com"),
segmentAwareDomainMatch("yo", urls)
)
// Case insensitivity in the host and in the path. Subdomain matching and stripping.
assertEquals(
DomainMatch("https://en.wikipedia.org/wiki/mozilla", "en.wikipedia.org/wiki/mozilla"),
segmentAwareDomainMatch("en", urls)
)
assertEquals(
DomainMatch("https://en.wikipedia.org/wiki/mozilla", "en.wikipedia.org/wiki/mozilla"),
segmentAwareDomainMatch("en.wikipedia.org/wi", urls)
)
assertEquals(
DomainMatch("http://firefox.com", "firefox.com"),
segmentAwareDomainMatch("fire", urls)
)
// Urls with ports.
assertEquals(
DomainMatch("http://www.mozilla.org", "http://www.mozilla.org"),
segmentAwareDomainMatch("http://www.m", urls)
DomainMatch("http://192.168.254.254:8000", "192.168.254.254:8000"),
segmentAwareDomainMatch("192", urls)
)
assertEquals(
DomainMatch("http://192.168.254.254:8000/admin", "192.168.254.254:8000/admin"),
segmentAwareDomainMatch("192.168.254.254:8000/a", urls)
)

assertNull(segmentAwareDomainMatch("nomatch", urls))

0 comments on commit ff25ec3

Please sign in to comment.
You can’t perform that action at this time.