Skip to content
This repository has been archived by the owner on Nov 1, 2022. It is now read-only.

Extension functions for converting URLs to search terms #10480

Merged
merged 1 commit into from Jun 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -5,6 +5,10 @@
package mozilla.components.browser.state.search

import android.graphics.Bitmap
import android.net.Uri

// Placeholder that OpenSearch url templates use to mark where the search terms go.
const val OS_SEARCH_ENGINE_TERMS_PARAM = "{searchTerms}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: This could just be "{searchTerms}". :)


/**
* A data class representing a search engine.
Expand Down Expand Up @@ -43,4 +47,17 @@ data class SearchEngine(
*/
CUSTOM,
}

// Parsed form of the first results url template. It is computed lazily on first access and
// then reused, so the template string is not parsed again on every lookup.
// Relies on `resultUrls` holding at least one entry.
val resultsUrl: Uri by lazy {
    Uri.parse(resultUrls[0])
}
// Name of the query parameter of `resultsUrl` whose value is exactly the search terms
// placeholder, or `null` when the template has no such parameter. Computed lazily and cached.
// This assumes that search parameters are always "on their own" within the param value,
// e.g. always in the form of ?q={searchTerms}, never ?q=somePrefix-{searchTerms}
val searchParameterName: String? by lazy {
    try {
        resultsUrl.queryParameterNames.find { name ->
            resultsUrl.getQueryParameter(name) == OS_SEARCH_ENGINE_TERMS_PARAM
        }
    } catch (e: UnsupportedOperationException) {
        // Non-hierarchical template url. Both `queryParameterNames` and `getQueryParameter`
        // throw for such urls, so the guard has to wrap the whole lookup rather than only
        // the per-parameter comparison.
        null
    }
}
}
Expand Up @@ -5,7 +5,13 @@
package mozilla.components.feature.search.ext

import android.graphics.Bitmap
import android.net.Uri
import androidx.annotation.VisibleForTesting
import mozilla.components.browser.state.search.OS_SEARCH_ENGINE_TERMS_PARAM
import mozilla.components.browser.state.search.SearchEngine
import mozilla.components.browser.state.state.SearchState
import mozilla.components.browser.state.state.searchEngines
import mozilla.components.browser.state.state.selectedOrDefaultSearchEngine
import mozilla.components.feature.search.internal.SearchUrlBuilder
import mozilla.components.feature.search.storage.SearchEngineReader
import java.io.InputStream
Expand All @@ -21,7 +27,7 @@ fun createSearchEngine(
icon: Bitmap,
suggestUrl: String? = null
): SearchEngine {
if (!url.contains("{searchTerms}")) {
if (!url.contains(OS_SEARCH_ENGINE_TERMS_PARAM)) {
throw IllegalArgumentException("URL does not contain search terms placeholder")
}

Expand Down Expand Up @@ -66,3 +72,49 @@ fun parseLegacySearchEngine(id: String, stream: InputStream): SearchEngine {
val reader = SearchEngineReader(SearchEngine.Type.CUSTOM)
return reader.loadStream(id, stream)
}

/**
 * Extracts the search terms encoded in [url] by matching it against the search engines known
 * to this [SearchState].
 *
 * The selected (or default) engine is consulted first. If it yields nothing, the engines in
 * `searchEngines` are tried in order and the first match wins.
 *
 * @param url the url to inspect.
 * @return Search terms if [url] is a known search results page, `null` otherwise.
 */
fun SearchState.parseSearchTerms(url: String): String? {
    val candidate = Uri.parse(url)

    // The selected/default engine is the most likely match, so try it before anything else.
    selectedOrDefaultSearchEngine?.parseSearchTerms(candidate)?.let { return it }

    // Otherwise walk the known engines lazily and stop at the first one that recognises the url.
    return searchEngines
        .asSequence()
        .mapNotNull { engine -> engine.parseSearchTerms(candidate) }
        .firstOrNull()
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this will not work for every search engine. Some (like Wikipedia) redirect after the search, so the resulting URL no longer matches the URL that was built from the template.


/**
 * Given a [SearchEngine], determine if the passed-in [url] matches its results template,
 * and what are the associated search terms.
 *
 * A [url] matches when its authority and path equal those of the engine's `resultsUrl`. The
 * terms are then read from the query parameter named by `searchParameterName`.
 *
 * @param url the url to inspect.
 * @return Search terms if [url] matches the results page template, `null` otherwise. This
 * includes the cases where the template has no stand-alone search terms parameter, where
 * [url] is not hierarchical, and where the parameter is absent or empty.
 */
@VisibleForTesting
fun SearchEngine.parseSearchTerms(url: Uri): String? {
    // Basic approach:
    // - look at the "base" of the template url; if there's a match, continue
    // - see if the GET parameter for the search terms is present in the url
    // - if that param present, its value is our answer if it's non-empty
    val searchResultsRoot = this.resultsUrl.authority + this.resultsUrl.path
    val urlRoot = url.authority + url.path
    if (searchResultsRoot != urlRoot) {
        return null
    }

    // The template may not expose the search terms as a stand-alone query parameter, in which
    // case there is nothing to look up. `Uri.getQueryParameter` throws a NullPointerException
    // for a null key, so bail out instead of passing it through.
    val parameterName = this.searchParameterName ?: return null

    val searchTerms = try {
        url.getQueryParameter(parameterName)
    } catch (e: UnsupportedOperationException) {
        // Non-hierarchical url.
        null
    }
    return searchTerms.takeUnless { it.isNullOrEmpty() }
}
Expand Up @@ -6,6 +6,7 @@ package mozilla.components.feature.search.internal

import android.net.Uri
import android.text.TextUtils
import mozilla.components.browser.state.search.OS_SEARCH_ENGINE_TERMS_PARAM
import mozilla.components.browser.state.search.SearchEngine
import java.util.Locale

Expand All @@ -21,7 +22,7 @@ private const val MOZ_PARAM_OFFICIAL = "{" + "moz:official" + "}"

// Supported OpenSearch parameters
// See http://opensearch.a9.com/spec/1.1/querysyntax/#core
// The search terms placeholder reuses OS_SEARCH_ENGINE_TERMS_PARAM rather than spelling the
// literal out a second time; the constant must be declared only once.
private const val OS_PARAM_USER_DEFINED = OS_SEARCH_ENGINE_TERMS_PARAM
private const val OS_PARAM_INPUT_ENCODING = "{" + "inputEncoding" + "}"
private const val OS_PARAM_LANGUAGE = "{" + "language" + "}"
private const val OS_PARAM_OUTPUT_ENCODING = "{" + "outputEncoding" + "}"
Expand Down
Expand Up @@ -4,10 +4,13 @@

package mozilla.components.feature.search.ext

import android.net.Uri
import androidx.test.ext.junit.runners.AndroidJUnit4
import mozilla.components.browser.state.search.SearchEngine
import mozilla.components.browser.state.state.SearchState
import mozilla.components.support.test.mock
import org.junit.Assert.assertEquals
import org.junit.Assert.assertNull
import org.junit.Test
import org.junit.runner.RunWith
import java.util.UUID
Expand Down Expand Up @@ -45,4 +48,99 @@ class SearchEngineKtTest {
searchEngine.buildSearchUrl("Hello World")
)
}

@Test
fun `GIVEN ecosia search engine and a set of urls THEN search terms are determined when present`() {
    val ecosia = createSearchEngine(
        name = "Ecosia",
        icon = mock(),
        url = "https://www.ecosia.org/search?q={searchTerms}"
    )

    // A foreign host, an empty search parameter and a missing search parameter yield no terms.
    listOf(
        "https://yandex.ru/search/?text=",
        "https://www.ecosia.org/search?q=",
        "https://www.ecosia.org/search?attr=moz-test"
    ).forEach { url ->
        assertNull(ecosia.parseSearchTerms(Uri.parse(url)))
    }

    // Expected terms paired with the url they should be extracted from; the search parameter
    // may be the only one or sit in the middle of the query.
    listOf(
        "second test search" to "https://www.ecosia.org/search?q=second%20test%20search",
        "Another test" to "https://www.ecosia.org/search?r=134s7&attr=moz-test&q=Another%20test&d=136697676793"
    ).forEach { (expectedTerms, url) ->
        assertEquals(expectedTerms, ecosia.parseSearchTerms(Uri.parse(url)))
    }
}

@Test
fun `GIVEN yandex search engine and a set of urls THEN search terms are determined when present`() {
    val yandex = createSearchEngine(
        name = "Yandex",
        icon = mock(),
        url = "https://yandex.ru/search/?text={searchTerms}"
    )

    // A foreign host, an empty search parameter and a missing search parameter yield no terms.
    listOf(
        "https://www.ecosia.org/search?q=",
        "https://yandex.ru/search/?text=",
        "https://yandex.ru/search/?attr=moz-test"
    ).forEach { url ->
        assertNull(yandex.parseSearchTerms(Uri.parse(url)))
    }

    // Expected terms paired with the url they should be extracted from; covers non-ASCII
    // terms and the search parameter appearing first or in the middle of the query.
    listOf(
        "фаерфокс" to "https://yandex.ru/search/?text=%D1%84%D0%B0%D0%B5%D1%80%D1%84%D0%BE%D0%BA%D1%81&lr=21512",
        "the sandbaggers" to "https://yandex.ru/search/?lr=21512&text=the%20sandbaggers&redircnt=1623745822.1"
    ).forEach { (expectedTerms, url) ->
        assertEquals(expectedTerms, yandex.parseSearchTerms(Uri.parse(url)))
    }
}

@Test
fun `GIVEN empty search state THEN search terms are never determined`() {
    // A state built with its defaults gets a url that would otherwise be a valid results page.
    val resultsPageUrl = "https://yandex.ru/search/?lr=21512&text=the%20sandbaggers&redircnt=1623745822.1"

    val terms = SearchState().parseSearchTerms(resultsPageUrl)

    assertNull(terms)
}

@Test
fun `GIVEN a search state and a set of urls THEN search terms are determined when present`() {
    val yandex = createSearchEngine(
        name = "Yandex",
        icon = mock(),
        url = "https://yandex.ru/search/?text={searchTerms}"
    )
    val ecosia = createSearchEngine(
        name = "Ecosia",
        icon = mock(),
        url = "https://www.ecosia.org/search?q={searchTerms}"
    )
    val baidu = createSearchEngine(
        name = "Baidu",
        icon = mock(),
        url = "https://www.baidu.com/s?wd={searchTerms}"
    )
    // Some engines appear in more than one of the lists.
    val state = SearchState(
        regionSearchEngines = listOf(yandex, baidu),
        additionalSearchEngines = listOf(ecosia),
        customSearchEngines = listOf(baidu, ecosia)
    )

    // An empty search parameter and a url on a different host/path yield no terms.
    listOf(
        "https://www.ecosia.org/search?q=",
        "http://help.baidu.com/"
    ).forEach { url ->
        assertNull(state.parseSearchTerms(url))
    }

    // Expected terms paired with the url they should be extracted from.
    listOf(
        "神舟十二号载人飞行任务标识发布" to "https://www.baidu.com/s?cl=3&tn=baidutop10&fr=top1000&wd=%E7%A5%9E%E8%88%9F%E5%8D%81%E4%BA%8C%E5%8F%B7%E8%BD%BD%E4%BA%BA%E9%A3%9E%E8%A1%8C%E4%BB%BB%E5%8A%A1%E6%A0%87%E8%AF%86%E5%8F%91%E5%B8%83&rsv_idx=2&rsv_dl=fyb_n_homepage&hisfilter=1",
        "the sandbaggers" to "https://yandex.ru/search/?lr=21512&text=the%20sandbaggers&redircnt=1623745822.1",
        "фаерфокс" to "https://yandex.ru/search/?text=%D1%84%D0%B0%D0%B5%D1%80%D1%84%D0%BE%D0%BA%D1%81&lr=21512",
        "Another test" to "https://www.ecosia.org/search?r=134s7&attr=moz-test&q=Another%20test&d=136697676793"
    ).forEach { (expectedTerms, url) ->
        assertEquals(expectedTerms, state.parseSearchTerms(url))
    }
}
}