Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 1874068 - Suggest: dedupe suggestion fetching suggestions with duplicate prefix keywords #6098

Merged
merged 1 commit into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 34 additions & 29 deletions components/suggest/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,14 +323,18 @@ impl<'a> SuggestDao<'a> {
let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased);
let suggestions_limit = &query.limit.unwrap_or(-1);
let suggestions = self.conn.query_rows_and_then_cached(
"SELECT s.id, k.rank, s.title, s.url, s.provider, s.score, k.confidence, k.keyword_suffix
FROM suggestions s
JOIN prefix_keywords k ON k.suggestion_id = s.id
WHERE k.keyword_prefix = :keyword_prefix AND s.provider = :provider
ORDER by s.score DESC
LIMIT :suggestions_limit",
"SELECT s.id, MAX(k.rank) AS rank, s.title, s.url, s.provider, s.score, k.keyword_suffix
FROM suggestions s
JOIN prefix_keywords k ON k.suggestion_id = s.id
WHERE k.keyword_prefix = :keyword_prefix AND
k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF' AND
s.provider = :provider
GROUP BY s.id
ORDER BY s.score DESC, rank DESC
LIMIT :suggestions_limit",
named_params! {
":keyword_prefix": keyword_prefix,
":keyword_suffix": keyword_suffix,
":provider": SuggestionProvider::Amo,
":suggestions_limit": suggestions_limit,
},
Expand Down Expand Up @@ -370,19 +374,20 @@ impl<'a> SuggestDao<'a> {
pub fn fetch_pocket_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
let keyword_lowercased = &query.keyword.to_lowercase();
let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased);
let suggestions_limit = &query.limit.unwrap_or(-1);
let suggestions = self.conn.query_rows_and_then_cached(
"SELECT s.id, k.rank, s.title, s.url, s.provider, s.score, k.confidence, k.keyword_suffix
FROM suggestions s
JOIN prefix_keywords k ON k.suggestion_id = s.id
WHERE k.keyword_prefix = :keyword_prefix AND s.provider = :provider
ORDER BY s.score DESC
LIMIT :suggestions_limit",
"SELECT s.id, MAX(k.rank) AS rank, s.title, s.url, s.provider, s.score, k.confidence,
k.keyword_suffix
FROM suggestions s
JOIN prefix_keywords k ON k.suggestion_id = s.id
WHERE k.keyword_prefix = :keyword_prefix AND
k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF' AND
s.provider = :provider
GROUP BY s.id, k.confidence
ORDER BY s.score DESC, rank DESC",
named_params! {
":keyword_prefix": keyword_prefix,
":keyword_suffix": keyword_suffix,
":provider": SuggestionProvider::Pocket,
":suggestions_limit": suggestions_limit,

},
|row| -> Result<Option<Suggestion>>{
let title = row.get("title")?;
Expand All @@ -407,7 +412,11 @@ impl<'a> SuggestDao<'a> {
Ok(None)
}
}
)?.into_iter().flatten().collect();
)?.into_iter().flatten().take(
query.limit.and_then(|limit|
usize::try_from(limit).ok()
).unwrap_or(usize::MAX)
).collect();
Ok(suggestions)
}

Expand All @@ -419,23 +428,19 @@ impl<'a> SuggestDao<'a> {
let suggestions = self
.conn
.query_rows_and_then_cached(
r#"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do like this roomier formatting for the SQL strings that @ncloudioj used for MDN—it would be nice to adopt it elsewhere, but not at all a blocker for your PR 😄

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, thanks, Lina! I stole this script from our pipeline project so that I don't need to manually format SQL any more. The only downside is that it could get too sparse sometimes.

Copy link
Contributor Author

@tiftran tiftran Feb 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh this is fancy, will have a PR up for this soon

SELECT
s.id, s.title, s.url, s.provider, s.score, k.keyword_suffix
FROM
suggestions s
JOIN
prefix_keywords k ON k.suggestion_id = s.id
WHERE
k.keyword_prefix = :keyword_prefix
AND
"SELECT s.id, MAX(k.rank) AS rank, s.title, s.url, s.provider, s.score, k.keyword_suffix
FROM suggestions s
JOIN prefix_keywords k ON k.suggestion_id = s.id
WHERE k.keyword_prefix = :keyword_prefix AND
k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF' AND
s.provider = :provider
ORDER BY
s.score DESC
GROUP BY s.id
ORDER BY s.score DESC, rank DESC
LIMIT :suggestions_limit
"#,
",
named_params! {
":keyword_prefix": keyword_prefix,
":keyword_suffix": keyword_suffix,
":provider": SuggestionProvider::Mdn,
":suggestions_limit": suggestions_limit,
},
Expand Down
147 changes: 147 additions & 0 deletions components/suggest/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3411,6 +3411,153 @@ mod tests {

Ok(())
}

// Tests querying multiple suggestions with multiple keywords with same prefix keyword
#[test]
fn query_with_multiple_suggestions_with_same_prefix() -> anyhow::Result<()> {
before_each();

let snapshot = Snapshot::with_records(json!([{
"id": "data-1",
"type": "amo-suggestions",
"last_modified": 15,
"attachment": {
"filename": "data-1.json",
"mimetype": "application/json",
"location": "data-1.json",
"hash": "",
"size": 0,
},
}, {
"id": "data-2",
"type": "pocket-suggestions",
"last_modified": 15,
"attachment": {
"filename": "data-2.json",
"mimetype": "application/json",
"location": "data-2.json",
"hash": "",
"size": 0,
},
}, {
"id": "icon-3",
"type": "icon",
"last_modified": 25,
"attachment": {
"filename": "icon-3.png",
"mimetype": "image/png",
"location": "icon-3.png",
"hash": "",
"size": 0,
},
}]))?
.with_data(
"data-1.json",
json!([
{
"description": "amo suggestion",
"url": "https://addons.mozilla.org/en-US/firefox/addon/example",
"guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
"keywords": ["relay", "spam", "masking email", "masking emails", "masking accounts", "alias" ],
"title": "Firefox Relay",
"icon": "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
"rating": "4.9",
"number_of_ratings": 888,
"score": 0.25
}
]),
)?
.with_data(
"data-2.json",
json!([
{
"description": "pocket suggestion",
"url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
"lowConfidenceKeywords": ["soft life", "soft living", "soft work", "workaholism", "toxic work culture"],
"highConfidenceKeywords": ["burnout women", "grind culture", "women burnout", "soft lives"],
"title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
"score": 0.05
}
]),
)?
.with_icon("icon-3.png", "also-an-icon".as_bytes().into());

let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));

store.ingest(SuggestIngestionConstraints::default())?;

let table = [
(
"keyword = `soft li`; pocket",
SuggestionQuery {
keyword: "soft li".into(),
providers: vec![SuggestionProvider::Pocket],
limit: None,
},
expect![[r#"
[
Pocket {
title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
score: 0.05,
is_top_pick: false,
},
]
"#]],
),
(
"keyword = `soft lives`; pocket",
SuggestionQuery {
keyword: "soft lives".into(),
providers: vec![SuggestionProvider::Pocket],
limit: None,
},
expect![[r#"
[
Pocket {
title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
score: 0.05,
is_top_pick: true,
},
]
"#]],
),
(
"keyword = `masking `; amo provider",
SuggestionQuery {
keyword: "masking ".into(),
providers: vec![SuggestionProvider::Amo],
limit: None,
},
expect![[r#"
[
Amo {
title: "Firefox Relay",
url: "https://addons.mozilla.org/en-US/firefox/addon/example",
icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
description: "amo suggestion",
rating: Some(
"4.9",
),
number_of_ratings: 888,
guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
score: 0.25,
},
]
"#]],
),
];
for (what, query, expect) in table {
expect.assert_debug_eq(
&store
.query(query)
.with_context(|| format!("Couldn't query store for {}", what))?,
);
}

Ok(())
}
/// Tests ingesting malformed Remote Settings records that we understand,
/// but that are missing fields, or aren't in the format we expect.
#[test]
Expand Down