Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Revert "Revert "Sort at query time""
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerollmops committed Aug 24, 2021
1 parent 879d5e8 commit 89d0758
Show file tree
Hide file tree
Showing 17 changed files with 701 additions and 148 deletions.
8 changes: 4 additions & 4 deletions benchmarks/benches/search_songs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ fn bench_songs(c: &mut criterion::Criterion) {
milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect();
let default_criterion = default_criterion.iter().map(|s| s.as_str());
let asc_default: Vec<&str> =
std::iter::once("asc(released-timestamp)").chain(default_criterion.clone()).collect();
std::iter::once("released-timestamp:asc").chain(default_criterion.clone()).collect();
let desc_default: Vec<&str> =
std::iter::once("desc(released-timestamp)").chain(default_criterion.clone()).collect();
std::iter::once("released-timestamp:desc").chain(default_criterion.clone()).collect();

let basic_with_quote: Vec<String> = BASE_CONF
.queries
Expand Down Expand Up @@ -118,12 +118,12 @@ fn bench_songs(c: &mut criterion::Criterion) {
},
utils::Conf {
group_name: "asc",
criterion: Some(&["asc(released-timestamp)"]),
criterion: Some(&["released-timestamp:desc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc",
criterion: Some(&["desc(released-timestamp)"]),
criterion: Some(&["released-timestamp:desc"]),
..BASE_CONF
},

Expand Down
4 changes: 2 additions & 2 deletions http-ui/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1030,7 +1030,7 @@ mod tests {
displayed_attributes: Setting::Set(vec!["name".to_string()]),
searchable_attributes: Setting::Set(vec!["age".to_string()]),
filterable_attributes: Setting::Set(hashset! { "age".to_string() }),
criteria: Setting::Set(vec!["asc(age)".to_string()]),
criteria: Setting::Set(vec!["age:asc".to_string()]),
stop_words: Setting::Set(btreeset! { "and".to_string() }),
synonyms: Setting::Set(hashmap! { "alex".to_string() => vec!["alexey".to_string()] }),
};
Expand Down Expand Up @@ -1058,7 +1058,7 @@ mod tests {
Token::Str("criteria"),
Token::Some,
Token::Seq { len: Some(1) },
Token::Str("asc(age)"),
Token::Str("age:asc"),
Token::SeqEnd,
Token::Str("stopWords"),
Token::Some,
Expand Down
1 change: 0 additions & 1 deletion milli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ obkv = "0.2.0"
once_cell = "1.5.2"
ordered-float = "2.1.1"
rayon = "1.5.0"
regex = "1.4.3"
roaring = "0.6.6"
serde = { version = "1.0.123", features = ["derive"] }
serde_json = { version = "1.0.62", features = ["preserve_order"] }
Expand Down
69 changes: 43 additions & 26 deletions milli/src/criterion.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
use std::fmt;
use std::str::FromStr;

use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};

use crate::error::{Error, UserError};

static ASC_DESC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Criterion {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
Words,
/// Sorted by increasing number of typos.
Typo,
/// Dynamically sort at query time the documents. None, one or multiple Asc/Desc sortable
/// attributes can be used in place of this criterion at query time.
Sort,
/// Sorted by increasing distance between matched query terms.
Proximity,
/// Documents with quey words contained in more important
/// attributes are considred better.
/// attributes are considered better.
Attribute,
/// Sorted by the similarity of the matched words with the query words.
Exactness,
Expand All @@ -43,29 +41,46 @@ impl Criterion {
impl FromStr for Criterion {
type Err = Error;

fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
match txt {
fn from_str(text: &str) -> Result<Criterion, Self::Err> {
match text {
"words" => Ok(Criterion::Words),
"typo" => Ok(Criterion::Typo),
"sort" => Ok(Criterion::Sort),
"proximity" => Ok(Criterion::Proximity),
"attribute" => Ok(Criterion::Attribute),
"exactness" => Ok(Criterion::Exactness),
text => {
let caps = ASC_DESC_REGEX
.captures(text)
.ok_or_else(|| UserError::InvalidCriterionName { name: text.to_string() })?;
let order = caps.get(1).unwrap().as_str();
let field_name = caps.get(2).unwrap().as_str();
match order {
"asc" => Ok(Criterion::Asc(field_name.to_string())),
"desc" => Ok(Criterion::Desc(field_name.to_string())),
text => {
return Err(
UserError::InvalidCriterionName { name: text.to_string() }.into()
)
}
}
}
text => match AscDesc::from_str(text) {
Ok(AscDesc::Asc(field)) => Ok(Criterion::Asc(field)),
Ok(AscDesc::Desc(field)) => Ok(Criterion::Desc(field)),
Err(error) => Err(error.into()),
},
}
}
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum AscDesc {
Asc(String),
Desc(String),
}

impl AscDesc {
pub fn field(&self) -> &str {
match self {
AscDesc::Asc(field) => field,
AscDesc::Desc(field) => field,
}
}
}

impl FromStr for AscDesc {
type Err = UserError;

fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
match text.rsplit_once(':') {
Some((field_name, "asc")) => Ok(AscDesc::Asc(field_name.to_string())),
Some((field_name, "desc")) => Ok(AscDesc::Desc(field_name.to_string())),
_ => Err(UserError::InvalidCriterionName { name: text.to_string() }),
}
}
}
Expand All @@ -74,6 +89,7 @@ pub fn default_criteria() -> Vec<Criterion> {
vec![
Criterion::Words,
Criterion::Typo,
Criterion::Sort,
Criterion::Proximity,
Criterion::Attribute,
Criterion::Exactness,
Expand All @@ -87,11 +103,12 @@ impl fmt::Display for Criterion {
match self {
Words => f.write_str("words"),
Typo => f.write_str("typo"),
Sort => f.write_str("sort"),
Proximity => f.write_str("proximity"),
Attribute => f.write_str("attribute"),
Exactness => f.write_str("exactness"),
Asc(attr) => write!(f, "asc({})", attr),
Desc(attr) => write!(f, "desc({})", attr),
Asc(attr) => write!(f, "{}:asc", attr),
Desc(attr) => write!(f, "{}:desc", attr),
}
}
}
10 changes: 10 additions & 0 deletions milli/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ pub enum UserError {
InvalidFacetsDistribution { invalid_facets_name: HashSet<String> },
InvalidFilter(pest::error::Error<ParserRule>),
InvalidFilterAttribute(pest::error::Error<ParserRule>),
InvalidSortableAttribute { field: String, valid_fields: HashSet<String> },
InvalidStoreFile,
MaxDatabaseSizeReached,
MissingDocumentId { document: Object },
Expand Down Expand Up @@ -226,6 +227,15 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
)
}
Self::InvalidFilterAttribute(error) => error.fmt(f),
Self::InvalidSortableAttribute { field, valid_fields } => {
let valid_names =
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ");
write!(
f,
"Attribute {} is not sortable, available sortable attributes are: {}",
field, valid_names
)
}
Self::MissingDocumentId { document } => {
let json = serde_json::to_string(document).unwrap();
write!(f, "document doesn't have an identifier {}", json)
Expand Down
36 changes: 35 additions & 1 deletion milli/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub mod main_key {
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
Expand Down Expand Up @@ -446,13 +447,45 @@ impl Index {
Ok(fields_ids)
}

/* sortable fields */

/// Writes the sortable fields names in the database.
pub(crate) fn put_sortable_fields(
&self,
wtxn: &mut RwTxn,
fields: &HashSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields)
}

/// Deletes the sortable fields ids in the database.
pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY)
}

/// Returns the sortable fields names.
pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self
.main
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)?
.unwrap_or_default())
}

/// Identical to `sortable_fields`, but returns ids instead.
pub fn sortable_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
let fields = self.sortable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect())
}

/* faceted documents ids */

/// Returns the faceted fields names.
///
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
/// Faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
pub fn faceted_fields(&self, rtxn: &RoTxn) -> Result<HashSet<String>> {
let filterable_fields = self.filterable_fields(rtxn)?;
let sortable_fields = self.sortable_fields(rtxn)?;
let distinct_field = self.distinct_field(rtxn)?;
let asc_desc_fields =
self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
Expand All @@ -461,6 +494,7 @@ impl Index {
});

let mut faceted_fields = filterable_fields;
faceted_fields.extend(sortable_fields);
faceted_fields.extend(asc_desc_fields);
if let Some(field) = distinct_field {
faceted_fields.insert(field.to_owned());
Expand Down
2 changes: 1 addition & 1 deletion milli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use std::result::Result as StdResult;
use fxhash::{FxHasher32, FxHasher64};
use serde_json::{Map, Value};

pub use self::criterion::{default_criteria, Criterion};
pub use self::criterion::{default_criteria, AscDesc, Criterion};
pub use self::error::{
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
Expand Down
Loading

0 comments on commit 89d0758

Please sign in to comment.