Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Fix escaped quotes in filter #552

Merged
merged 2 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions filter-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ mod error;
mod value;

use std::fmt::Debug;
use std::ops::Deref;
use std::str::FromStr;

pub use condition::{parse_condition, parse_to, Condition};
Expand Down Expand Up @@ -70,14 +69,6 @@ pub struct Token<'a> {
value: Option<String>,
}

impl<'a> Deref for Token<'a> {
type Target = &'a str;

fn deref(&self) -> &Self::Target {
&self.span
}
}

impl<'a> PartialEq for Token<'a> {
fn eq(&self, other: &Self) -> bool {
self.span.fragment() == other.span.fragment()
Expand All @@ -89,6 +80,10 @@ impl<'a> Token<'a> {
Self { span, value }
}

pub fn lexeme(&self) -> &str {
&self.span
}

pub fn value(&self) -> &str {
self.value.as_ref().map_or(&self.span, |value| value)
}
Expand Down
79 changes: 74 additions & 5 deletions milli/src/search/facet/filter.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::collections::HashSet;
use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
use std::ops::Deref;

use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
Expand Down Expand Up @@ -283,8 +282,9 @@ impl<'a> Filter<'a> {
Condition::LowerThanOrEqual(val) => (Included(f64::MIN), Included(val.parse()?)),
Condition::Between { from, to } => (Included(from.parse()?), Included(to.parse()?)),
Condition::Equal(val) => {
let (_original_value, string_docids) =
strings_db.get(rtxn, &(field_id, &val.to_lowercase()))?.unwrap_or_default();
let (_original_value, string_docids) = strings_db
.get(rtxn, &(field_id, &val.value().to_lowercase()))?
.unwrap_or_default();
let number = val.parse::<f64>().ok();
let number_docids = match number {
Some(n) => {
Expand Down Expand Up @@ -362,7 +362,7 @@ impl<'a> Filter<'a> {
return Ok(RoaringBitmap::new());
}
} else {
match *fid.deref() {
match fid.lexeme() {
attribute @ "_geo" => {
return Err(fid.as_external_error(FilterError::BadGeo(attribute)))?;
}
Expand Down Expand Up @@ -461,7 +461,7 @@ mod tests {
use maplit::hashset;

use super::*;
use crate::update::{IndexerConfig, Settings};
use crate::update::{self, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use crate::Index;

#[test]
Expand Down Expand Up @@ -598,6 +598,75 @@ mod tests {
));
}

#[test]
fn escaped_quote_in_filter_value_2380() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();

let mut wtxn = index.write_txn().unwrap();
let content = documents!([
{
"id": "test_1",
"monitor_diagonal": "27' to 30'"
},
{
"id": "test_2",
"monitor_diagonal": "27\" to 30\""
},
{
"id": "test_3",
"monitor_diagonal": "27\" to 30'"
},
]);

let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
.unwrap();
builder.add_documents(content).unwrap();
builder.execute().unwrap();

wtxn.commit().unwrap();

let mut wtxn = index.write_txn().unwrap();
let mut builder = update::Settings::new(&mut wtxn, &index, &config);

builder.set_filterable_fields(hashset!(S("monitor_diagonal")));
builder.execute(|_| ()).unwrap();
wtxn.commit().unwrap();

let rtxn = index.read_txn().unwrap();

let mut search = crate::Search::new(&rtxn, &index);
// this filter is copy pasted from #2380 with the exact same espace sequence
search.filter(
crate::Filter::from_str("monitor_diagonal = '27\" to 30\\''").unwrap().unwrap(),
);
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![2]);

search.filter(
crate::Filter::from_str(r#"monitor_diagonal = "27' to 30'" "#).unwrap().unwrap(),
);
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![0]);

search.filter(
crate::Filter::from_str(r#"monitor_diagonal = "27\" to 30\"" "#).unwrap().unwrap(),
);
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![1]);

search.filter(
crate::Filter::from_str(r#"monitor_diagonal = "27\" to 30'" "#).unwrap().unwrap(),
);
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![2]);
}

#[test]
fn geo_radius_error() {
let path = tempfile::tempdir().unwrap();
Expand Down