Skip to content

Commit

Permalink
Add search restriction to specific uuids
Browse files Browse the repository at this point in the history
  • Loading branch information
ryanfrishkorn committed Aug 18, 2023
1 parent 34f4858 commit 0a6a793
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 31 deletions.
19 changes: 19 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,15 @@ fn main() -> Result<(), Box<dyn Error>> {
.num_args(0)
.action(ArgAction::SetTrue),
)
.arg(
Arg::new("uuid")
.help("search for matches in specified documents only")
.short('u')
.long("uuid")
.action(ArgAction::Append)
.required(false)
.value_delimiter(','),
)
.arg(Arg::new("terms").action(ArgAction::Append).required(true)),
)
.subcommand(
Expand Down Expand Up @@ -601,6 +610,15 @@ fn main() -> Result<(), Box<dyn Error>> {
})
.collect();

// restrict to specific uuids if supplied
let mut uuids: Vec<Uuid> = Vec::new();
if let Some(all_ids_str) = sub_matches.get_many::<String>("uuid") {
for id_str in all_ids_str {
let id = snip::search_uuid(&conn, id_str)?;
uuids.push(id);
}
}

// exclusionary terms
if let Some(args) = sub_matches.get_many::<String>("exclude") {
terms_exclude = stem_vec(args.map(|x| x.to_owned()).collect());
Expand Down Expand Up @@ -629,6 +647,7 @@ fn main() -> Result<(), Box<dyn Error>> {
terms_exclude: terms_exclude.clone(),
terms_optional: vec![],
method: SearchMethod::IndexStem,
uuids,
};
let search_results = snip::search_structured(&conn, search_query)?;
for item in search_results.items {
Expand Down
141 changes: 110 additions & 31 deletions src/snip/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pub struct SearchQuery {
pub terms_exclude: Vec<String>, // none of these terms may be present in a document
pub terms_optional: Vec<String>, // neither mandatory nor disqualifying, but increase score if present
pub method: SearchMethod, // search the index, document text field, etc.
pub uuids: Vec<Uuid>,
}

#[derive(Debug)]
Expand Down Expand Up @@ -47,43 +48,50 @@ pub fn search_structured(
search_query: SearchQuery,
) -> Result<SearchQueryResult, Box<dyn Error>> {
let mut query_result = SearchQueryResult { items: Vec::new() };
// let terms_positive_results: HashMap<String, Uuid> = HashMap::new();

// INCLUDE
let mut include_results: Vec<Uuid> = Vec::new();
for (i, term) in search_query.terms_include.iter().enumerate() {
let mut result = search_uuids_matching_term(conn, term)?;
// println!("iter result: {:?}", result);
// push all results on first run for next iteration comparison
if i == 0 {
include_results.append(&mut result);
// break if there was only one term
if search_query.terms_include.len() == 1 {
break;
}
continue;
}
let mut exclude_results: Vec<Uuid> = Vec::new();

// filter non-matching uuids
include_results.retain_mut(|id| result.contains(id));
}
// println!("include_results: {:?}", include_results);
// if search uuids are not set, search all documents
if search_query.uuids.is_empty() {
// INCLUDE
for (i, term) in search_query.terms_include.iter().enumerate() {
let mut result = search_uuids_matching_term(conn, term)?;
// println!("iter result: {:?}", result);
// push all results on first run for next iteration comparison
if i == 0 {
include_results.append(&mut result);
// break if there was only one term
if search_query.terms_include.len() == 1 {
break;
}
continue;
}

// EXCLUDE
let mut exclude_results: Vec<Uuid> = Vec::new();
for term in search_query.terms_exclude {
let result = search_uuids_matching_term(conn, &term)?;
for r in result {
if !exclude_results.contains(&r) {
exclude_results.push(r);
// filter non-matching uuids
include_results.retain_mut(|id| result.contains(id));
}
// println!("include_results: {:?}", include_results);

// EXCLUDE
for term in search_query.terms_exclude {
let result = search_uuids_matching_term(conn, &term)?;
for r in result {
if !exclude_results.contains(&r) {
exclude_results.push(r);
}
}
}
// println!("exclude_results: {:?}", exclude_results);

// SUBTRACT EXCLUDE FROM INCLUDE
include_results.retain_mut(|id| !exclude_results.contains(id));
// println!("filtered_results: {:?}", include_results);
} else {
// restrict search to supplied uuids
for uuid in search_query.uuids {
include_results.push(uuid);
}
}
// println!("exclude_results: {:?}", exclude_results);

// SUBTRACT EXCLUDE FROM INCLUDE
include_results.retain_mut(|id| !exclude_results.contains(id));
// println!("filtered_results: {:?}", include_results);

// BUILD OUTPUT
for uuid in include_results {
Expand Down Expand Up @@ -325,6 +333,7 @@ mod tests {
terms_exclude: vec!["fuzz".to_string()],
terms_optional: vec![],
method: SearchMethod::IndexStem,
uuids: vec![],
};

let expect = SearchQueryResult {
Expand Down Expand Up @@ -371,6 +380,76 @@ mod tests {
Ok(())
}

/// Runs `search_structured` with `SearchQuery::uuids` set to a single document id and
/// checks how many match positions are reported for each included term.
///
/// Two documents are exercised in turn: the one identified by `ID_STR` (terms "lorem"
/// and "ipsum") and the one with id `990a917e-66d3-404b-9502-e8341964730b` (terms
/// "fuzz" and "random").
#[test]
fn test_search_structured_uuids() -> Result<(), Box<dyn Error>> {
    let conn = prepare_database()?;
    snip::index_all_items(&conn)?;

    // Lorem ipsum
    let lorem_id: Uuid = Uuid::try_parse(ID_STR)?;
    let lorem_query = SearchQuery {
        terms_include: vec!["lorem".to_string(), "ipsum".to_string()],
        terms_exclude: vec!["fuzz".to_string()],
        terms_optional: vec![],
        method: SearchMethod::IndexStem,
        uuids: vec![lorem_id],
    };
    let lorem_result = search_structured(&conn, lorem_query)?;
    let lorem_item = lorem_result.items.first().unwrap();
    // each pair is (term, expected number of recorded positions), checked in order
    for (term, want) in [("lorem", 2), ("ipsum", 5)] {
        let got = lorem_item.matches.get(term).unwrap().len();
        if got != want {
            panic!("expected {} matches for '{}', got {}", want, term, got);
        }
    }

    // Fuzzing document
    let fuzz_id = Uuid::try_parse("990a917e-66d3-404b-9502-e8341964730b")?;
    let fuzz_query = SearchQuery {
        terms_include: vec!["fuzz".to_string(), "random".to_string()],
        terms_exclude: vec!["lorem".to_string()],
        terms_optional: vec![],
        method: SearchMethod::IndexStem,
        uuids: vec![fuzz_id],
    };
    let fuzz_result = search_structured(&conn, fuzz_query)?;
    let fuzz_item = fuzz_result.items.first().unwrap();
    // each pair is (term, expected number of recorded positions), checked in order
    for (term, want) in [("fuzz", 7), ("random", 1)] {
        let got = fuzz_item.matches.get(term).unwrap().len();
        if got != want {
            panic!("expected {} matches for '{}', got {}", want, term, got);
        }
    }

    Ok(())
}

#[test]
fn test_search_uuid() -> Result<(), Box<dyn Error>> {
let conn = prepare_database().expect("preparing in-memory database");
Expand Down

0 comments on commit 0a6a793

Please sign in to comment.