Skip to content

Commit

Permalink
Improve structured search limits
Browse files Browse the repository at this point in the history
  • Loading branch information
ryanfrishkorn committed Oct 3, 2023
1 parent 760ebdd commit 7b5e616
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 68 deletions.
149 changes: 84 additions & 65 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,17 +215,11 @@ fn main() -> Result<(), Box<dyn Error>> {
.value_delimiter(',')
.action(ArgAction::Append),
)
.arg(
Arg::new("match-limit")
.help("limit the number of match excerpts displayed")
.long("match-limit")
.num_args(1)
.action(ArgAction::Append),
)
.arg(
Arg::new("count")
.help("print match count only (no excerpts)")
.short('c')
.long("count")
.num_args(0)
.required(false)
.action(ArgAction::SetTrue),
Expand All @@ -239,6 +233,21 @@ fn main() -> Result<(), Box<dyn Error>> {
.required(false)
.action(ArgAction::Append),
)
.arg(
Arg::new("excerpt-limit")
.help("limit the number of match excerpts displayed")
.long("excerpts")
.num_args(1)
.action(ArgAction::Append),
)
.arg(
Arg::new("limit")
.help("max number of documents to match")
.short('l')
.long("limit")
.num_args(1)
.action(ArgAction::Append),
)
.arg(
Arg::new("raw")
.help("do not strip newlines or returns from search excerpt")
Expand Down Expand Up @@ -417,7 +426,7 @@ fn main() -> Result<(), Box<dyn Error>> {
if std::io::stdout().is_terminal() {
eprintln!("{}", header.build().bright_black());
}
list_items(&conn, header, 0)?;
list_items(&conn, header, None)?;
}

// ATTACH RM
Expand Down Expand Up @@ -543,7 +552,7 @@ fn main() -> Result<(), Box<dyn Error>> {
// clear all data to ensure consistency
snip::clear_index(&conn)?;

let ids = snip::uuid_list(&conn, 0)?;
let ids = snip::uuid_list(&conn, None)?;
let mut status_len: usize;
eprint!("indexing...");
for (i, id) in ids.iter().enumerate() {
Expand Down Expand Up @@ -595,9 +604,9 @@ fn main() -> Result<(), Box<dyn Error>> {
header.add("name", 0, ListHeadingAlignment::Left);

// check for limit
let mut limit: usize = 0;
let mut limit: Option<usize> = None;
if let Some(v) = arg_matches.get_one::<String>("number") {
limit = v.parse::<usize>()?;
limit = Some(v.parse::<usize>()?);
}

if std::io::stdout().is_terminal() {
Expand Down Expand Up @@ -678,10 +687,16 @@ fn main() -> Result<(), Box<dyn Error>> {
terms_exclude = stem_vec(args.map(|x| x.to_owned()).collect());
}

// establish match limit
let mut excerpt_limit = 0;
if let Some(limit) = sub_matches.get_one::<String>("match-limit") {
excerpt_limit = limit.parse::<usize>()?;
// establish document limit
let mut limit: Option<usize> = None;
if let Some(document_limit) = sub_matches.get_one::<String>("limit") {
limit = Some(document_limit.parse::<usize>()?);
}

// all excerpts are printed by default, unless a maximum is set; the maximum applies to each term within each document
let mut excerpts: Option<usize> = None;
if let Some(limit) = sub_matches.get_one::<String>("excerpt-limit") {
excerpts = Some(limit.parse::<usize>()?);
}

// establish number of surrounding context words to display
Expand All @@ -702,66 +717,18 @@ fn main() -> Result<(), Box<dyn Error>> {
terms_optional: vec![],
method: SearchMethod::IndexStem,
uuids,
limit,
};
let search_results = match snip::search_structured(&conn, search_query) {
Ok(v) => v,
Err(e) => return Err(Box::new(e)),
};

// exit if only counts are requested
if sub_matches.get_flag("count") {
return Ok(());
}

// exit if no results are present
if search_results.items.is_empty() {
return Ok(());
}

for item in &search_results.items {
let mut s = snip::get_from_uuid(&conn, &item.uuid)?;
s.analyze()?;
println!("{}", s.name.white());
print!(" {}", snip::split_uuid(&s.uuid)[0].bright_blue());

// create and print a summary of terms and counts
let mut terms_summary: HashMap<String, usize> = HashMap::new();
for (term, positions) in &item.matches {
terms_summary.insert(term.clone(), positions.len());
}
print!(" [");
// use argument terms vector to order by term
for (i, term) in terms_include.iter().enumerate() {
if let Some(count) = terms_summary.get(term.as_str()) {
print!("{}: {}", term, count);
if i != terms_summary.len() - 1 {
print!(" ");
}
}
}
print!("]");
println!();

// for each position, gather context and display
for term in &terms_include {
if let Some(positions) = item.matches.get(term.as_str()) {
for (i, pos) in positions.iter().enumerate() {
// if limit is hit, show the additional match count
if i != 0 && i == excerpt_limit {
println!(" ...additional matches: {}", positions.len() - i);
break;
}

// this gathers an excerpt from the supplied position
let excerpt =
s.analysis.get_excerpt(pos, context_words, context_raw)?;
excerpt.print(context_raw);
}
}
}
println!();
}

// print to stderr to keep redirection clean
eprint!("document");
if search_results.items.len() != 1 {
Expand All @@ -777,6 +744,54 @@ fn main() -> Result<(), Box<dyn Error>> {
}
eprintln!(" occurrences: {}", term_match_count);

// we don't need excerpts for count only
if !sub_matches.get_flag("count") {
for item in &search_results.items {
let mut s = snip::get_from_uuid(&conn, &item.uuid)?;
s.analyze()?;
println!("{}", s.name.white());
print!(" {}", snip::split_uuid(&s.uuid)[0].bright_blue());

// create and print a summary of terms and counts
let mut terms_summary: HashMap<String, usize> = HashMap::new();
for (term, positions) in &item.matches {
terms_summary.insert(term.clone(), positions.len());
}
print!(" [");
// use argument terms vector to order by term
for (i, term) in terms_include.iter().enumerate() {
if let Some(count) = terms_summary.get(term.as_str()) {
print!("{}: {}", term, count);
if i != terms_summary.len() - 1 {
print!(" ");
}
}
}
print!("]");
println!();

// for each position, gather context and display
for term in &terms_include {
if let Some(positions) = item.matches.get(term.as_str()) {
for (i, pos) in positions.iter().enumerate() {
// if limit is hit, break immediately
if let Some(e_limit) = excerpts {
if i == e_limit {
break;
}
}

// this gathers an excerpt from the supplied position
let excerpt =
s.analysis.get_excerpt(pos, context_words, context_raw)?;
excerpt.print(context_raw);
}
}
}
println!();
}
}

/*
// single term direct data search
for (i, term) in terms_stem.iter().enumerate() {
Expand Down Expand Up @@ -935,7 +950,11 @@ impl ListHeading {
}
}

fn list_items(conn: &Connection, heading: ListHeading, limit: usize) -> Result<(), Box<dyn Error>> {
fn list_items(
conn: &Connection,
heading: ListHeading,
limit: Option<usize>,
) -> Result<(), Box<dyn Error>> {
let ids = match heading.kind {
ListHeadingKind::Document => snip::uuid_list(conn, limit)?,
ListHeadingKind::Attachment => snip::get_attachment_all(conn)?,
Expand Down
4 changes: 2 additions & 2 deletions src/snip/doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -675,10 +675,10 @@ pub fn strip_punctuation(s: &str) -> &str {
}

/// Return a vector of Uuid of all documents in the database, or only the `limit` most recent (by timestamp) when a limit is given
pub fn uuid_list(conn: &Connection, limit: usize) -> Result<Vec<Uuid>, Box<dyn Error>> {
pub fn uuid_list(conn: &Connection, limit: Option<usize>) -> Result<Vec<Uuid>, Box<dyn Error>> {
let mut ids: Vec<Uuid> = Vec::new();

if limit != 0 {
if limit.is_some() {
let mut stmt =
conn.prepare("SELECT uuid FROM snip ORDER BY datetime(timestamp) DESC LIMIT :limit")?;
let query_iter = stmt.query_map(&[(":limit", &limit)], |row| {
Expand Down
11 changes: 10 additions & 1 deletion src/snip/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub struct SearchQuery {
pub terms_optional: Vec<String>, // neither mandatory nor disqualifying, but increase score if present
pub method: SearchMethod, // search the index, document text field, etc.
pub uuids: Vec<Uuid>,
pub limit: Option<usize>,
}

#[derive(Debug)]
Expand Down Expand Up @@ -97,7 +98,12 @@ pub fn search_structured(
}

// BUILD OUTPUT
for uuid in include_results {
for (i, uuid) in include_results.into_iter().enumerate() {
if let Some(limit) = search_query.limit {
if i == limit {
break;
}
}
let mut item = SearchQueryItem {
uuid,
score: None,
Expand Down Expand Up @@ -365,6 +371,7 @@ mod tests {
terms_optional: vec![],
method: SearchMethod::IndexStem,
uuids: vec![],
limit: None,
};

let expect = SearchQueryResult {
Expand Down Expand Up @@ -424,6 +431,7 @@ mod tests {
terms_optional: vec![],
method: SearchMethod::IndexStem,
uuids: vec![id],
limit: None,
};
let result = search_structured(&conn, query)?;
// println!("result: {:#?}", result);
Expand Down Expand Up @@ -455,6 +463,7 @@ mod tests {
terms_optional: vec![],
method: SearchMethod::IndexStem,
uuids: vec![id],
limit: None,
};
let result = search_structured(&conn, query)?;
// println!("result: {:#?}", result);
Expand Down

0 comments on commit 7b5e616

Please sign in to comment.