Skip to content

Commit

Permalink
Add term count to stats (#375)
Browse files Browse the repository at this point in the history
Co-authored-by: Tago <joel@spyglass.fyi>
  • Loading branch information
travolin and Tago committed Mar 11, 2023
1 parent f2b2ce1 commit ad857d5
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 7 deletions.
4 changes: 4 additions & 0 deletions crates/shared/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pub enum Event {
SearchResult {
num_results: usize,
num_docs: u64,
term_count: i32,
domains: Vec<String>,
wall_time_ms: u64,
},
Expand Down Expand Up @@ -119,13 +120,16 @@ impl Metrics {
Event::SearchResult {
num_results,
num_docs,
term_count,
domains,
wall_time_ms,
} => {
data.properties
.insert("num_results".into(), num_results.to_owned().into());
data.properties
.insert("num_docs".into(), num_docs.to_owned().into());
data.properties
.insert("term_count".into(), term_count.to_owned().into());
data.properties
.insert("domains".into(), domains.to_owned().into());
data.properties
Expand Down
14 changes: 11 additions & 3 deletions crates/spyglass/src/api/handler/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use entities::sea_orm::{
self, prelude::*, sea_query::Expr, FromQueryResult, JoinType, QueryOrder, QuerySelect,
};
use jsonrpsee::core::Error;
use libspyglass::search::{document_to_struct, Searcher};
use libspyglass::search::{document_to_struct, QueryStats, Searcher};
use libspyglass::state::AppState;
use libspyglass::task::{CleanupTask, ManagerCommand};
use shared::metrics;
Expand Down Expand Up @@ -161,8 +161,15 @@ pub async fn search_docs(
.map(|model| model.id as u64)
.collect::<Vec<u64>>();

let docs =
Searcher::search_with_lens(state.db.clone(), &tag_ids, index, &search_req.query).await;
let mut stats = QueryStats::new();
let docs = Searcher::search_with_lens(
state.db.clone(),
&tag_ids,
index,
&search_req.query,
&mut stats,
)
.await;

let mut results: Vec<SearchResult> = Vec::new();
let mut missing: Vec<(String, String)> = Vec::new();
Expand Down Expand Up @@ -227,6 +234,7 @@ pub async fn search_docs(
.track(metrics::Event::SearchResult {
num_results: results.len(),
num_docs,
term_count: stats.term_count,
domains: domains.iter().cloned().collect(),
wall_time_ms,
})
Expand Down
18 changes: 14 additions & 4 deletions crates/spyglass/src/search/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ pub mod lens;
mod query;
mod utils;

pub use query::QueryStats;

type Score = f32;
type SearchResult = (Score, DocAddress);

Expand Down Expand Up @@ -307,6 +309,7 @@ impl Searcher {
applied_lenses: &Vec<u64>,
searcher: &Searcher,
query_string: &str,
stats: &mut QueryStats,
) -> Vec<SearchResult> {
let start_timer = Instant::now();

Expand Down Expand Up @@ -337,6 +340,7 @@ impl Searcher {
applied_lenses,
tag_boosts.into_iter(),
favorite_boost,
stats,
);

let collector = TopDocs::with_limit(5);
Expand Down Expand Up @@ -532,7 +536,7 @@ pub fn document_to_struct(doc: &Document) -> anyhow::Result<RetrievedDocument> {

#[cfg(test)]
mod test {
use crate::search::{DocumentUpdate, IndexPath, Searcher};
use crate::search::{DocumentUpdate, IndexPath, QueryStats, Searcher};
use entities::models::create_connection;
use shared::config::{Config, LensConfig};

Expand Down Expand Up @@ -641,8 +645,10 @@ mod test {
let mut searcher = Searcher::with_index(&IndexPath::Memory).expect("Unable to open index");
_build_test_index(&mut searcher);

let mut stats = QueryStats::new();
let query = "salinas";
let results = Searcher::search_with_lens(db, &vec![2_u64], &searcher, query).await;
let results =
Searcher::search_with_lens(db, &vec![2_u64], &searcher, query, &mut stats).await;

assert_eq!(results.len(), 1);
}
Expand All @@ -660,9 +666,11 @@ mod test {

let mut searcher = Searcher::with_index(&IndexPath::Memory).expect("Unable to open index");

let mut stats = QueryStats::new();
_build_test_index(&mut searcher);
let query = "salinas";
let results = Searcher::search_with_lens(db, &vec![2_u64], &searcher, query).await;
let results =
Searcher::search_with_lens(db, &vec![2_u64], &searcher, query, &mut stats).await;

assert_eq!(results.len(), 1);
}
Expand All @@ -681,8 +689,10 @@ mod test {
let mut searcher = Searcher::with_index(&IndexPath::Memory).expect("Unable to open index");
_build_test_index(&mut searcher);

let mut stats = QueryStats::new();
let query = "salinasd";
let results = Searcher::search_with_lens(db, &vec![2_u64], &searcher, query).await;
let results =
Searcher::search_with_lens(db, &vec![2_u64], &searcher, query, &mut stats).await;
assert_eq!(results.len(), 0);
}
}
21 changes: 21 additions & 0 deletions crates/spyglass/src/search/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@ use super::DocFields;

type QueryVec = Vec<(Occur, Box<dyn Query>)>;

/// Statistics collected while a search query is being built.
///
/// A fresh value is handed to the query builder by mutable reference and
/// read back by the caller once the search has run.
#[derive(Debug, Clone)]
pub struct QueryStats {
    /// Number of terms produced for the content field of the query string.
    /// Stays at `-1` until the query builder overwrites it.
    pub term_count: i32,
}

impl QueryStats {
    /// Creates stats whose `term_count` is the `-1` "not computed yet" placeholder.
    pub fn new() -> Self {
        Self::default()
    }
}

impl Default for QueryStats {
    /// Yields the same value as [`QueryStats::new`]: `term_count` is `-1`.
    fn default() -> Self {
        Self { term_count: -1 }
    }
}

fn _boosted_term(term: Term, boost: Score) -> Box<BoostQuery> {
Box::new(BoostQuery::new(
Box::new(TermQuery::new(
Expand All @@ -22,6 +39,7 @@ fn _boosted_phrase(terms: Vec<Term>, boost: Score) -> Box<BoostQuery> {
Box::new(BoostQuery::new(Box::new(PhraseQuery::new(terms)), boost))
}

#[allow(clippy::too_many_arguments)]
pub fn build_query<I>(
schema: Schema,
tokenizers: TokenizerManager,
Expand All @@ -33,13 +51,16 @@ pub fn build_query<I>(
tag_boosts: I,
// Id of favorited boost
favorite_boost: Option<i64>,
stats: &mut QueryStats,
) -> BooleanQuery
where
I: Iterator<Item = i64>,
{
let content_terms = terms_for_field(&schema, &tokenizers, query_string, fields.content);
let title_terms: Vec<Term> = terms_for_field(&schema, &tokenizers, query_string, fields.title);

stats.term_count = content_terms.len() as i32;

let mut term_query: QueryVec = Vec::new();

// Boost exact matches to the full query string
Expand Down

0 comments on commit ad857d5

Please sign in to comment.