Skip to content

Commit

Permalink
searcher: Allow the TopCollector to order results by a fast unsigned field.
Browse files Browse the repository at this point in the history
  • Loading branch information
poljar committed Jun 12, 2019
1 parent 8a425f3 commit 85b8d0c
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 12 deletions.
61 changes: 49 additions & 12 deletions python/src/searcher.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use pyo3::exceptions;
use pyo3::prelude::*;
use std::any::Any;

use tantivy as tv;

use crate::document::Document;
use crate::query::Query;
use crate::field::Field;

/// Tantivy's Searcher class
///
Expand All @@ -30,19 +32,39 @@ impl Searcher {
/// Raises a ValueError if there was an error with the search.
// NOTE(review): this listing comes from a rendered commit diff whose +/-
// markers were lost, so it appears to interleave the removed and the added
// version of `search` -- confirm against the repository before compiling.
//
// Runs `query` against the wrapped searcher using the collector stored in
// `collector`, and returns one (value, address) pair per hit. Search
// failures and unsupported collectors are reported as a ValueError.
fn search(
&self,
py: Python,
query: &Query,
collector: &mut TopDocs,
// Return type of what looks like the removed version: (f32, address) pairs.
) -> PyResult<Vec<(f32, DocAddress)>> {
let ret = self.inner.search(&query.inner, &collector.inner);
match ret {
Ok(r) => {
let result: Vec<(f32, DocAddress)> = r
.iter()
.map(|(f, d)| (f.clone(), DocAddress::from(d)))
.collect();
Ok(result)
// Return type of what looks like the added version: the first tuple element
// is a generic Python object, so it can hold the value produced by either
// collector branch below.
) -> PyResult<Vec<(PyObject, DocAddress)>> {
// `collector.inner` is type-erased; the branches below recover the concrete
// collector type with `downcast_ref`.
let collector = &collector.inner;

// Case 1: the stored collector is a plain `tv::collector::TopDocs`.
if let Some(collector) = collector.downcast_ref::<tv::collector::TopDocs>() {
let ret = self.inner.search(&query.inner, collector);
match ret {
Ok(r) => {
// Convert each hit's first element into a Python object and wrap the
// tantivy address in this module's `DocAddress`.
let result: Vec<(PyObject, DocAddress)> = r
.iter()
.map(|(f, d)| (f.clone().into_object(py), DocAddress::from(d)))
.collect();
Ok(result)
}
// A failed search becomes a ValueError carrying the error's text.
Err(e) => Err(exceptions::ValueError::py_err(e.to_string()))
}
// NOTE(review): presumably the removed version's error arm, left over from
// the diff rendering -- verify.
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),

// Case 2: the stored collector is a `tv::collector::TopDocsByField<u64>`.
} else if let Some(collector) = collector.downcast_ref::<tv::collector::TopDocsByField<u64>>() {
let ret = self.inner.search(&query.inner, collector);
match ret {
Ok(r) => {
// Same conversion as in case 1; only the type of `f` differs.
let result: Vec<(PyObject, DocAddress)> = r
.iter()
.map(|(f, d)| (f.clone().into_object(py), DocAddress::from(d)))
.collect();
Ok(result)
}
// Same error mapping as case 1, written with an explicit `return`.
Err(e) => return Err(exceptions::ValueError::py_err(e.to_string()))
}
} else {
// Neither downcast matched: the boxed value is not a supported collector.
Err(exceptions::ValueError::py_err("Invalid collector passed."))
}
}

Expand Down Expand Up @@ -117,18 +139,33 @@ impl Into<tv::DocAddress> for &DocAddress {
/// Args:
/// limit (int, optional): The number of documents that the top scorer will
/// retrieve. Must be a positive integer larger than 0. Defaults to 10.
/// order_by_field (Field, optional): A schema field that the results
/// should be ordered by. The field must be declared as a fast field
/// when building the schema. Note, this only works for unsigned fields
/// for now.
#[pyclass]
pub(crate) struct TopDocs {
// NOTE(review): two `inner` declarations appear here because this listing
// comes from a rendered diff without +/- markers; this first one (the
// concrete collector type) is presumably the removed line -- verify.
inner: tv::collector::TopDocs,
// Type-erased collector. `TopDocs::new` stores either a
// `tv::collector::TopDocs` or a `tv::collector::TopDocsByField<u64>` here,
// and `Searcher::search` recovers the concrete type with `downcast_ref`.
inner: Box<Any>,
}

#[pymethods]
impl TopDocs {
// Constructor exposed to Python; `limit` defaults to 10 via `#[args]`.
#[new]
#[args(limit = 10)]
// NOTE(review): two `fn new` headers appear because this listing comes from
// a rendered diff without +/- markers; the single-line header is presumably
// the removed signature -- verify.
fn new(obj: &PyRawObject, limit: usize) -> PyResult<()> {
fn new(
obj: &PyRawObject,
limit: usize,
order_by_field: Option<&Field>
) -> PyResult<()> {
// Start from a collector limited to `limit` results.
let top = tv::collector::TopDocs::with_limit(limit);

// When a field is given, convert the collector with `order_by_field`, typed
// here as `TopDocsByField<u64>`. Either way the collector is boxed as `Any`
// so one struct field can hold both variants.
let top: Box<Any> = match order_by_field {
Some(o) => Box::<tv::collector::TopDocsByField<u64>>::new(top.order_by_field(o.inner)),
None => Box::new(top)
};

// Initialise the Python-side object with the boxed collector.
obj.init(TopDocs { inner: top });

Ok(())
}
}
42 changes: 42 additions & 0 deletions python/tests/tantivy_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import datetime
import tantivy


Expand Down Expand Up @@ -83,3 +84,44 @@ def test_doc(self):

assert doc.len == 1
assert not doc.is_empty

def test_order_by_collector(self):
    """Check that ``TopDocs`` orders hits by a fast unsigned field.

    Two documents that both match the query "and" are indexed with
    different values in the fast unsigned ``date`` field. Searching with a
    collector built as ``TopDocs(10, date_field)`` must return both hits,
    highest ``date`` first, with the field value as the first element of
    each result tuple.
    """
    builder = tantivy.SchemaBuilder()
    title = builder.add_text_field("title", stored=True)
    date_field = builder.add_unsigned_field("date", fast="single")
    index = tantivy.Index(builder.build())

    # Listed oldest first so the expected result order differs from the
    # insertion order.
    books = [
        ("The Old Man and the Sea", 1559913830),
        ("Of Mice and Men", 1559913833),
    ]

    writer = index.writer()
    for book_title, timestamp in books:
        doc = tantivy.Document()
        doc.add_text(title, book_title)
        doc.add_unsigned(date_field, timestamp)
        writer.add_document(doc)
    writer.commit()

    searcher = index.reader().searcher()
    query = tantivy.QueryParser.for_index(index, [title]).parse_query("and")

    result = searcher.search(query, tantivy.TopDocs(10, date_field))

    assert len(result) == 2

    # The first tuple element is the value of the ordering field.
    assert [sort_key for sort_key, _ in result] == [1559913833, 1559913830]

    # Check every hit, not just the first, so the whole ordering is pinned.
    found_titles = [
        searcher.doc(doc_address).get_first(title) for _, doc_address in result
    ]
    assert found_titles == ["Of Mice and Men", "The Old Man and the Sea"]

0 comments on commit 85b8d0c

Please sign in to comment.