diff --git a/src/lib.rs b/src/lib.rs index 4258b24665..5f6b63884c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,114 @@ +/*! + +# Creating a new index, adding documents and searching. + +``` +# extern crate rustc_serialize; +# extern crate tantivy; +# use std::fs; +use tantivy::{Document, Index}; +use tantivy::schema::{Schema, TEXT, STORED}; +use tantivy::collector::TopCollector; +use tantivy::query::QueryParser; +use tantivy::query::Query; +use std::path::PathBuf; + +# fn main() { +# fn wrapper_err() -> tantivy::Result<()> { +// We need to declare a schema +// to create a new index. +let mut schema = Schema::new(); + +// TEXT | STORED is some syntax to describe +// how tantivy should index this field. +// It means the field should be tokenized and indexed, +// along with its term frequency and term positions. +let title = schema.add_text_field("title", TEXT | STORED); +let body = schema.add_text_field("body", TEXT); +// the path in which our index will be created. +# fs::create_dir("./tantivy-index").unwrap(); +let index_path = PathBuf::from("./tantivy-index"); +// this will actually just create a meta.json +// file in the directory. +let index = try!(Index::create(&index_path, schema)); + +// There can be only one writer at one time. +// The writer will use more than one thread +// to use your CPU. +let mut index_writer = try!(index.writer()); + + +// Let's now create one document and index it. +let mut doc = Document::new(); +doc.add_text(title, "The Old Man and the Sea"); +doc.add_text(body, "He was an old man who fished alone in a skiff in the Gulf Stream and he had gone eighty-four +days now without taking a fish."); + +// We can now add our document +try!(index_writer.add_document(doc)); + +// ... in the real world, we would add way more documents +// here. + +// At this point the document is not indexed. +// It has been pushed to a queue where +// it will be eventually processed. +// +// There is even no guarantee that +// the document will be indexed if there +// is a power outage for instance. +// +// We can call .wait() to force the index_writer to +// commit to disk. +try!(index_writer.wait()); + +// At this point we are guaranteed that +// all documents that were added are index, and +// ready for search. +// + +// Let's search our index. This starts +// by creating a searcher. There can be more +// than one search at a time. +let searcher = try!(index.searcher()); + +// The query parser can interpret human queries. +// Here, if the user does not specify which +// field he wants to search, tantivy will search in both title and body. +let query_parser = QueryParser::new(index.schema(), vec!(title, body)); +let query = query_parser.parse_query("sea whale").unwrap(); + +// A query defines a set of documents, as +// well as the way they should be scored. +// By default the query_parser is scoring according +// to a metric called TfIdf, and will consider +// any document matching at least one of our terms. + +// We are not interested in all of the document but +// only in the top 10. +let mut top_collector = TopCollector::with_limit(10); + +try!(query.search(&searcher, &mut top_collector)); + +// Our top collector now contains are 10 +// most relevant doc ids... +let doc_ids = top_collector.docs(); + +// The actual documents still need to be +// retrieved from Tantivy's store. +// Since body was not configured as stored, +// the document returned will only contain +// a title. +let retrieved_doc = searcher.doc(&doc_ids[0]); +# Ok(()) +# } +# wrapper_err().unwrap(); +# fs::remove_dir_all("./tantivy-index").unwrap(); +# } + +``` +*/ + #![feature(binary_heap_extras)] #![cfg_attr(test, feature(test))] #![cfg_attr(test, feature(step_by))] @@ -63,6 +174,7 @@ pub use schema::Document; pub use core::SegmentReader; pub use self::common::TimerTree; + pub use postings::DocSet; pub use postings::Postings; pub use postings::SegmentPostingsOption; diff --git a/src/query/query_parser.rs b/src/query/query_parser.rs index bcc3c2da60..3a730b390b 100644 --- a/src/query/query_parser.rs +++ b/src/query/query_parser.rs @@ -205,42 +205,42 @@ mod tests { #[test] pub fn test_query_grammar() { - let mut query_parser = parser(query_language); - assert_eq!(query_parser.parse("abc:toto").unwrap().0, + let mut grammar_parser = parser(query_language); + assert_eq!(grammar_parser.parse("abc:toto").unwrap().0, vec!( (Occur::Should, Literal::WithField(String::from("abc"), String::from("toto"))) ) ); assert_eq!( - query_parser.parse("\"some phrase query\"").unwrap().0, + grammar_parser.parse("\"some phrase query\"").unwrap().0, vec!( (Occur::Should, Literal::DefaultField(String::from("some phrase query"))), ) ); assert_eq!( - query_parser.parse("field:\"some phrase query\"").unwrap().0, + grammar_parser.parse("field:\"some phrase query\"").unwrap().0, vec!( (Occur::Should, Literal::WithField(String::from("field"), String::from("some phrase query"))) )); - assert_eq!(query_parser.parse("field:\"some phrase query\" field:toto a").unwrap().0, + assert_eq!(grammar_parser.parse("field:\"some phrase query\" field:toto a").unwrap().0, vec!( (Occur::Should, Literal::WithField(String::from("field"), String::from("some phrase query"))), (Occur::Should, Literal::WithField(String::from("field"), String::from("toto"))), (Occur::Should, Literal::DefaultField(String::from("a"))), )); - assert_eq!(query_parser.parse("field:\"a ! b\"").unwrap().0, + assert_eq!(grammar_parser.parse("field:\"a ! b\"").unwrap().0, vec!( (Occur::Should, Literal::WithField(String::from("field"), String::from("a ! b"))), )); - assert_eq!(query_parser.parse("field:a9e3").unwrap().0, + assert_eq!(grammar_parser.parse("field:a9e3").unwrap().0, vec!( (Occur::Should, Literal::WithField(String::from("field"), String::from("a9e3")),) )); - assert_eq!(query_parser.parse("a9e3").unwrap().0, + assert_eq!(grammar_parser.parse("a9e3").unwrap().0, vec!( (Occur::Should, Literal::DefaultField(String::from("a9e3"))), )); - assert_eq!(query_parser.parse("field:タンタイビーって早い").unwrap().0, + assert_eq!(grammar_parser.parse("field:タンタイビーって早い").unwrap().0, vec!( (Occur::Should, Literal::WithField(String::from("field"), String::from("タンタイビーって早い"))), )); diff --git a/src/schema/schema.rs b/src/schema/schema.rs index f40e27d00f..9001d1df06 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -143,6 +143,12 @@ impl Schema { } NamedFieldDocument(field_map) } + + pub fn to_json(&self, doc: &Document) -> String { + // encoding a document cannot fail. + json::encode(&self.to_named_doc(doc)).unwrap() + } + /// Build a document object from a json-object. pub fn parse_document(&self, doc_json: &str) -> Result { let json_node = try!(Json::from_str(doc_json));