exposing an intermediary term ord
fulmicoton committed Oct 9, 2017
1 parent 48d6fbd commit 6359b3d
Showing 7 changed files with 44 additions and 18 deletions.
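
In short: the stacker HashMap's get_or_create now returns the probed hash-table bucket index along with the value, PostingsWriter::suscribe propagates that index outward as an IntermediateTermId (a new pub(crate) alias for usize), and SegmentWriter gains a HierarchicalFacet branch that runs each facet value through the FacetTokenizer and subscribes one term per facet prefix.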
2 changes: 1 addition & 1 deletion src/analyzer/facet_analyzer.rs
@@ -67,7 +67,7 @@ mod tests {
             let facet = Facet::from_encoded(token.term.clone());
             tokens.push(format!("{}", facet));
         };
-        FacetTokenizer.token_stream(facet.encoded()).process(&mut add_token);
+        FacetTokenizer.token_stream(facet.encoded_str()).process(&mut add_token);
     }
     assert_eq!(tokens.len(), 3);
     assert_eq!(tokens[0], "/top");
15 changes: 8 additions & 7 deletions src/datastruct/stacker/hashmap.rs
@@ -1,5 +1,6 @@
 use std::iter;
 use std::mem;
+use postings::IntermediateTermId;
 use super::heap::{Heap, HeapAllocable, BytesRef};
 
 mod murmurhash2 {
@@ -186,7 +187,7 @@ impl<'a> HashMap<'a> {
     }
 
 
-    pub fn get_or_create<S: AsRef<[u8]>, V: HeapAllocable>(&mut self, key: S) -> &mut V {
+    pub fn get_or_create<S: AsRef<[u8]>, V: HeapAllocable>(&mut self, key: S) -> (IntermediateTermId, &mut V) {
         let key_bytes: &[u8] = key.as_ref();
         let hash = murmurhash2::murmurhash2(key.as_ref());
         let mut probe = self.probe(hash);
@@ -198,11 +199,11 @@ impl<'a> HashMap<'a> {
                 let (addr, val): (u32, &mut V) = self.heap.allocate_object();
                 assert_eq!(addr, key_bytes_ref.addr() + 2 + key_bytes.len() as u32);
                 self.set_bucket(hash, key_bytes_ref, bucket);
-                return val;
+                return (bucket, val);
             } else if kv.hash == hash {
                 let (stored_key, expull_addr): (&[u8], u32) = self.get_key_value(kv.key_value_addr);
                 if stored_key == key_bytes {
-                    return self.heap.get_mut_ref(expull_addr);
+                    return (bucket, self.heap.get_mut_ref(expull_addr));
                 }
             }
         }
@@ -248,21 +249,21 @@ mod tests {
         let heap = Heap::with_capacity(2_000_000);
         let mut hash_map: HashMap = HashMap::new(18, &heap);
         {
-            let v: &mut TestValue = hash_map.get_or_create("abc");
+            let v: &mut TestValue = hash_map.get_or_create("abc").1;
             assert_eq!(v.val, 0u32);
             v.val = 3u32;
         }
         {
-            let v: &mut TestValue = hash_map.get_or_create("abcd");
+            let v: &mut TestValue = hash_map.get_or_create("abcd").1;
             assert_eq!(v.val, 0u32);
             v.val = 4u32;
         }
         {
-            let v: &mut TestValue = hash_map.get_or_create("abc");
+            let v: &mut TestValue = hash_map.get_or_create("abc").1;
             assert_eq!(v.val, 3u32);
         }
         {
-            let v: &mut TestValue = hash_map.get_or_create("abcd");
+            let v: &mut TestValue = hash_map.get_or_create("abcd").1;
             assert_eq!(v.val, 4u32);
         }
         let mut iter_values = hash_map.iter();
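
For a concrete picture of the new contract, here is a minimal, self-contained sketch of a get_or_create that hands back a term ordinal together with the value. TermIndex is hypothetical and is not tantivy's arena-backed table: the HashMap above returns the probed bucket index as the ordinal, whereas this sketch assigns dense ordinals from a plain std::collections::HashMap.

use std::collections::HashMap;

// Toy term index (hypothetical). The ordinal here is dense (0, 1, 2, ...);
// in the commit it is the hash-table bucket index.
struct TermIndex<V> {
    ords: HashMap<Vec<u8>, usize>,
    values: Vec<V>,
}

impl<V: Default> TermIndex<V> {
    fn new() -> TermIndex<V> {
        TermIndex { ords: HashMap::new(), values: Vec::new() }
    }

    // Returns (term ordinal, &mut value), creating both for unseen keys.
    fn get_or_create<S: AsRef<[u8]>>(&mut self, key: S) -> (usize, &mut V) {
        let next_ord = self.values.len();
        let ord = *self.ords.entry(key.as_ref().to_vec()).or_insert(next_ord);
        if ord == next_ord {
            self.values.push(V::default());
        }
        (ord, &mut self.values[ord])
    }
}

fn main() {
    let mut index: TermIndex<u32> = TermIndex::new();
    let (abc_ord, v) = index.get_or_create("abc");
    *v = 3;
    let (abcd_ord, _) = index.get_or_create("abcd");
    assert_ne!(abc_ord, abcd_ord); // distinct terms, distinct ordinals
    let (again, v) = index.get_or_create("abc");
    assert_eq!(abc_ord, again);    // same term, same ordinal
    assert_eq!(*v, 3);             // and the same value as before
}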
2 changes: 1 addition & 1 deletion src/datastruct/stacker/mod.rs
@@ -19,7 +19,7 @@ fn test_unrolled_linked_list() {
     let mut hashmap: HashMap = HashMap::new(10, &heap);
     for j in 0..k {
         for i in 0..500 {
-            let v: &mut ExpUnrolledLinkedList = hashmap.get_or_create(i.to_string());
+            let v: &mut ExpUnrolledLinkedList = hashmap.get_or_create(i.to_string()).1;
             v.push(i * j, &heap);
         }
     }
23 changes: 20 additions & 3 deletions src/indexer/segment_writer.rs
@@ -6,6 +6,7 @@ use schema::Term;
 use core::Segment;
 use core::SerializableSegment;
 use fastfield::FastFieldsWriter;
+use analyzer::{Analyzer, TokenStream, FacetTokenizer};
 use schema::Field;
 use schema::FieldType;
 use indexer::segment_serializer::SegmentSerializer;
@@ -143,8 +144,25 @@ impl<'a> SegmentWriter<'a> {
                     continue;
                 }
                 match *field_options.field_type() {
+                    FieldType::HierarchicalFacet => {
+                        let facets: Vec<&str> = field_values.iter()
+                            .flat_map(|field_value| {
+                                match field_value.value() {
+                                    &Value::HierarchicalFacet(ref facet) => Some(facet.encoded_str()),
+                                    _ => None
+                                }
+                            })
+                            .collect();
+                        let mut term = unsafe { Term::with_capacity(100) };
+                        term.set_field(field);
+                        for facet in facets {
+                            FacetTokenizer.token_stream(&facet)
+                                .process(&mut |ref token| {
+                                    term.set_text(&token.term);
+                                    self.multifield_postings.suscribe(doc_id, &term);
+                                });
+                        }
+                    }
                     FieldType::Str(_) => {
@@ -160,8 +178,7 @@
                             .collect();
                         let mut token_stream = analyzer.token_stream_texts(&texts[..]);
                         self.multifield_postings.index_text(doc_id, field, &mut token_stream)
-                    }
-                    else {
+                    } else {
                         0
                     };
                     self.fieldnorms_writer
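
The essential effect of the new HierarchicalFacet arm is that a single facet value is indexed as several terms, one per ancestor path: the facet_analyzer test above sees three tokens for its facet, starting with "/top". Below is a rough sketch of that expansion; facet_prefix_terms is hypothetical, and it uses a plain '/'-separated string for readability (tantivy's actual facet encoding and Term layout differ).

// Expand an encoded facet path into all of its prefixes, mirroring what
// the FacetTokenizer/suscribe loop does one token at a time.
fn facet_prefix_terms(encoded: &str) -> Vec<String> {
    let mut prefixes = Vec::new();
    let mut current = String::new();
    for step in encoded.split('/').filter(|s| !s.is_empty()) {
        current.push('/');
        current.push_str(step);
        prefixes.push(current.clone());
    }
    prefixes
}

fn main() {
    // A hypothetical facet /top/a/b yields three terms, "/top" first,
    // matching the shape of the facet_analyzer test.
    let terms = facet_prefix_terms("/top/a/b");
    assert_eq!(terms, vec!["/top", "/top/a", "/top/a/b"]);
}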
1 change: 1 addition & 0 deletions src/postings/mod.rs
@@ -33,6 +33,7 @@ pub use self::segment_postings::{SegmentPostings, BlockSegmentPostings};
 pub use self::intersection::IntersectionDocSet;
 pub use common::HasLen;
 
+pub(crate) type IntermediateTermId = usize;
 
 #[cfg(test)]
 mod tests {
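
IntermediateTermId being pub(crate) and a bare usize suggests its scope: it identifies a term only within the in-RAM term index of the segment currently being built (concretely, the bucket index returned by get_or_create above), not the final term ordinal of the serialized segment.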
12 changes: 7 additions & 5 deletions src/postings/postings_writer.rs
@@ -14,6 +14,8 @@ use schema::FieldEntry;
 use schema::FieldType;
 use analyzer::TokenStream;
 use schema::IndexRecordOption;
+use postings::IntermediateTermId;
+
 
 fn posting_from_field_entry<'a>(
     field_entry: &FieldEntry,
@@ -76,7 +78,7 @@ impl<'a> MultiFieldPostingsWriter<'a> {
         postings_writer.index_text(&mut self.term_index, doc, field, token_stream, self.heap)
     }
 
-    pub fn suscribe(&mut self, doc: DocId, term: &Term) {
+    pub fn suscribe(&mut self, doc: DocId, term: &Term) -> IntermediateTermId {
         let postings_writer = self.per_field_postings_writers[term.field().0 as usize].deref_mut();
         postings_writer.suscribe(&mut self.term_index, doc, 0u32, term, self.heap)
     }
@@ -145,7 +147,7 @@ pub trait PostingsWriter {
         pos: u32,
         term: &Term,
         heap: &Heap,
-    );
+    ) -> IntermediateTermId;
 
     /// Serializes the postings on disk.
     /// The actual serialization format is handled by the `PostingsSerializer`.
@@ -169,7 +171,6 @@ pub trait PostingsWriter {
             term.set_text(token.term.as_str());
             self.suscribe(term_index, doc_id, token.position as u32, &term, heap);
         };
-
        token_stream.process(&mut sink)
     }
 }
@@ -205,9 +206,9 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<'a, Rec> {
         position: u32,
         term: &Term,
         heap: &Heap,
-    ) {
+    ) -> IntermediateTermId {
         debug_assert!(term.as_slice().len() >= 4);
-        let recorder: &mut Rec = term_index.get_or_create(term);
+        let (term_ord, recorder): (usize, &mut Rec) = term_index.get_or_create(term);
         let current_doc = recorder.current_doc();
         if current_doc != doc {
             if current_doc != u32::max_value() {
@@ -216,6 +217,7 @@
             recorder.new_doc(doc, heap);
         }
         recorder.record_position(position, heap);
+        term_ord
     }
 
 
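
suscribe follows a small state-machine pattern visible in the hunk above: the recorder tracks the document it is currently accumulating, opens a new one when the doc id changes, and uses u32::max_value() as the "no document open yet" sentinel. The sketch below illustrates that pattern with a hypothetical SketchRecorder; tantivy's recorders instead write into the shared heap arena and also close the previous document before opening the next.

// Hypothetical flat recorder; not tantivy's Recorder trait.
struct SketchRecorder {
    current_doc: u32,
    postings: Vec<(u32, Vec<u32>)>, // (doc id, positions within that doc)
}

impl SketchRecorder {
    fn new() -> SketchRecorder {
        // u32::max_value() plays the same sentinel role as in the diff:
        // "no document has been recorded yet".
        SketchRecorder { current_doc: u32::max_value(), postings: Vec::new() }
    }

    fn record(&mut self, doc: u32, position: u32) {
        if self.current_doc != doc {
            self.current_doc = doc;
            self.postings.push((doc, Vec::new()));
        }
        self.postings.last_mut().unwrap().1.push(position);
    }
}

fn main() {
    let mut rec = SketchRecorder::new();
    rec.record(0, 3);
    rec.record(0, 7);
    rec.record(2, 1);
    assert_eq!(rec.postings, vec![(0, vec![3, 7]), (2, vec![1])]);
}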
7 changes: 6 additions & 1 deletion src/schema/facet.rs
@@ -19,7 +19,7 @@ enum State {
     Idle,
 }
 
-impl Facet
+impl Facet {
+
+
+    pub(crate) fn encoded_str(&self) -> &str {
+        &self.0
+    }
 
     pub(crate) fn from_encoded(encoded_str: String) -> Facet {
         Facet(encoded_str)
