Commit: Merge #665
665: Fixing piles of clippy errors. r=ManyTheFish a=ehiggs

## Related issue
No issue fixed; this simply cleans up some code for clippy, on the march towards a clean build once #659 is merged.

## What does this PR do?
Most of these are calls to `clone()` on structs that implement `Copy`, where a plain copy suffices.

Many others drop redundant `&` and `&mut` borrows of `self` (and other values) where the enclosing function already holds an immutable or mutable borrow, so the extra borrow isn't needed.

I tried to stay away from behavioural changes or places where I'd have to name fresh variables.
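
For illustration, here is a minimal standalone sketch (hypothetical types, not code from this PR) of the two lint classes described above, clippy's `clone_on_copy` and `needless_borrow`:

```rust
#[derive(Clone, Copy)]
struct Params {
    level: u32,
}

fn use_params(params: Params) -> u32 {
    params.level
}

fn main() {
    let params = Params { level: 3 };

    // Before: `clone()` on a `Copy` type; clippy's `clone_on_copy` warns
    // because a plain copy is implicit and cheaper to read.
    let a = use_params(params.clone());

    // After: just pass the value; it is copied automatically.
    let b = use_params(params);

    // Likewise, `&` on a value that is already a reference is a needless
    // borrow: e.g. `merge(k, &vals)` becomes `merge(k, vals)` when `vals`
    // is already a `&[T]`.
    assert_eq!(a, b);
}
```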

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com>
bors[bot] and ehiggs committed Oct 20, 2022
2 parents 19b2326 + beb987d commit f11a408
Showing 18 changed files with 139 additions and 159 deletions.
2 changes: 1 addition & 1 deletion milli/src/lib.rs
@@ -195,7 +195,7 @@ pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
/// Returns `true` if the field match one of the faceted fields.
/// See the function [`is_faceted_by`] below to see what “matching” means.
pub fn is_faceted(field: &str, faceted_fields: impl IntoIterator<Item = impl AsRef<str>>) -> bool {
-    faceted_fields.into_iter().find(|facet| is_faceted_by(field, facet.as_ref())).is_some()
+    faceted_fields.into_iter().any(|facet| is_faceted_by(field, facet.as_ref()))
}

/// Returns `true` if the field match the facet.
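
The change above is clippy's `search_is_some` lint: `find(...).is_some()` scans for a matching element and then only checks existence, which is exactly what `any(...)` expresses directly. A minimal standalone sketch (hypothetical data, not from this repository):

```rust
fn main() {
    let fields = ["title", "genres", "release_date"];

    // Before: find + is_some.
    let faceted_before = fields.iter().find(|f| **f == "genres").is_some();

    // After: any() states the intent directly and short-circuits the same way.
    let faceted_after = fields.iter().any(|f| *f == "genres");

    assert_eq!(faceted_before, faceted_after);
}
```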
6 changes: 3 additions & 3 deletions milli/src/search/matches/mod.rs
@@ -7,9 +7,9 @@ use serde::Serialize;

pub mod matching_words;

-const DEFAULT_CROP_MARKER: &'static str = "…";
-const DEFAULT_HIGHLIGHT_PREFIX: &'static str = "<em>";
-const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = "</em>";
+const DEFAULT_CROP_MARKER: &str = "…";
+const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
+const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";

/// Structure used to build a Matcher allowing to customize formating tags.
pub struct MatcherBuilder<'a, A> {
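
This hunk is clippy's `redundant_static_lifetimes`: `const` and `static` items already have the `'static` lifetime, so spelling it out adds noise. A minimal sketch:

```rust
// Before: the lifetime is spelled out even though it is implied.
// const MARKER: &'static str = "…";

// After: `const` items are implicitly `'static`.
const MARKER: &str = "…";

fn main() {
    println!("crop marker: {MARKER}");
}
```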
61 changes: 29 additions & 32 deletions milli/src/search/mod.rs
@@ -357,39 +357,36 @@ pub fn word_derivations<'c>(
} else if fst.contains(word) {
derived_words.push((word.to_string(), 0));
}
-            } else {
-                if max_typo == 1 {
-                    let dfa = build_dfa(word, 1, is_prefix);
-                    let starts = StartsWith(Str::new(get_first(word)));
-                    let mut stream =
-                        fst.search_with_state(Intersection(starts, &dfa)).into_stream();
+            } else if max_typo == 1 {
+                let dfa = build_dfa(word, 1, is_prefix);
+                let starts = StartsWith(Str::new(get_first(word)));
+                let mut stream = fst.search_with_state(Intersection(starts, &dfa)).into_stream();

-                    while let Some((word, state)) = stream.next() {
-                        let word = std::str::from_utf8(word)?;
-                        let d = dfa.distance(state.1);
-                        derived_words.push((word.to_string(), d.to_u8()));
-                    }
-                } else {
-                    let starts = StartsWith(Str::new(get_first(word)));
-                    let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
-                    let second_dfa = build_dfa(word, 2, is_prefix);
-                    let second = Intersection(&second_dfa, &starts);
-                    let automaton = Union(first, &second);
+                while let Some((word, state)) = stream.next() {
+                    let word = std::str::from_utf8(word)?;
+                    let d = dfa.distance(state.1);
+                    derived_words.push((word.to_string(), d.to_u8()));
+                }
+            } else {
+                let starts = StartsWith(Str::new(get_first(word)));
+                let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
+                let second_dfa = build_dfa(word, 2, is_prefix);
+                let second = Intersection(&second_dfa, &starts);
+                let automaton = Union(first, &second);

-                    let mut stream = fst.search_with_state(automaton).into_stream();
+                let mut stream = fst.search_with_state(automaton).into_stream();

-                    while let Some((found_word, state)) = stream.next() {
-                        let found_word = std::str::from_utf8(found_word)?;
-                        // in the case the typo is on the first letter, we know the number of typo
-                        // is two
-                        if get_first(found_word) != get_first(word) {
-                            derived_words.push((found_word.to_string(), 2));
-                        } else {
-                            // Else, we know that it is the second dfa that matched and compute the
-                            // correct distance
-                            let d = second_dfa.distance((state.1).0);
-                            derived_words.push((found_word.to_string(), d.to_u8()));
-                        }
-                    }
-                }
+                while let Some((found_word, state)) = stream.next() {
+                    let found_word = std::str::from_utf8(found_word)?;
+                    // in the case the typo is on the first letter, we know the number of typo
+                    // is two
+                    if get_first(found_word) != get_first(word) {
+                        derived_words.push((found_word.to_string(), 2));
+                    } else {
+                        // Else, we know that it is the second dfa that matched and compute the
+                        // correct distance
+                        let d = second_dfa.distance((state.1).0);
+                        derived_words.push((found_word.to_string(), d.to_u8()));
+                    }
+                }
}
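
The rewrite above is clippy's `collapsible_else_if`: an `else` block whose only statement is another `if` can be flattened into `else if`, removing one indentation level without changing behaviour. A minimal sketch:

```rust
fn describe(max_typo: u8) -> &'static str {
    // Before:
    // if max_typo == 0 {
    //     "exact"
    // } else {
    //     if max_typo == 1 { "one typo" } else { "two typos" }
    // }

    // After: the nested `if` is collapsed into `else if`.
    if max_typo == 0 {
        "exact"
    } else if max_typo == 1 {
        "one typo"
    } else {
        "two typos"
    }
}

fn main() {
    assert_eq!(describe(1), "one typo");
}
```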
milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
@@ -40,7 +40,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, _word_bytes) = try_split_array_at(key)
-            .ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);

let curr_document_id = *current_document_id.get_or_insert(document_id);
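
This change (repeated in several extractors below) is clippy's `unnecessary_lazy_evaluations` family: `ok_or_else` takes a closure and is meant for error values that are expensive to build, while a cheap struct literal like this one can be constructed eagerly with `ok_or`. A standalone sketch with hypothetical types:

```rust
#[derive(Debug)]
struct DecodingError {
    db_name: Option<&'static str>,
}

fn split_key(key: &[u8]) -> Option<(&[u8], &[u8])> {
    if key.len() >= 4 {
        Some(key.split_at(4))
    } else {
        None
    }
}

fn decode(key: &[u8]) -> Result<(&[u8], &[u8]), DecodingError> {
    // Before: the closure adds indirection without saving any work.
    // split_key(key).ok_or_else(|| DecodingError { db_name: Some("docid-word-positions") })
    // After: the error value is cheap, so build it eagerly.
    split_key(key).ok_or(DecodingError { db_name: Some("docid-word-positions") })
}

fn main() {
    assert!(decode(b"abcdefgh").is_ok());
    assert!(decode(b"ab").is_err());
}
```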
milli/src/update/index_documents/extract/extract_geo_points.rs
@@ -60,5 +60,5 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
}
}

-    Ok(writer_into_reader(writer)?)
+    writer_into_reader(writer)
}
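
This one is clippy's `needless_question_mark`: wrapping `expr?` back into `Ok(...)` as the final expression is a no-op when the inner and outer error types match; returning the `Result` directly is equivalent. A minimal sketch:

```rust
fn inner() -> Result<u32, String> {
    Ok(42)
}

// Before: `Ok(inner()?)` unwraps the result only to rewrap it.
fn outer_before() -> Result<u32, String> {
    Ok(inner()?)
}

// After: return the result directly.
fn outer_after() -> Result<u32, String> {
    inner()
}

fn main() {
    assert_eq!(outer_before().unwrap(), outer_after().unwrap());
}
```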
milli/src/update/index_documents/extract/extract_word_docids.rs
@@ -51,7 +51,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, positions)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
-            .ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);

let bitmap = RoaringBitmap::from_iter(Some(document_id));
Expand Down
milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
@@ -39,7 +39,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
-            .ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let word = str::from_utf8(word_bytes)?;

@@ -81,7 +81,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
///
/// This list is used by the engine to calculate the documents containing words that are
/// close to each other.
-fn document_word_positions_into_sorter<'b>(
+fn document_word_positions_into_sorter(
document_id: DocumentId,
mut word_positions_heap: BinaryHeap<PeekedWordPosition<vec::IntoIter<u32>>>,
word_pair_proximity_docids_sorter: &mut grenad::Sorter<MergeFn>,
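
Here clippy flags a lifetime parameter that is never referenced in the signature (`extra_unused_lifetimes`); dropping it simplifies the declaration. A minimal sketch with a hypothetical function:

```rust
// Before: `'b` is declared but never used by any argument or return type.
// fn sum_positions<'b>(positions: &[u32]) -> u64 { ... }

// After: no lifetime parameter is needed; elision handles the borrow.
fn sum_positions(positions: &[u32]) -> u64 {
    positions.iter().map(|&p| u64::from(p)).sum()
}

fn main() {
    assert_eq!(sum_positions(&[1, 2, 3]), 6);
}
```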
milli/src/update/index_documents/extract/extract_word_position_docids.rs
@@ -33,7 +33,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
-            .ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = DocumentId::from_be_bytes(document_id_bytes);

for position in read_u32_ne_bytes(value) {
Expand Down
26 changes: 13 additions & 13 deletions milli/src/update/index_documents/extract/mod.rs
@@ -96,7 +96,7 @@ pub(crate) fn data_from_obkv_documents(

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_word_positions_chunks.clone(),
-        indexer.clone(),
+        indexer,
lmdb_writer_sx.clone(),
extract_word_pair_proximity_docids,
merge_cbo_roaring_bitmaps,
@@ -106,7 +106,7 @@

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_word_positions_chunks.clone(),
-        indexer.clone(),
+        indexer,
lmdb_writer_sx.clone(),
extract_fid_word_count_docids,
merge_cbo_roaring_bitmaps,
@@ -116,7 +116,7 @@

spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
docid_word_positions_chunks.clone(),
-        indexer.clone(),
+        indexer,
lmdb_writer_sx.clone(),
move |doc_word_pos, indexer| extract_word_docids(doc_word_pos, indexer, &exact_attributes),
merge_roaring_bitmaps,
@@ -128,8 +128,8 @@
);

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
-        docid_word_positions_chunks.clone(),
-        indexer.clone(),
+        docid_word_positions_chunks,
+        indexer,
lmdb_writer_sx.clone(),
extract_word_position_docids,
merge_cbo_roaring_bitmaps,
@@ -138,8 +138,8 @@
);

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
-        docid_fid_facet_strings_chunks.clone(),
-        indexer.clone(),
+        docid_fid_facet_strings_chunks,
+        indexer,
lmdb_writer_sx.clone(),
extract_facet_string_docids,
keep_first_prefix_value_merge_roaring_bitmaps,
@@ -148,8 +148,8 @@
);

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
-        docid_fid_facet_numbers_chunks.clone(),
-        indexer.clone(),
+        docid_fid_facet_numbers_chunks,
+        indexer,
lmdb_writer_sx.clone(),
extract_facet_number_docids,
merge_cbo_roaring_bitmaps,
@@ -183,12 +183,12 @@ fn spawn_extraction_task<FE, FS, M>(
{
rayon::spawn(move || {
let chunks: Result<M> =
-            chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer.clone())).collect();
+            chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
rayon::spawn(move || match chunks {
Ok(chunks) => {
debug!("merge {} database", name);
let reader = chunks.merge(merge_fn, &indexer);
-                let _ = lmdb_writer_sx.send(reader.map(|r| serialize_fn(r)));
+                let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
}
Err(e) => {
let _ = lmdb_writer_sx.send(Err(e));
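
Two lints appear in this hunk: passing the `Copy` value `indexer` instead of cloning it, and clippy's `redundant_closure`, where `|r| serialize_fn(r)` only forwards its argument and can be replaced by the function itself. A minimal sketch of the closure case:

```rust
fn double(x: u32) -> u32 {
    x * 2
}

fn main() {
    let a: Result<u32, ()> = Ok(21);
    let b: Result<u32, ()> = Ok(21);

    // Before: the closure only forwards its argument.
    // After: pass the function directly.
    assert_eq!(a.map(|r| double(r)), b.map(double));
}
```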
@@ -255,7 +255,7 @@ fn send_and_extract_flattened_documents_data(
|| {
let (documents_ids, docid_word_positions_chunk) = extract_docid_word_positions(
flattened_documents_chunk.clone(),
-                    indexer.clone(),
+                    indexer,
searchable_fields,
stop_words.as_ref(),
max_positions_per_attributes,
@@ -279,7 +279,7 @@
fid_facet_exists_docids_chunk,
) = extract_fid_docid_facet_values(
flattened_documents_chunk.clone(),
-                indexer.clone(),
+                indexer,
faceted_fields,
)?;

11 changes: 5 additions & 6 deletions milli/src/update/index_documents/helpers/grenad_helpers.rs
@@ -61,7 +61,7 @@ pub fn sorter_into_reader(
);
sorter.write_into_stream_writer(&mut writer)?;

-    Ok(writer_into_reader(writer)?)
+    writer_into_reader(writer)
}

pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
@@ -134,7 +134,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
);
merger.write_into_stream_writer(&mut writer)?;

-        Ok(writer_into_reader(writer)?)
+        writer_into_reader(writer)
}
}

@@ -180,16 +180,15 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
let mut continue_reading = true;
let mut cursor = reader.into_cursor()?;

-    let indexer_clone = indexer.clone();
let mut transposer = move || {
if !continue_reading {
return Ok(None);
}

let mut current_chunk_size = 0u64;
let mut obkv_documents = create_writer(
-            indexer_clone.chunk_compression_type,
-            indexer_clone.chunk_compression_level,
+            indexer.chunk_compression_type,
+            indexer.chunk_compression_level,
tempfile::tempfile()?,
);

@@ -224,7 +223,7 @@ pub fn write_into_lmdb_database(
match iter.next().transpose()? {
Some((key, old_val)) if key == k => {
let vals = &[Cow::Borrowed(old_val), Cow::Borrowed(v)][..];
-                let val = merge(k, &vals)?;
+                let val = merge(k, vals)?;
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(k, &val)? };
}
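
This is clippy's `needless_borrow`: `vals` is already a slice reference (`&[Cow<[u8]>]`), so taking `&vals` only for the compiler to auto-dereference it again is redundant. A minimal sketch with hypothetical types:

```rust
fn merge(key: &[u8], values: &[Vec<u8>]) -> Vec<u8> {
    let mut out = key.to_vec();
    for v in values {
        out.extend_from_slice(v);
    }
    out
}

fn main() {
    let vals = &[vec![1u8], vec![2u8]][..]; // `vals` is already a `&[Vec<u8>]`

    // Before: `merge(b"k", &vals)` borrows a reference to a reference.
    // After: pass it as-is.
    let merged = merge(b"k", vals);
    assert_eq!(merged, vec![b'k', 1, 2]);
}
```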
milli/src/update/index_documents/helpers/merge_functions.rs
@@ -88,7 +88,7 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
/// Merge all the obks in the order we see them.
pub fn merge_obkvs<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs
-        .into_iter()
+        .iter()
.cloned()
.reduce(|acc, current| {
let first = obkv::KvReader::new(&acc);
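
This is clippy's `into_iter_on_ref`: calling `into_iter()` on a `&[T]` cannot consume the slice and silently behaves like `iter()`, so writing `iter()` states what actually happens. A minimal sketch:

```rust
fn main() {
    let obkvs = [vec![1u8], vec![2u8]];

    // Before: `into_iter()` on a reference yields `&Vec<u8>` items anyway.
    let before: Vec<Vec<u8>> = (&obkvs).into_iter().cloned().collect();

    // After: `iter()` makes the borrowing explicit.
    let after: Vec<Vec<u8>> = obkvs.iter().cloned().collect();

    assert_eq!(before, after);
}
```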
41 changes: 15 additions & 26 deletions milli/src/update/index_documents/mod.rs
@@ -106,7 +106,7 @@ where
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, F>> {
let transform = Some(Transform::new(
wtxn,
-        &index,
+        index,
indexer_config,
config.update_method,
config.autogenerate_docids,
@@ -291,18 +291,12 @@
// Run extraction pipeline in parallel.
pool.install(|| {
// split obkv file into several chunks
-            let original_chunk_iter = grenad_obkv_into_chunks(
-                original_documents,
-                pool_params.clone(),
-                documents_chunk_size,
-            );
+            let original_chunk_iter =
+                grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);

// split obkv file into several chunks
-            let flattened_chunk_iter = grenad_obkv_into_chunks(
-                flattened_documents,
-                pool_params.clone(),
-                documents_chunk_size,
-            );
+            let flattened_chunk_iter =
+                grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size);

let result = original_chunk_iter.and_then(|original_chunk| {
let flattened_chunk = flattened_chunk_iter?;
Expand Down Expand Up @@ -341,7 +335,7 @@ where
}

let index_documents_ids = self.index.documents_ids(self.wtxn)?;
-        let index_is_empty = index_documents_ids.len() == 0;
+        let index_is_empty = index_documents_ids.is_empty();
let mut final_documents_ids = RoaringBitmap::new();
let mut word_pair_proximity_docids = None;
let mut word_position_docids = None;
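
The `len() == 0` rewrite is clippy's `len_zero`: collections expose `is_empty()`, which reads more clearly and, for some types, is cheaper than computing a full length. A minimal sketch:

```rust
fn main() {
    let ids: Vec<u32> = Vec::new();

    // Before: compare the length against zero.
    let empty_before = ids.len() == 0;

    // After: ask the collection directly.
    let empty_after = ids.is_empty();

    assert!(empty_before && empty_after);
}
```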
@@ -378,7 +372,7 @@
};

let (docids, is_merged_database) =
-            write_typed_chunk_into_index(typed_chunk, &self.index, self.wtxn, index_is_empty)?;
+            write_typed_chunk_into_index(typed_chunk, self.index, self.wtxn, index_is_empty)?;
if !docids.is_empty() {
final_documents_ids |= docids;
let documents_seen_count = final_documents_ids.len();
@@ -475,7 +469,7 @@
);
let common_prefix_fst_words: Vec<_> = common_prefix_fst_words
.as_slice()
-            .linear_group_by_key(|x| x.chars().nth(0).unwrap())
+            .linear_group_by_key(|x| x.chars().next().unwrap())
.collect();

// We retrieve the newly added words between the previous and new prefix word fst.
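
And this one is clippy's `iter_nth_zero`: `chars().nth(0)` advances the iterator just to get the first element, which is exactly what `chars().next()` does. A minimal sketch:

```rust
fn first_char(s: &str) -> Option<char> {
    // Before: s.chars().nth(0)
    // After: `next()` expresses "first element" directly.
    s.chars().next()
}

fn main() {
    assert_eq!(first_char("word"), Some('w'));
    assert_eq!(first_char(""), None);
}
```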
@@ -498,9 +492,9 @@
execute_word_prefix_docids(
self.wtxn,
word_docids,
-                self.index.word_docids.clone(),
-                self.index.word_prefix_docids.clone(),
-                &self.indexer_config,
+                self.index.word_docids,
+                self.index.word_prefix_docids,
+                self.indexer_config,
&new_prefix_fst_words,
&common_prefix_fst_words,
&del_prefix_fst_words,
@@ -511,9 +505,9 @@
execute_word_prefix_docids(
self.wtxn,
exact_word_docids,
-                self.index.exact_word_docids.clone(),
-                self.index.exact_word_prefix_docids.clone(),
-                &self.indexer_config,
+                self.index.exact_word_docids,
+                self.index.exact_word_prefix_docids,
+                self.indexer_config,
&new_prefix_fst_words,
&common_prefix_fst_words,
&del_prefix_fst_words,
@@ -595,12 +589,7 @@ fn execute_word_prefix_docids(
builder.chunk_compression_level = indexer_config.chunk_compression_level;
builder.max_nb_chunks = indexer_config.max_nb_chunks;
builder.max_memory = indexer_config.max_memory;
-    builder.execute(
-        cursor,
-        &new_prefix_fst_words,
-        &common_prefix_fst_words,
-        &del_prefix_fst_words,
-    )?;
+    builder.execute(cursor, new_prefix_fst_words, common_prefix_fst_words, del_prefix_fst_words)?;
Ok(())
}
