Commit: Merge #665
665: Fixing piles of clippy errors. r=ManyTheFish a=ehiggs

## Related issue
No issue fixed; this simply cleans up some code for clippy, on the march towards a clean build once #659 is merged.

## What does this PR do?
Most of these are calls to `clone()` on structs that implement `Copy`, where a plain copy suffices.

Many others drop redundant `&` and `&mut` borrows of `self` (and other values) where the enclosing function already holds an immutable or mutable borrow, so the extra borrow isn't needed.

I tried to stay away from behavioural changes or places where I'd have to name fresh variables.
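
For illustration, here is a minimal standalone sketch (hypothetical types, not code from this PR) of the two lint classes described above, clippy's `clone_on_copy` and `needless_borrow`:

```rust
#[derive(Clone, Copy)]
struct Params {
    level: u32,
}

fn use_params(params: Params) -> u32 {
    params.level
}

fn main() {
    let params = Params { level: 3 };

    // Before: `clone()` on a `Copy` type; clippy's `clone_on_copy` warns
    // because a plain copy is implicit and cheaper to read.
    let a = use_params(params.clone());

    // After: just pass the value; it is copied automatically.
    let b = use_params(params);

    // Likewise, `&` on a value that is already a reference is a needless
    // borrow: e.g. `merge(k, &vals)` becomes `merge(k, vals)` when `vals`
    // is already a `&[T]`.
    assert_eq!(a, b);
}
```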

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com>
bors[bot] and ehiggs committed Oct 20, 2022
2 parents 19b2326 + beb987d commit f11a408
Showing 18 changed files with 139 additions and 159 deletions.
2 changes: 1 addition & 1 deletion milli/src/lib.rs
@@ -195,7 +195,7 @@ pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
/// Returns `true` if the field match one of the faceted fields.
/// See the function [`is_faceted_by`] below to see what “matching” means.
pub fn is_faceted(field: &str, faceted_fields: impl IntoIterator<Item = impl AsRef<str>>) -> bool {
-    faceted_fields.into_iter().find(|facet| is_faceted_by(field, facet.as_ref())).is_some()
+    faceted_fields.into_iter().any(|facet| is_faceted_by(field, facet.as_ref()))
}

/// Returns `true` if the field match the facet.
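
The change above is clippy's `search_is_some` lint: `find(...).is_some()` scans for a matching element and then only checks existence, which is exactly what `any(...)` expresses directly. A minimal standalone sketch (hypothetical data, not from this repository):

```rust
fn main() {
    let fields = ["title", "genres", "release_date"];

    // Before: find + is_some.
    let faceted_before = fields.iter().find(|f| **f == "genres").is_some();

    // After: any() states the intent directly and short-circuits the same way.
    let faceted_after = fields.iter().any(|f| *f == "genres");

    assert_eq!(faceted_before, faceted_after);
}
```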
6 changes: 3 additions & 3 deletions milli/src/search/matches/mod.rs
@@ -7,9 +7,9 @@ use serde::Serialize;

pub mod matching_words;

-const DEFAULT_CROP_MARKER: &'static str = "…";
-const DEFAULT_HIGHLIGHT_PREFIX: &'static str = "<em>";
-const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = "</em>";
+const DEFAULT_CROP_MARKER: &str = "…";
+const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
+const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";

/// Structure used to build a Matcher allowing to customize formating tags.
pub struct MatcherBuilder<'a, A> {
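
This hunk is clippy's `redundant_static_lifetimes`: `const` and `static` items already have the `'static` lifetime, so spelling it out adds noise. A minimal sketch:

```rust
// Before: the lifetime is spelled out even though it is implied.
// const MARKER: &'static str = "…";

// After: `const` items are implicitly `'static`.
const MARKER: &str = "…";

fn main() {
    println!("crop marker: {MARKER}");
}
```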
61 changes: 29 additions & 32 deletions milli/src/search/mod.rs
@@ -357,39 +357,36 @@ pub fn word_derivations<'c>(
} else if fst.contains(word) {
derived_words.push((word.to_string(), 0));
}
-            } else {
-                if max_typo == 1 {
-                    let dfa = build_dfa(word, 1, is_prefix);
-                    let starts = StartsWith(Str::new(get_first(word)));
-                    let mut stream =
-                        fst.search_with_state(Intersection(starts, &dfa)).into_stream();
+            } else if max_typo == 1 {
+                let dfa = build_dfa(word, 1, is_prefix);
+                let starts = StartsWith(Str::new(get_first(word)));
+                let mut stream = fst.search_with_state(Intersection(starts, &dfa)).into_stream();

-                    while let Some((word, state)) = stream.next() {
-                        let word = std::str::from_utf8(word)?;
-                        let d = dfa.distance(state.1);
-                        derived_words.push((word.to_string(), d.to_u8()));
-                    }
-                } else {
-                    let starts = StartsWith(Str::new(get_first(word)));
-                    let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
-                    let second_dfa = build_dfa(word, 2, is_prefix);
-                    let second = Intersection(&second_dfa, &starts);
-                    let automaton = Union(first, &second);
+                while let Some((word, state)) = stream.next() {
+                    let word = std::str::from_utf8(word)?;
+                    let d = dfa.distance(state.1);
+                    derived_words.push((word.to_string(), d.to_u8()));
+                }
+            } else {
+                let starts = StartsWith(Str::new(get_first(word)));
+                let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
+                let second_dfa = build_dfa(word, 2, is_prefix);
+                let second = Intersection(&second_dfa, &starts);
+                let automaton = Union(first, &second);

-                    let mut stream = fst.search_with_state(automaton).into_stream();
+                let mut stream = fst.search_with_state(automaton).into_stream();

-                    while let Some((found_word, state)) = stream.next() {
-                        let found_word = std::str::from_utf8(found_word)?;
-                        // in the case the typo is on the first letter, we know the number of typo
-                        // is two
-                        if get_first(found_word) != get_first(word) {
-                            derived_words.push((found_word.to_string(), 2));
-                        } else {
-                            // Else, we know that it is the second dfa that matched and compute the
-                            // correct distance
-                            let d = second_dfa.distance((state.1).0);
-                            derived_words.push((found_word.to_string(), d.to_u8()));
-                        }
-                    }
-                }
+                while let Some((found_word, state)) = stream.next() {
+                    let found_word = std::str::from_utf8(found_word)?;
+                    // in the case the typo is on the first letter, we know the number of typo
+                    // is two
+                    if get_first(found_word) != get_first(word) {
+                        derived_words.push((found_word.to_string(), 2));
+                    } else {
+                        // Else, we know that it is the second dfa that matched and compute the
+                        // correct distance
+                        let d = second_dfa.distance((state.1).0);
+                        derived_words.push((found_word.to_string(), d.to_u8()));
+                    }
+                }
}
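
The rewrite above is clippy's `collapsible_else_if`: an `else` block whose only statement is another `if` can be flattened into `else if`, removing one indentation level without changing behaviour. A minimal sketch:

```rust
fn describe(max_typo: u8) -> &'static str {
    // Before:
    // if max_typo == 0 {
    //     "exact"
    // } else {
    //     if max_typo == 1 { "one typo" } else { "two typos" }
    // }

    // After: the nested `if` is collapsed into `else if`.
    if max_typo == 0 {
        "exact"
    } else if max_typo == 1 {
        "one typo"
    } else {
        "two typos"
    }
}

fn main() {
    assert_eq!(describe(1), "one typo");
}
```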
milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
@@ -40,7 +40,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, _word_bytes) = try_split_array_at(key)
-            .ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);

let curr_document_id = *current_document_id.get_or_insert(document_id);
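
This change (repeated in several extractors below) is clippy's `unnecessary_lazy_evaluations` family: `ok_or_else` takes a closure and is meant for error values that are expensive to build, while a cheap struct literal like this one can be constructed eagerly with `ok_or`. A standalone sketch with hypothetical types:

```rust
#[derive(Debug)]
struct DecodingError {
    db_name: Option<&'static str>,
}

fn split_key(key: &[u8]) -> Option<(&[u8], &[u8])> {
    if key.len() >= 4 {
        Some(key.split_at(4))
    } else {
        None
    }
}

fn decode(key: &[u8]) -> Result<(&[u8], &[u8]), DecodingError> {
    // Before: the closure adds indirection without saving any work.
    // split_key(key).ok_or_else(|| DecodingError { db_name: Some("docid-word-positions") })
    // After: the error value is cheap, so build it eagerly.
    split_key(key).ok_or(DecodingError { db_name: Some("docid-word-positions") })
}

fn main() {
    assert!(decode(b"abcdefgh").is_ok());
    assert!(decode(b"ab").is_err());
}
```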
milli/src/update/index_documents/extract/extract_geo_points.rs
@@ -60,5 +60,5 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
}
}

-    Ok(writer_into_reader(writer)?)
+    writer_into_reader(writer)
}
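
This one is clippy's `needless_question_mark`: wrapping `expr?` back into `Ok(...)` as the final expression is a no-op when the inner and outer error types match; returning the `Result` directly is equivalent. A minimal sketch:

```rust
fn inner() -> Result<u32, String> {
    Ok(42)
}

// Before: `Ok(inner()?)` unwraps the result only to rewrap it.
fn outer_before() -> Result<u32, String> {
    Ok(inner()?)
}

// After: return the result directly.
fn outer_after() -> Result<u32, String> {
    inner()
}

fn main() {
    assert_eq!(outer_before().unwrap(), outer_after().unwrap());
}
```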
milli/src/update/index_documents/extract/extract_word_docids.rs
@@ -51,7 +51,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, positions)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
-            .ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);

let bitmap = RoaringBitmap::from_iter(Some(document_id));
Expand Down
milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
@@ -39,7 +39,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
-            .ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let word = str::from_utf8(word_bytes)?;

@@ -81,7 +81,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
///
/// This list is used by the engine to calculate the documents containing words that are
/// close to each other.
-fn document_word_positions_into_sorter<'b>(
+fn document_word_positions_into_sorter(
document_id: DocumentId,
mut word_positions_heap: BinaryHeap<PeekedWordPosition<vec::IntoIter<u32>>>,
word_pair_proximity_docids_sorter: &mut grenad::Sorter<MergeFn>,
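
Here clippy flags a lifetime parameter that is never referenced in the signature (`extra_unused_lifetimes`); dropping it simplifies the declaration. A minimal sketch with a hypothetical function:

```rust
// Before: `'b` is declared but never used by any argument or return type.
// fn sum_positions<'b>(positions: &[u32]) -> u64 { ... }

// After: no lifetime parameter is needed; elision handles the borrow.
fn sum_positions(positions: &[u32]) -> u64 {
    positions.iter().map(|&p| u64::from(p)).sum()
}

fn main() {
    assert_eq!(sum_positions(&[1, 2, 3]), 6);
}
```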
milli/src/update/index_documents/extract/extract_word_position_docids.rs
@@ -33,7 +33,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
-            .ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = DocumentId::from_be_bytes(document_id_bytes);

for position in read_u32_ne_bytes(value) {
Expand Down
26 changes: 13 additions & 13 deletions milli/src/update/index_documents/extract/mod.rs
@@ -96,7 +96,7 @@ pub(crate) fn data_from_obkv_documents(

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_word_positions_chunks.clone(),
-        indexer.clone(),
+        indexer,
lmdb_writer_sx.clone(),
extract_word_pair_proximity_docids,
merge_cbo_roaring_bitmaps,
@@ -106,7 +106,7 @@

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_word_positions_chunks.clone(),
-        indexer.clone(),
+        indexer,
lmdb_writer_sx.clone(),
extract_fid_word_count_docids,
merge_cbo_roaring_bitmaps,
@@ -116,7 +116,7 @@

spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
docid_word_positions_chunks.clone(),
-        indexer.clone(),
+        indexer,
lmdb_writer_sx.clone(),
move |doc_word_pos, indexer| extract_word_docids(doc_word_pos, indexer, &exact_attributes),
merge_roaring_bitmaps,
@@ -128,8 +128,8 @@
);

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
-        docid_word_positions_chunks.clone(),
-        indexer.clone(),
+        docid_word_positions_chunks,
+        indexer,
lmdb_writer_sx.clone(),
extract_word_position_docids,
merge_cbo_roaring_bitmaps,
@@ -138,8 +138,8 @@
);

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
-        docid_fid_facet_strings_chunks.clone(),
-        indexer.clone(),
+        docid_fid_facet_strings_chunks,
+        indexer,
lmdb_writer_sx.clone(),
extract_facet_string_docids,
keep_first_prefix_value_merge_roaring_bitmaps,
@@ -148,8 +148,8 @@
);

spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
-        docid_fid_facet_numbers_chunks.clone(),
-        indexer.clone(),
+        docid_fid_facet_numbers_chunks,
+        indexer,
lmdb_writer_sx.clone(),
extract_facet_number_docids,
merge_cbo_roaring_bitmaps,
@@ -183,12 +183,12 @@ fn spawn_extraction_task<FE, FS, M>(
{
rayon::spawn(move || {
let chunks: Result<M> =
-            chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer.clone())).collect();
+            chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
rayon::spawn(move || match chunks {
Ok(chunks) => {
debug!("merge {} database", name);
let reader = chunks.merge(merge_fn, &indexer);
-                let _ = lmdb_writer_sx.send(reader.map(|r| serialize_fn(r)));
+                let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
}
Err(e) => {
let _ = lmdb_writer_sx.send(Err(e));
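
Two lints appear in this hunk: passing the `Copy` value `indexer` instead of cloning it, and clippy's `redundant_closure`, where `|r| serialize_fn(r)` only forwards its argument and can be replaced by the function itself. A minimal sketch of the closure case:

```rust
fn double(x: u32) -> u32 {
    x * 2
}

fn main() {
    let a: Result<u32, ()> = Ok(21);
    let b: Result<u32, ()> = Ok(21);

    // Before: the closure only forwards its argument.
    // After: pass the function directly.
    assert_eq!(a.map(|r| double(r)), b.map(double));
}
```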
@@ -255,7 +255,7 @@ fn send_and_extract_flattened_documents_data(
|| {
let (documents_ids, docid_word_positions_chunk) = extract_docid_word_positions(
flattened_documents_chunk.clone(),
-                    indexer.clone(),
+                    indexer,
searchable_fields,
stop_words.as_ref(),
max_positions_per_attributes,
@@ -279,7 +279,7 @@
fid_facet_exists_docids_chunk,
) = extract_fid_docid_facet_values(
flattened_documents_chunk.clone(),
-                indexer.clone(),
+                indexer,
faceted_fields,
)?;

11 changes: 5 additions & 6 deletions milli/src/update/index_documents/helpers/grenad_helpers.rs
@@ -61,7 +61,7 @@ pub fn sorter_into_reader(
);
sorter.write_into_stream_writer(&mut writer)?;

-    Ok(writer_into_reader(writer)?)
+    writer_into_reader(writer)
}

pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
@@ -134,7 +134,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
);
merger.write_into_stream_writer(&mut writer)?;

-        Ok(writer_into_reader(writer)?)
+        writer_into_reader(writer)
}
}

@@ -180,16 +180,15 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
let mut continue_reading = true;
let mut cursor = reader.into_cursor()?;

-    let indexer_clone = indexer.clone();
let mut transposer = move || {
if !continue_reading {
return Ok(None);
}

let mut current_chunk_size = 0u64;
let mut obkv_documents = create_writer(
-            indexer_clone.chunk_compression_type,
-            indexer_clone.chunk_compression_level,
+            indexer.chunk_compression_type,
+            indexer.chunk_compression_level,
tempfile::tempfile()?,
);

@@ -224,7 +223,7 @@ pub fn write_into_lmdb_database(
match iter.next().transpose()? {
Some((key, old_val)) if key == k => {
let vals = &[Cow::Borrowed(old_val), Cow::Borrowed(v)][..];
-                let val = merge(k, &vals)?;
+                let val = merge(k, vals)?;
// safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(k, &val)? };
}
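
This is clippy's `needless_borrow`: `vals` is already a slice reference (`&[Cow<[u8]>]`), so taking `&vals` only for the compiler to auto-dereference it again is redundant. A minimal sketch with hypothetical types:

```rust
fn merge(key: &[u8], values: &[Vec<u8>]) -> Vec<u8> {
    let mut out = key.to_vec();
    for v in values {
        out.extend_from_slice(v);
    }
    out
}

fn main() {
    let vals = &[vec![1u8], vec![2u8]][..]; // `vals` is already a `&[Vec<u8>]`

    // Before: `merge(b"k", &vals)` borrows a reference to a reference.
    // After: pass it as-is.
    let merged = merge(b"k", vals);
    assert_eq!(merged, vec![b'k', 1, 2]);
}
```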
milli/src/update/index_documents/helpers/merge_functions.rs
@@ -88,7 +88,7 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
/// Merge all the obks in the order we see them.
pub fn merge_obkvs<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs
-        .into_iter()
+        .iter()
.cloned()
.reduce(|acc, current| {
let first = obkv::KvReader::new(&acc);
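
This is clippy's `into_iter_on_ref`: calling `into_iter()` on a `&[T]` cannot consume the slice and silently behaves like `iter()`, so writing `iter()` states what actually happens. A minimal sketch:

```rust
fn main() {
    let obkvs = [vec![1u8], vec![2u8]];

    // Before: `into_iter()` on a reference yields `&Vec<u8>` items anyway.
    let before: Vec<Vec<u8>> = (&obkvs).into_iter().cloned().collect();

    // After: `iter()` makes the borrowing explicit.
    let after: Vec<Vec<u8>> = obkvs.iter().cloned().collect();

    assert_eq!(before, after);
}
```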
41 changes: 15 additions & 26 deletions milli/src/update/index_documents/mod.rs
@@ -106,7 +106,7 @@ where
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, F>> {
let transform = Some(Transform::new(
wtxn,
-        &index,
+        index,
indexer_config,
config.update_method,
config.autogenerate_docids,
@@ -291,18 +291,12 @@
// Run extraction pipeline in parallel.
pool.install(|| {
// split obkv file into several chunks
-            let original_chunk_iter = grenad_obkv_into_chunks(
-                original_documents,
-                pool_params.clone(),
-                documents_chunk_size,
-            );
+            let original_chunk_iter =
+                grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);

// split obkv file into several chunks
-            let flattened_chunk_iter = grenad_obkv_into_chunks(
-                flattened_documents,
-                pool_params.clone(),
-                documents_chunk_size,
-            );
+            let flattened_chunk_iter =
+                grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size);

let result = original_chunk_iter.and_then(|original_chunk| {
let flattened_chunk = flattened_chunk_iter?;
Expand Down Expand Up @@ -341,7 +335,7 @@ where
}

let index_documents_ids = self.index.documents_ids(self.wtxn)?;
-        let index_is_empty = index_documents_ids.len() == 0;
+        let index_is_empty = index_documents_ids.is_empty();
let mut final_documents_ids = RoaringBitmap::new();
let mut word_pair_proximity_docids = None;
let mut word_position_docids = None;
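
The `len() == 0` rewrite is clippy's `len_zero`: collections expose `is_empty()`, which reads more clearly and, for some types, is cheaper than computing a full length. A minimal sketch:

```rust
fn main() {
    let ids: Vec<u32> = Vec::new();

    // Before: compare the length against zero.
    let empty_before = ids.len() == 0;

    // After: ask the collection directly.
    let empty_after = ids.is_empty();

    assert!(empty_before && empty_after);
}
```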
@@ -378,7 +372,7 @@
};

let (docids, is_merged_database) =
-            write_typed_chunk_into_index(typed_chunk, &self.index, self.wtxn, index_is_empty)?;
+            write_typed_chunk_into_index(typed_chunk, self.index, self.wtxn, index_is_empty)?;
if !docids.is_empty() {
final_documents_ids |= docids;
let documents_seen_count = final_documents_ids.len();
@@ -475,7 +469,7 @@
);
let common_prefix_fst_words: Vec<_> = common_prefix_fst_words
.as_slice()
-            .linear_group_by_key(|x| x.chars().nth(0).unwrap())
+            .linear_group_by_key(|x| x.chars().next().unwrap())
.collect();

// We retrieve the newly added words between the previous and new prefix word fst.
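
And this one is clippy's `iter_nth_zero`: `chars().nth(0)` advances the iterator just to get the first element, which is exactly what `chars().next()` does. A minimal sketch:

```rust
fn first_char(s: &str) -> Option<char> {
    // Before: s.chars().nth(0)
    // After: `next()` expresses "first element" directly.
    s.chars().next()
}

fn main() {
    assert_eq!(first_char("word"), Some('w'));
    assert_eq!(first_char(""), None);
}
```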
@@ -498,9 +492,9 @@
execute_word_prefix_docids(
self.wtxn,
word_docids,
-                self.index.word_docids.clone(),
-                self.index.word_prefix_docids.clone(),
-                &self.indexer_config,
+                self.index.word_docids,
+                self.index.word_prefix_docids,
+                self.indexer_config,
&new_prefix_fst_words,
&common_prefix_fst_words,
&del_prefix_fst_words,
@@ -511,9 +505,9 @@
execute_word_prefix_docids(
self.wtxn,
exact_word_docids,
-                self.index.exact_word_docids.clone(),
-                self.index.exact_word_prefix_docids.clone(),
-                &self.indexer_config,
+                self.index.exact_word_docids,
+                self.index.exact_word_prefix_docids,
+                self.indexer_config,
&new_prefix_fst_words,
&common_prefix_fst_words,
&del_prefix_fst_words,
@@ -595,12 +589,7 @@ fn execute_word_prefix_docids(
builder.chunk_compression_level = indexer_config.chunk_compression_level;
builder.max_nb_chunks = indexer_config.max_nb_chunks;
builder.max_memory = indexer_config.max_memory;
-    builder.execute(
-        cursor,
-        &new_prefix_fst_words,
-        &common_prefix_fst_words,
-        &del_prefix_fst_words,
-    )?;
+    builder.execute(cursor, new_prefix_fst_words, common_prefix_fst_words, del_prefix_fst_words)?;
Ok(())
}
