Skip to content

Commit

Permalink
schema: use par_sort_unstable when sorting unique values per field
Browse files Browse the repository at this point in the history
as there can potentially be many values (high cardinality) most of the time, it's worth the parallel overhead
  • Loading branch information
jqnatividad committed Jan 21, 2024
1 parent 41ad882 commit 523c60a
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/cmd/schema.rs
Expand Up @@ -88,6 +88,7 @@ use csv::ByteRecord;
use grex::RegExpBuilder;
use itertools::Itertools;
use log::{debug, error, info, warn};
use rayon::slice::ParallelSliceMut;
use serde::Deserialize;
use serde_json::{json, value::Number, Map, Value};
use stats::Frequencies;
Expand Down Expand Up @@ -678,7 +679,7 @@ fn construct_map_of_unique_values(
let header_string = convert_to_string(header_byte_slice)?;

// sort the values of the enum list so the schema can be diff'ed between runs
unique_values.sort_unstable();
unique_values.par_sort_unstable();

// if log::log_enabled!(log::Level::Debug) {
// // we do this as this debug is relatively expensive
Expand Down

0 comments on commit 523c60a

Please sign in to comment.