From 523c60a36bf45b4df5e66f3951a91948c22d5261 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Sat, 20 Jan 2024 22:53:52 -0500 Subject: [PATCH] `schema`: use par_sort_unstable when sorting unique values per field as there can be potentially many values (high cardinality); most of the time, it's worth the parallel overhead --- src/cmd/schema.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cmd/schema.rs b/src/cmd/schema.rs index 780bf187d..94425a030 100644 --- a/src/cmd/schema.rs +++ b/src/cmd/schema.rs @@ -88,6 +88,7 @@ use csv::ByteRecord; use grex::RegExpBuilder; use itertools::Itertools; use log::{debug, error, info, warn}; +use rayon::slice::ParallelSliceMut; use serde::Deserialize; use serde_json::{json, value::Number, Map, Value}; use stats::Frequencies; @@ -678,7 +679,7 @@ fn construct_map_of_unique_values( let header_string = convert_to_string(header_byte_slice)?; // sort the values so enum list so schema can be diff'ed between runs - unique_values.sort_unstable(); + unique_values.par_sort_unstable(); // if log::log_enabled!(log::Level::Debug) { // // we do this as this debug is relatively expensive