From 54665db39e53bbcdfee10c9032731a3e89ded32b Mon Sep 17 00:00:00 2001
From: Michael Leandersson
Date: Thu, 22 Dec 2022 12:53:40 +0100
Subject: [PATCH 1/2] fix csv infer_schema on empty fields

---
 src/io/csv/read/infer_schema.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/io/csv/read/infer_schema.rs b/src/io/csv/read/infer_schema.rs
index 4aa3eaf53ea..c7bd6c84d5f 100644
--- a/src/io/csv/read/infer_schema.rs
+++ b/src/io/csv/read/infer_schema.rs
@@ -46,7 +46,9 @@ pub fn infer_schema<R: Read + Seek, F: Fn(&[u8]) -> DataType>(
 
         for (i, column) in column_types.iter_mut().enumerate() {
             if let Some(string) = record.get(i) {
-                column.insert(infer(string));
+                if !string.is_empty() {
+                    column.insert(infer(string));
+                }
             }
         }
     }

From 9cc8041edd1ee9238a3c73e26710bd0198a2d3ed Mon Sep 17 00:00:00 2001
From: Michael Leandersson
Date: Tue, 10 Jan 2023 10:38:30 +0100
Subject: [PATCH 2/2] Add test for csv::infer_schema with empty fields

---
 tests/it/io/csv/read.rs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/it/io/csv/read.rs b/tests/it/io/csv/read.rs
index 975c3f3fab1..c1756a71cf8 100644
--- a/tests/it/io/csv/read.rs
+++ b/tests/it/io/csv/read.rs
@@ -88,6 +88,24 @@ fn infer_ints() -> Result<()> {
     Ok(())
 }
 
+#[test]
+fn infer_ints_with_empty_fields() -> Result<()> {
+    let file = Cursor::new("1,2,3\n1,3,5\n2,,4");
+    let mut reader = ReaderBuilder::new().from_reader(file);
+
+    let (fields, _) = infer_schema(&mut reader, Some(10), false, &infer)?;
+
+    assert_eq!(
+        fields,
+        vec![
+            Field::new("column_1", DataType::Int64, true),
+            Field::new("column_2", DataType::Int64, true),
+            Field::new("column_3", DataType::Int64, true),
+        ]
+    );
+    Ok(())
+}
+
 fn test_deserialize(input: &str, data_type: DataType) -> Result<Box<dyn Array>> {
     let reader = std::io::Cursor::new(input);
     let mut reader = ReaderBuilder::new().has_headers(false).from_reader(reader);