From c390cdd21e1e204b6e04302622cb17dee6427884 Mon Sep 17 00:00:00 2001 From: Michael Leandersson Date: Mon, 16 Jan 2023 23:07:51 +0100 Subject: [PATCH] Fixed csv infer_schema on empty fields (#1342) * fix csv infer_schema on empty fields * Add test for csv::infer_schema with empty fields --- src/io/csv/read/infer_schema.rs | 4 +++- tests/it/io/csv/read.rs | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/io/csv/read/infer_schema.rs b/src/io/csv/read/infer_schema.rs index 4aa3eaf53ea..c7bd6c84d5f 100644 --- a/src/io/csv/read/infer_schema.rs +++ b/src/io/csv/read/infer_schema.rs @@ -46,7 +46,9 @@ pub fn infer_schema<R: Read + Seek, F: Fn(&[u8]) -> DataType>( for (i, column) in column_types.iter_mut().enumerate() { if let Some(string) = record.get(i) { - column.insert(infer(string)); + if !string.is_empty() { + column.insert(infer(string)); + } } } } diff --git a/tests/it/io/csv/read.rs b/tests/it/io/csv/read.rs index 975c3f3fab1..c1756a71cf8 100644 --- a/tests/it/io/csv/read.rs +++ b/tests/it/io/csv/read.rs @@ -88,6 +88,24 @@ fn infer_ints() -> Result<()> { Ok(()) } +#[test] +fn infer_ints_with_empty_fields() -> Result<()> { + let file = Cursor::new("1,2,3\n1,3,5\n2,,4"); + let mut reader = ReaderBuilder::new().from_reader(file); + + let (fields, _) = infer_schema(&mut reader, Some(10), false, &infer)?; + + assert_eq!( + fields, + vec![ + Field::new("column_1", DataType::Int64, true), + Field::new("column_2", DataType::Int64, true), + Field::new("column_3", DataType::Int64, true), + ] + ); + Ok(()) +} + fn test_deserialize(input: &str, data_type: DataType) -> Result<Box<dyn Array>> { let reader = std::io::Cursor::new(input); let mut reader = ReaderBuilder::new().has_headers(false).from_reader(reader);