Skip to content

Commit

Permalink
Fixed csv infer_schema on empty fields (jorgecarleitao#1342)
Browse files (browse the repository at this point in the history)
* fix csv infer_schema on empty fields

* Add test for csv::infer_schema with empty fields
  • Loading branch information
tripokey authored and ritchie46 committed Mar 29, 2023
1 parent 380d7dc commit c390cdd
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/io/csv/read/infer_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ pub fn infer_schema<R: Read + Seek, F: Fn(&[u8]) -> DataType>(

for (i, column) in column_types.iter_mut().enumerate() {
if let Some(string) = record.get(i) {
column.insert(infer(string));
if !string.is_empty() {
column.insert(infer(string));
}
}
}
}
Expand Down
18 changes: 18 additions & 0 deletions tests/it/io/csv/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,24 @@ fn infer_ints() -> Result<()> {
Ok(())
}

// Regression test: the last row leaves its second field empty (`2,,4`), and
// every column must still be reported as a nullable `Int64`.
#[test]
fn infer_ints_with_empty_fields() -> Result<()> {
    let data = Cursor::new("1,2,3\n1,3,5\n2,,4");
    let mut reader = ReaderBuilder::new().from_reader(data);

    let (fields, _) = infer_schema(&mut reader, Some(10), false, &infer)?;

    // All three columns share the same expected type and nullability, so
    // build the expectation from the column names alone.
    let expected: Vec<Field> = ["column_1", "column_2", "column_3"]
        .iter()
        .map(|name| Field::new(*name, DataType::Int64, true))
        .collect();

    assert_eq!(fields, expected);
    Ok(())
}

fn test_deserialize(input: &str, data_type: DataType) -> Result<Box<dyn Array>> {
let reader = std::io::Cursor::new(input);
let mut reader = ReaderBuilder::new().has_headers(false).from_reader(reader);
Expand Down

0 comments on commit c390cdd

Please sign in to comment.