Skip to content

Commit

Permalink
fix(json): ndjson with trailing newlines (#5787)
Browse files Browse the repository at this point in the history
  • Loading branch information
universalmind303 committed Dec 12, 2022
1 parent 1fe5abf commit 7a56223
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 1 deletion.
7 changes: 6 additions & 1 deletion polars/polars-io/src/ndjson_core/ndjson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,10 +349,15 @@ pub(crate) fn get_line_stats_json(bytes: &[u8], n_lines: usize) -> Option<(f32,

let mut n_read = 0;

let bytes_len = bytes.len();

// sample from start and 75% in the file
for offset in [0, (bytes.len() as f32 * 0.75) as usize] {
for offset in [0, (bytes_len as f32 * 0.75) as usize] {
bytes_trunc = &bytes[offset..];
let pos = next_line_position_naive_json(bytes_trunc)?;
if pos >= bytes_len {
return None;
}
bytes_trunc = &bytes_trunc[pos + 1..];

for _ in offset..(offset + n_lines_per_iter) {
Expand Down
17 changes: 17 additions & 0 deletions polars/tests/it/io/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,20 @@ fn read_unordered_json() {
assert_eq!("d", df.get_columns()[3].name());
assert_eq!((12, 4), df.shape());
}

/// Regression test: a JSON Lines payload that ends with a line feed must
/// still be read as a single-row frame.
#[test]
fn read_ndjson_with_trailing_newline() {
    // This must be a regular (non-raw) string literal: inside `r#"..."#` escape
    // sequences are not processed, so `\n` would be a literal backslash followed
    // by `n` and the input would not actually end in a newline.
    let data = "{\"Column1\":\"Value1\"}\n";

    let file = Cursor::new(data);
    let df = JsonReader::new(file)
        .with_json_format(JsonFormat::JsonLines)
        .finish()
        .unwrap();

    let expected = df! {
        "Column1" => ["Value1"]
    }
    .unwrap();
    assert!(expected.frame_equal(&df));
}
9 changes: 9 additions & 0 deletions py-polars/tests/unit/io/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,12 @@ def test_write_json2(df: pl.DataFrame) -> None:
file.seek(0)
out = pl.read_json(file)
assert df.frame_equal(out, null_equal=True)


def test_ndjson_with_trailing_newline() -> None:
    """A single NDJSON record followed by a newline is read as a one-row frame."""
    # Non-raw triple-quoted string: the `\n` escape is a real line feed.
    ndjson = """{"Column1":"Value1"}\n"""
    expected = pl.DataFrame({"Column1": ["Value1"]})

    df = pl.read_ndjson(io.StringIO(ndjson))

    assert df.frame_equal(expected)

0 comments on commit 7a56223

Please sign in to comment.