From d38fba91c21cf7b77d26f0c0de5ffe38375f9dff Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Fri, 19 May 2023 15:16:09 -0400 Subject: [PATCH] Fix handling of binary input (#191) Gracefully handle binary input with from_utf8_lossy. Fixes #189 --- src/lib.rs | 4 ++-- test_files/binary_data.bin | 3 +++ tests/integration.rs | 12 ++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 test_files/binary_data.bin diff --git a/src/lib.rs b/src/lib.rs index 7a9b818..5e459c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -283,8 +283,8 @@ pub mod pipeline { if ct == 0 { break; } - let data = std::str::from_utf8(&line[..ct]).unwrap(); - if self.filter.matches(data) + let data = String::from_utf8_lossy(&line[..ct]); + if self.filter.matches(data.as_ref()) && !Pipeline::proc_preagg(Record::new(data), &mut preaggs, &tx) { break; diff --git a/test_files/binary_data.bin b/test_files/binary_data.bin new file mode 100644 index 0000000..73f452f --- /dev/null +++ b/test_files/binary_data.bin @@ -0,0 +1,3 @@ +line of regular text k=v2 +�K���@�V ���+P��Q�Þ�0����S��oU�!+��X-!����޴A�� �r��[�OC��j_kΕ����V����5; �x1rYuV��� +line of regular text k=v diff --git a/tests/integration.rs b/tests/integration.rs index 61e4d90..17e56b7 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -146,6 +146,18 @@ None 1\n", ); } + #[test] + fn binary_input() { + run() + .args([ + "* | parse 'k=*' as k", + "--file", + "test_files/binary_data.bin", + ]) + .assert() + .stdout("[k=v2]\n[k=v]\n"); + } + #[test] fn filter_wildcard() { run()