Skip to content

Commit

Permalink
fix_lossy (#2585)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 9, 2022
1 parent f7c5062 commit b1de1c2
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions polars/polars-io/src/csv_core/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,16 @@ impl ParsedBuffer<Utf8Type> for Utf8Field {
}
false => {
if matches!(self.encoding, CsvEncoding::LossyUtf8) {
let s = String::from_utf8_lossy(
&self.data.as_slice()[data_len..data_len + n_written],
)
.into_owned();
// Safety:
// we extended to data_len + n_written
// so the bytes are initialized
debug_assert!(self.data.capacity() >= data_len + n_written);
let slice = unsafe {
self.data
.as_slice()
.get_unchecked(data_len..data_len + n_written)
};
let s = String::from_utf8_lossy(slice).into_owned();
let b = s.as_bytes();
// Make sure that we extend at the proper location,
// otherwise we append valid bytes to invalid utf8 bytes.
Expand Down

0 comments on commit b1de1c2

Please sign in to comment.