diff --git a/src/lib.rs b/src/lib.rs
index d7c7d28..cd376e8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -208,7 +208,9 @@ fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State {
     match memchr::memchr(b'\n', remaining) {
         Some(offset) => {
             *i += offset;
-            buf[cur..*i].fill(b' ');
+            // Keep a `\r` directly before `\n` (CRLF); look back no further than `cur` so `end >= cur`
+            let end = if *i > cur && buf[*i - 1] == b'\r' { *i - 1 } else { *i };
+            buf[cur..end].fill(b' ');
             Top
         }
         None => {
@@ -227,18 +229,42 @@ fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State {
     match memchr::memchr(b'*', remaining) {
         Some(offset) => {
             *i += offset;
-            buf[cur..=*i].fill(b' ');
+            // Blank the comment bytes up to and including the `*`, keeping `\n` / `\r`
+            fill_non_newlines(&mut buf[cur..=*i]);
             MaybeCommentEnd
         }
         None => {
             let len = buf.len();
             *i = len - 1;
-            buf[cur..len].fill(b' ');
+            // No `*` in the rest of the buffer: blank it all, keeping `\n` / `\r`
+            fill_non_newlines(&mut buf[cur..len]);
             InBlockComment
         }
     }
 }
 
+/// Overwrite every byte of `buf` with a space, except `\n` and `\r`, which are left in place.
+#[inline]
+fn fill_non_newlines(buf: &mut [u8]) {
+    let mut pos = 0;
+    while pos < buf.len() {
+        // Locate the next byte to keep (\n or \r) at or after `pos`
+        match memchr::memchr2(b'\n', b'\r', &buf[pos..]) {
+            Some(offset) => {
+                // Blank the run of bytes in front of the newline byte
+                buf[pos..pos + offset].fill(b' ');
+                // Step over the newline byte itself so it is left untouched
+                pos += offset + 1;
+            }
+            None => {
+                // No newline bytes remain: blank the tail and stop
+                buf[pos..].fill(b' ');
+                break;
+            }
+        }
+    }
+}
+
 #[inline(always)]
 fn top(c: &mut u8) -> State {
     match *c {
@@ -278,7 +304,10 @@ fn in_comment(c: &mut u8) -> Result {
 #[inline]
 fn maybe_comment_end(c: &mut u8) -> State {
     let old = *c;
-    *c = b' ';
+    // Blank the byte unless it is `\n` or `\r`, which are kept
+    if old != b'\n' && old != b'\r' {
+        *c = b' ';
+    }
     match old {
         b'/' => Top,
         b'*' => MaybeCommentEnd,
diff --git a/tests/main.rs b/tests/main.rs
index 3762721..574a59b 100644
--- a/tests/main.rs
+++ b/tests/main.rs
@@ -428,16 +428,13 @@ fn
sindresorhus_replace_comments_with_whitespace() { assert_eq!(strip_string("/*//comment*/{\"a\":\"b\"}"), " {\"a\":\"b\"}"); assert_eq!(strip_string("{\"a\":\"b\"//comment\n}"), "{\"a\":\"b\" \n}"); assert_eq!(strip_string("{\"a\":\"b\"/*comment*/}"), "{\"a\":\"b\" }"); - // Note: The Rust implementation replaces newlines in block comments with spaces, - // unlike the JavaScript version which preserves them assert_eq!( strip_string("{\"a\"/*\n\n\ncomment\r\n*/:\"b\"}"), - "{\"a\" :\"b\"}" + "{\"a\" \n\n\n \r\n :\"b\"}" ); - // Note: Same for multi-line comments assert_eq!( strip_string("/*!\n * comment\n */\n{\"a\":\"b\"}"), - " \n{\"a\":\"b\"}" + " \n \n \n{\"a\":\"b\"}" ); assert_eq!(strip_string("{/*comment*/\"a\":\"b\"}"), "{ \"a\":\"b\"}"); } @@ -471,8 +468,7 @@ fn sindresorhus_line_endings_no_comments() { #[test] fn sindresorhus_line_endings_single_line_comment() { assert_eq!(strip_string("{\"a\":\"b\"//c\n}"), "{\"a\":\"b\" \n}"); - // Note: The Rust implementation treats \r\n differently - it replaces the \r as part of comment - assert_eq!(strip_string("{\"a\":\"b\"//c\r\n}"), "{\"a\":\"b\" \n}"); + assert_eq!(strip_string("{\"a\":\"b\"//c\r\n}"), "{\"a\":\"b\" \r\n}"); } #[test] @@ -483,14 +479,13 @@ fn sindresorhus_line_endings_single_line_block_comment() { #[test] fn sindresorhus_line_endings_multi_line_block_comment() { - // Note: The Rust implementation replaces newlines inside block comments with spaces assert_eq!( strip_string("{\"a\":\"b\",/*c\nc2*/\"x\":\"y\"\n}"), - "{\"a\":\"b\", \"x\":\"y\"\n}" + "{\"a\":\"b\", \n \"x\":\"y\"\n}" ); assert_eq!( strip_string("{\"a\":\"b\",/*c\r\nc2*/\"x\":\"y\"\r\n}"), - "{\"a\":\"b\", \"x\":\"y\"\r\n}" + "{\"a\":\"b\", \r\n \"x\":\"y\"\r\n}" ); }