Skip to content

Commit

Permalink
Detect when comments disappear
Browse files Browse the repository at this point in the history
When the reformatted code doesn't contain the same quantity of comments
as the original code, use the original code instead of the reformatted
code.
This is done for all expressions and `let` statements.

This should be used at the finest grained level possible, to avoid that
a small disappearing comment prevents a big chunk of code to be
reformatted.

Kind of fixes (avoid disappearing comments, but prevents a good
formatting is such case) #285 #225 #563 #743
  • Loading branch information
cassiersg committed Jan 10, 2016
1 parent 66abad9 commit 9f98f72
Show file tree
Hide file tree
Showing 5 changed files with 277 additions and 43 deletions.
262 changes: 225 additions & 37 deletions src/comment.rs
Expand Up @@ -8,13 +8,17 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Format comments.
// Formatting and tools for comments.

use std::iter;
use std::{self, iter};

use syntax::codemap::Span;

use Indent;
use config::Config;
use rewrite::RewriteContext;
use string::{StringFormat, rewrite_string};
use utils::wrap_str;

pub fn rewrite_comment(orig: &str,
block_style: bool,
Expand Down Expand Up @@ -150,7 +154,7 @@ impl FindUncommented for str {
}
Some(c) => {
match kind {
CodeCharKind::Normal if b == c => {}
FullCodeCharKind::Normal if b == c => {}
_ => {
needle_iter = pat.chars();
}
Expand All @@ -174,7 +178,7 @@ impl FindUncommented for str {
pub fn find_comment_end(s: &str) -> Option<usize> {
let mut iter = CharClasses::new(s.char_indices());
for (kind, (i, _c)) in &mut iter {
if kind == CodeCharKind::Normal {
if kind == FullCodeCharKind::Normal {
return Some(i);
}
}
Expand All @@ -189,7 +193,7 @@ pub fn find_comment_end(s: &str) -> Option<usize> {

/// Returns true if text contains any comment.
pub fn contains_comment(text: &str) -> bool {
CharClasses::new(text.chars()).any(|(kind, _)| kind == CodeCharKind::Comment)
CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
}

struct CharClasses<T>
Expand Down Expand Up @@ -240,6 +244,33 @@ pub enum CodeCharKind {
Comment,
}

#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum FullCodeCharKind {
Normal,
StartComment,
InComment,
EndComment,
}

impl FullCodeCharKind {
fn is_comment(&self) -> bool {
match *self {
FullCodeCharKind::Normal => false,
FullCodeCharKind::StartComment |
FullCodeCharKind::InComment |
FullCodeCharKind::EndComment => true,
}
}

fn to_codecharkind(&self) -> CodeCharKind {
if self.is_comment() {
CodeCharKind::Comment
} else {
CodeCharKind::Normal
}
}
}

impl<T> CharClasses<T>
where T: Iterator,
T::Item: RichChar
Expand All @@ -256,9 +287,9 @@ impl<T> Iterator for CharClasses<T>
where T: Iterator,
T::Item: RichChar
{
type Item = (CodeCharKind, T::Item);
type Item = (FullCodeCharKind, T::Item);

fn next(&mut self) -> Option<(CodeCharKind, T::Item)> {
fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
let item = try_opt!(self.base.next());
let chr = item.get_char();
self.status = match self.status {
Expand Down Expand Up @@ -286,11 +317,11 @@ impl<T> Iterator for CharClasses<T>
match self.base.peek() {
Some(next) if next.get_char() == '*' => {
self.status = CharClassesStatus::BlockCommentOpening(1);
return Some((CodeCharKind::Comment, item));
return Some((FullCodeCharKind::StartComment, item));
}
Some(next) if next.get_char() == '/' => {
self.status = CharClassesStatus::LineComment;
return Some((CodeCharKind::Comment, item));
return Some((FullCodeCharKind::StartComment, item));
}
_ => CharClassesStatus::Normal,
}
Expand All @@ -299,12 +330,7 @@ impl<T> Iterator for CharClasses<T>
}
}
CharClassesStatus::BlockComment(deepness) => {
if deepness == 0 {
// This is the closing '/'
assert_eq!(chr, '/');
self.status = CharClassesStatus::Normal;
return Some((CodeCharKind::Comment, item));
}
assert!(deepness != 0);
self.status = match self.base.peek() {
Some(next) if next.get_char() == '/' && chr == '*' => {
CharClassesStatus::BlockCommentClosing(deepness - 1)
Expand All @@ -314,34 +340,92 @@ impl<T> Iterator for CharClasses<T>
}
_ => CharClassesStatus::BlockComment(deepness),
};
return Some((CodeCharKind::Comment, item));
return Some((FullCodeCharKind::InComment, item));
}
CharClassesStatus::BlockCommentOpening(deepness) => {
assert_eq!(chr, '*');
self.status = CharClassesStatus::BlockComment(deepness);
return Some((CodeCharKind::Comment, item));
return Some((FullCodeCharKind::InComment, item));
}
CharClassesStatus::BlockCommentClosing(deepness) => {
assert_eq!(chr, '/');
self.status = if deepness == 0 {
CharClassesStatus::Normal
if deepness == 0 {
self.status = CharClassesStatus::Normal;
return Some((FullCodeCharKind::EndComment, item));
} else {
CharClassesStatus::BlockComment(deepness)
};
return Some((CodeCharKind::Comment, item));
self.status = CharClassesStatus::BlockComment(deepness);
return Some((FullCodeCharKind::InComment, item));
}
}
CharClassesStatus::LineComment => {
self.status = match chr {
'\n' => CharClassesStatus::Normal,
_ => CharClassesStatus::LineComment,
};
return Some((CodeCharKind::Comment, item));
match chr {
'\n' => {
self.status = CharClassesStatus::Normal;
return Some((FullCodeCharKind::EndComment, item));
}
_ => {
self.status = CharClassesStatus::LineComment;
return Some((FullCodeCharKind::InComment, item));
}
}
}
};
Some((FullCodeCharKind::Normal, item))
}
}

/// Iterator over functional and commented parts of a string. Any part of a string is either
/// functional code, either *one* block comment, either *one* line comment. Whitespace between
/// comments is functional code. Line comments contain their ending newlines.
struct UngroupedCommentCodeSlices<'a> {
slice: &'a str,
iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
}

impl<'a> UngroupedCommentCodeSlices<'a> {
fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
UngroupedCommentCodeSlices {
slice: code,
iter: CharClasses::new(code.char_indices()).peekable(),
}
}
}

impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
type Item = (CodeCharKind, usize, &'a str);

fn next(&mut self) -> Option<Self::Item> {
let (kind, (start_idx, _)) = try_opt!(self.iter.next());
match kind {
FullCodeCharKind::Normal => {
// Consume all the Normal code
while let Some(&(FullCodeCharKind::Normal, (_, _))) = self.iter.peek() {
let _ = self.iter.next();
}
}
FullCodeCharKind::StartComment => {
// Consume the whole comment
while let Some((FullCodeCharKind::InComment, (_, _))) = self.iter.next() {}
}
_ => panic!(),
}
let slice = match self.iter.peek() {
Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
None => &self.slice[start_idx..],
};
Some((CodeCharKind::Normal, item))
Some((if kind.is_comment() {
CodeCharKind::Comment
} else {
CodeCharKind::Normal
},
start_idx,
slice))
}
}




/// Iterator over an alternating sequence of functional and commented parts of
/// a string. The first item is always a, possibly zero length, subslice of
/// functional text. Line style comments contain their ending newlines.
Expand Down Expand Up @@ -383,7 +467,7 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
first_whitespace = Some(i);
}

if kind == self.last_slice_kind && !is_comment_connector {
if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
let last_index = match first_whitespace {
Some(j) => j,
None => i,
Expand Down Expand Up @@ -419,20 +503,124 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
}
}

/// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
/// (if it fits in the width/offset, else return None), else return `new`
pub fn recover_comment_removed(new: String,
span: Span,
context: &RewriteContext,
width: usize,
offset: Indent)
-> Option<String> {
let snippet = context.snippet(span);
if changed_comment_content(&snippet, &new) {
// We missed some comments
// Keep previous formatting if it satisfies the constrains
return wrap_str(snippet, context.config.max_width, width, offset);
} else {
Some(new)
}
}

/// Return true if the two strings of code have the same payload of comments.
/// The payload of comments is everything in the string except:
/// - actual code (not comments)
/// - comment start/end marks
/// - whitespace
/// - '*' at the beginning of lines in block comments
fn changed_comment_content(orig: &str, new: &str) -> bool {
// Cannot write this as a fn since we cannot return types containing closures
let code_comment_content = |code| {
let slices = UngroupedCommentCodeSlices::new(code);
slices.filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
.flat_map(|(_, _, s)| CommentReducer::new(s))
};
let res = code_comment_content(orig).ne(code_comment_content(new));
debug!("comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
res,
orig,
new,
code_comment_content(orig).collect::<String>(),
code_comment_content(new).collect::<String>());
res
}


/// Iterator over the 'payload' characters of a comment.
/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
is_block: bool,
at_start_line: bool,
iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
fn new(comment: &'a str) -> CommentReducer<'a> {
let is_block = comment.starts_with("/*");
let comment = remove_comment_header(comment);
CommentReducer {
is_block: is_block,
at_start_line: false, // There are no supplementary '*' on the first line
iter: comment.chars(),
}
}
}

impl<'a> Iterator for CommentReducer<'a> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
loop {
let mut c = try_opt!(self.iter.next());
if self.is_block && self.at_start_line {
while c.is_whitespace() {
c = try_opt!(self.iter.next());
}
// Ignore leading '*'
if c == '*' {
c = try_opt!(self.iter.next());
}
} else {
if c == '\n' {
self.at_start_line = true;
}
}
if !c.is_whitespace() {
return Some(c);
}
}
}
}


fn remove_comment_header(comment: &str) -> &str {
if comment.starts_with("///") || comment.starts_with("//!") {
&comment[3..]
} else if comment.starts_with("//") {
&comment[2..]
} else if comment.starts_with("/**") || comment.starts_with("/*!") {
&comment[3..comment.len() - 2]
} else {
assert!(comment.starts_with("/*"),
format!("string '{}' is not a comment", comment));
&comment[2..comment.len() - 2]
}
}

#[cfg(test)]
mod test {
use super::{CharClasses, CodeCharKind, contains_comment, rewrite_comment, FindUncommented,
CommentCodeSlices};
use super::{CharClasses, CodeCharKind, FullCodeCharKind, contains_comment, rewrite_comment,
FindUncommented, CommentCodeSlices};
use Indent;

#[test]
fn char_classes() {
let mut iter = CharClasses::new("//\n\n".chars());

assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
assert_eq!((CodeCharKind::Comment, '\n'), iter.next().unwrap());
assert_eq!((CodeCharKind::Normal, '\n'), iter.next().unwrap());
assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
assert_eq!(None, iter.next());
}

Expand Down Expand Up @@ -507,8 +695,8 @@ mod test {
CharClasses::new(text.chars())
.filter_map(|(s, c)| {
match s {
CodeCharKind::Normal => Some(c),
CodeCharKind::Comment => None,
FullCodeCharKind::Normal => Some(c),
_ => None,
}
})
.collect()
Expand Down

0 comments on commit 9f98f72

Please sign in to comment.