Move gather_comments. #121815

Merged
merged 1 commit on Mar 2, 2024
1 change: 1 addition & 0 deletions Cargo.lock
@@ -3487,6 +3487,7 @@ version = "0.0.0"
dependencies = [
"itertools 0.11.0",
"rustc_ast",
"rustc_lexer",
"rustc_span",
"thin-vec",
]
126 changes: 1 addition & 125 deletions compiler/rustc_ast/src/util/comments.rs
@@ -1,6 +1,5 @@
use crate::token::CommentKind;
use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
use rustc_span::{BytePos, Symbol};

#[cfg(test)]
mod tests;
@@ -131,126 +130,3 @@ pub fn beautify_doc_string(data: Symbol, kind: CommentKind) -> Symbol {
}
data
}

/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
/// Otherwise returns `Some(k)` where `k` is first char offset after that leading
/// whitespace. Note that `k` may be outside bounds of `s`.
fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
let mut idx = 0;
for (i, ch) in s.char_indices().take(col.to_usize()) {
if !ch.is_whitespace() {
return None;
}
idx = i + ch.len_utf8();
}
Some(idx)
}

fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
let len = s.len();
match all_whitespace(s, col) {
Some(col) => {
if col < len {
&s[col..]
} else {
""
}
}
None => s,
}
}

fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
let mut res: Vec<String> = vec![];
let mut lines = text.lines();
// just push the first line
res.extend(lines.next().map(|it| it.to_string()));
// for other lines, strip common whitespace prefix
for line in lines {
res.push(trim_whitespace_prefix(line, col).to_string())
}
res
}

// it appears this function is called only from pprust... that's
// probably not a good thing.
pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
let sm = SourceMap::new(sm.path_mapping().clone());
let source_file = sm.new_source_file(path, src);
let text = (*source_file.src.as_ref().unwrap()).clone();

let text: &str = text.as_str();
let start_bpos = source_file.start_pos;
let mut pos = 0;
let mut comments: Vec<Comment> = Vec::new();
let mut code_to_the_left = false;

if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
comments.push(Comment {
style: CommentStyle::Isolated,
lines: vec![text[..shebang_len].to_string()],
pos: start_bpos,
});
pos += shebang_len;
}

for token in rustc_lexer::tokenize(&text[pos..]) {
let token_text = &text[pos..pos + token.len as usize];
match token.kind {
rustc_lexer::TokenKind::Whitespace => {
if let Some(mut idx) = token_text.find('\n') {
code_to_the_left = false;
while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
idx += 1 + next_newline;
comments.push(Comment {
style: CommentStyle::BlankLine,
lines: vec![],
pos: start_bpos + BytePos((pos + idx) as u32),
});
}
}
}
rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
if doc_style.is_none() {
let code_to_the_right = !matches!(
text[pos + token.len as usize..].chars().next(),
Some('\r' | '\n')
);
let style = match (code_to_the_left, code_to_the_right) {
(_, true) => CommentStyle::Mixed,
(false, false) => CommentStyle::Isolated,
(true, false) => CommentStyle::Trailing,
};

// Count the number of chars since the start of the line by rescanning.
let pos_in_file = start_bpos + BytePos(pos as u32);
let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
let col = CharPos(text[line_begin_pos..pos].chars().count());

let lines = split_block_comment_into_lines(token_text, col);
comments.push(Comment { style, lines, pos: pos_in_file })
}
}
rustc_lexer::TokenKind::LineComment { doc_style } => {
if doc_style.is_none() {
comments.push(Comment {
style: if code_to_the_left {
CommentStyle::Trailing
} else {
CommentStyle::Isolated
},
lines: vec![token_text.to_string()],
pos: start_bpos + BytePos(pos as u32),
})
}
}
_ => {
code_to_the_left = true;
}
}
pos += token.len as usize;
}

comments
}
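
The helpers deleted above (and re-added verbatim in rustc_ast_pretty below) are easiest to read against a concrete input. The following is a minimal standalone sketch of their prefix-stripping behaviour, using a plain `usize` column in place of `rustc_span::CharPos` so it compiles outside the compiler tree; it is an illustration, not part of the diff.

```rust
/// Returns `None` if the first `col` chars of `s` contain a non-whitespace
/// char; otherwise returns the byte offset just past that leading whitespace.
fn all_whitespace(s: &str, col: usize) -> Option<usize> {
    let mut idx = 0;
    for (i, ch) in s.char_indices().take(col) {
        if !ch.is_whitespace() {
            return None;
        }
        idx = i + ch.len_utf8();
    }
    Some(idx)
}

fn trim_whitespace_prefix(s: &str, col: usize) -> &str {
    match all_whitespace(s, col) {
        Some(idx) if idx < s.len() => &s[idx..],
        Some(_) => "",
        None => s,
    }
}

fn split_block_comment_into_lines(text: &str, col: usize) -> Vec<String> {
    let mut lines = text.lines();
    // The first line is kept as written; the comment opener sits at `col`.
    let mut res: Vec<String> = lines.next().map(|l| l.to_string()).into_iter().collect();
    // Later lines drop up to `col` chars of leading whitespace, but only if
    // that whole prefix really is whitespace.
    res.extend(lines.map(|l| trim_whitespace_prefix(l, col).to_string()));
    res
}

fn main() {
    // A block comment whose `/*` starts at column 4 of its line.
    let comment = "/* first\n        second\n    */";
    assert_eq!(
        split_block_comment_into_lines(comment, 4),
        vec!["/* first", "    second", "*/"]
    );
}
```

Running it shows the first line kept verbatim while the later lines lose the four-column indentation shared with the opening `/*`.
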
1 change: 1 addition & 0 deletions compiler/rustc_ast_pretty/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
# tidy-alphabetical-start
itertools = "0.11"
rustc_ast = { path = "../rustc_ast" }
rustc_lexer = { path = "../rustc_lexer" }
rustc_span = { path = "../rustc_span" }
thin-vec = "0.2.12"
# tidy-alphabetical-end
125 changes: 123 additions & 2 deletions compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -14,7 +14,7 @@ use rustc_ast::ptr::P;
use rustc_ast::token::{self, BinOpToken, CommentKind, Delimiter, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{Spacing, TokenStream, TokenTree};
use rustc_ast::util::classify;
use rustc_ast::util::comments::{gather_comments, Comment, CommentStyle};
use rustc_ast::util::comments::{Comment, CommentStyle};
use rustc_ast::util::parser;
use rustc_ast::{self as ast, AttrArgs, AttrArgsEq, BlockCheckMode, PatKind};
use rustc_ast::{attr, BindingAnnotation, ByRef, DelimArgs, RangeEnd, RangeSyntax, Term};
@@ -24,7 +24,7 @@ use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
use rustc_span::edition::Edition;
use rustc_span::source_map::{SourceMap, Spanned};
use rustc_span::symbol::{kw, sym, Ident, IdentPrinter, Symbol};
use rustc_span::{BytePos, FileName, Span, DUMMY_SP};
use rustc_span::{BytePos, CharPos, FileName, Pos, Span, DUMMY_SP};
use std::borrow::Cow;
use thin_vec::ThinVec;

@@ -59,6 +59,127 @@ pub struct Comments<'a> {
current: usize,
}

/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
/// Otherwise returns `Some(k)` where `k` is first char offset after that leading
/// whitespace. Note that `k` may be outside bounds of `s`.
fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
let mut idx = 0;
for (i, ch) in s.char_indices().take(col.to_usize()) {
if !ch.is_whitespace() {
return None;
}
idx = i + ch.len_utf8();
}
Some(idx)
}

fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
let len = s.len();
match all_whitespace(s, col) {
Some(col) => {
if col < len {
&s[col..]
} else {
""
}
}
None => s,
}
}

fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
let mut res: Vec<String> = vec![];
let mut lines = text.lines();
// just push the first line
res.extend(lines.next().map(|it| it.to_string()));
// for other lines, strip common whitespace prefix
for line in lines {
res.push(trim_whitespace_prefix(line, col).to_string())
}
res
}

fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
let sm = SourceMap::new(sm.path_mapping().clone());
let source_file = sm.new_source_file(path, src);
let text = (*source_file.src.as_ref().unwrap()).clone();

let text: &str = text.as_str();
let start_bpos = source_file.start_pos;
let mut pos = 0;
let mut comments: Vec<Comment> = Vec::new();
let mut code_to_the_left = false;

if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
comments.push(Comment {
style: CommentStyle::Isolated,
lines: vec![text[..shebang_len].to_string()],
pos: start_bpos,
});
pos += shebang_len;
}

for token in rustc_lexer::tokenize(&text[pos..]) {
let token_text = &text[pos..pos + token.len as usize];
match token.kind {
rustc_lexer::TokenKind::Whitespace => {
if let Some(mut idx) = token_text.find('\n') {
code_to_the_left = false;
while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
idx += 1 + next_newline;
comments.push(Comment {
style: CommentStyle::BlankLine,
lines: vec![],
pos: start_bpos + BytePos((pos + idx) as u32),
});
}
}
}
rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
if doc_style.is_none() {
let code_to_the_right = !matches!(
text[pos + token.len as usize..].chars().next(),
Some('\r' | '\n')
);
let style = match (code_to_the_left, code_to_the_right) {
(_, true) => CommentStyle::Mixed,
(false, false) => CommentStyle::Isolated,
(true, false) => CommentStyle::Trailing,
};

// Count the number of chars since the start of the line by rescanning.
let pos_in_file = start_bpos + BytePos(pos as u32);
let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
let col = CharPos(text[line_begin_pos..pos].chars().count());

let lines = split_block_comment_into_lines(token_text, col);
comments.push(Comment { style, lines, pos: pos_in_file })
}
}
rustc_lexer::TokenKind::LineComment { doc_style } => {
if doc_style.is_none() {
comments.push(Comment {
style: if code_to_the_left {
CommentStyle::Trailing
} else {
CommentStyle::Isolated
},
lines: vec![token_text.to_string()],
pos: start_bpos + BytePos(pos as u32),
})
}
}
_ => {
code_to_the_left = true;
}
}
pos += token.len as usize;
}

comments
}

impl<'a> Comments<'a> {
pub fn new(sm: &'a SourceMap, filename: FileName, input: String) -> Comments<'a> {
let comments = gather_comments(sm, filename, input);
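
For reference, the style decision the re-homed `gather_comments` makes for non-doc block comments boils down to the `(code_to_the_left, code_to_the_right)` match shown above. Below is a small self-contained sketch of that logic; the local `CommentStyle` enum and `classify` helper are illustrative only, not compiler API.

```rust
// Mirrors the variants used by the block-comment branch of gather_comments.
#[derive(Debug, PartialEq)]
enum CommentStyle {
    Isolated, // comment alone on its line(s)
    Trailing, // comment follows code on the same line
    Mixed,    // code continues after the comment on the same line
}

fn classify(code_to_the_left: bool, code_to_the_right: bool) -> CommentStyle {
    match (code_to_the_left, code_to_the_right) {
        (_, true) => CommentStyle::Mixed,
        (false, false) => CommentStyle::Isolated,
        (true, false) => CommentStyle::Trailing,
    }
}

fn main() {
    // `/* note */ let x = 1;`  -- code follows the comment.
    assert_eq!(classify(false, true), CommentStyle::Mixed);
    // `let x = 1; // note`     -- the comment trails the code.
    assert_eq!(classify(true, false), CommentStyle::Trailing);
    // A comment on a line of its own.
    assert_eq!(classify(false, false), CommentStyle::Isolated);
}
```

Line comments take the same Trailing/Isolated split but never see code to their right, and runs of blank lines inside whitespace tokens are recorded separately as `CommentStyle::BlankLine`.
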