Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

perf(rome_js_formatter): Reduce FormatElement size #2456

Merged
merged 4 commits into from Apr 20, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
111 changes: 60 additions & 51 deletions crates/rome_formatter/src/format_element.rs
@@ -1,6 +1,6 @@
use crate::format_elements;
use crate::intersperse::{Intersperse, IntersperseFn};
use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments, TextRange};
use crate::{format_elements, TextSize};
use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments};
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
Expand Down Expand Up @@ -823,46 +823,28 @@ where
L: Language,
{
/// Get the number of line breaks between two consecutive SyntaxNodes in the tree
fn get_lines_between_nodes<L: Language>(
prev_node: &SyntaxNode<L>,
next_node: &SyntaxNode<L>,
) -> usize {
// Ensure the two nodes are actually siblings on debug
debug_assert_eq!(prev_node.next_sibling().as_ref(), Some(next_node));
debug_assert_eq!(next_node.prev_sibling().as_ref(), Some(prev_node));

// Count the lines separating the two statements,
// starting with the trailing trivia of the previous node
let mut line_count = prev_node
.last_trailing_trivia()
.and_then(|prev_token| {
// Newline pieces can only come last in trailing trivias, skip to it directly
prev_token.pieces().next_back()?.as_newline()
})
.is_some() as usize;

MichaReiser marked this conversation as resolved.
Show resolved Hide resolved
// Then add the newlines in the leading trivia of the next node
fn get_lines_before<L: Language>(next_node: &SyntaxNode<L>) -> usize {
ematipico marked this conversation as resolved.
Show resolved Hide resolved
// Count the newlines in the leading trivia of the next node
if let Some(leading_trivia) = next_node.first_leading_trivia() {
for piece in leading_trivia.pieces() {
if piece.is_newline() {
line_count += 1;
} else if piece.is_comments() {
// Stop at the first comment piece, the comment printer
// will handle newlines between the comment and the node
break;
}
}
leading_trivia
.pieces()
.take_while(|piece|
// Stop at the first comment piece, the comment printer
// will handle newlines between the comment and the node
!piece.is_comments())
MichaReiser marked this conversation as resolved.
Show resolved Hide resolved
.filter(|piece| piece.is_newline())
.count()
} else {
0
}

line_count
}

concat_elements(IntersperseFn::new(
elements.into_iter(),
|prev_node, next_node, next_elem| {
|_, next_node, next_elem| {
if next_elem.is_empty() {
empty_element()
} else if get_lines_between_nodes(prev_node, next_node) > 1 {
} else if get_lines_before(next_node) > 1 {
empty_line()
} else {
separator()
Expand Down Expand Up @@ -928,10 +910,8 @@ pub enum VerbatimKind {
Unknown,
Suppressed,
Verbatim {
/// the range that belongs to the node/token formatted verbatim
range: TextRange,
/// the text that belongs to the node/token formatted verbatim
text: String,
/// the length of the formatted node
length: TextSize,
},
}

Expand All @@ -945,10 +925,10 @@ pub struct Verbatim {
}

impl Verbatim {
pub fn new_verbatim(element: FormatElement, text: String, range: TextRange) -> Self {
pub fn new_verbatim(element: FormatElement, length: TextSize) -> Self {
Self {
element: Box::new(element),
kind: VerbatimKind::Verbatim { range, text },
kind: VerbatimKind::Verbatim { length },
}
}

Expand Down Expand Up @@ -1139,16 +1119,21 @@ pub enum Token {
Static { text: &'static str },
/// Token constructed from the input source as a dynamics
/// string and a range of the input source
Dynamic { text: String, source: TextRange },
Dynamic {
// There's no need for the text to be mutable, using `Box<str>` safes 8 bytes over `String`.
text: Box<str>,
// The position of the dynamic token in the unformatted source code
source_position: TextSize,
},
}

impl Debug for Token {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
// This does not use debug_tuple so the tokens are
// written on a single line even when pretty-printing
match self {
Token::Static { text } => write!(fmt, "Token({:?})", text),
Token::Dynamic { text, source } => write!(fmt, "Token({:?}, {:?})", text, source),
Token::Static { text } => write!(fmt, "StaticToken({:?})", text),
Token::Dynamic { text, .. } => write!(fmt, "DynamicToken({:?})", text),
}
}
}
Expand All @@ -1160,17 +1145,22 @@ impl Token {
}

/// Create a token from a dynamic string and a range of the input source
pub fn new_dynamic(text: String, source: TextRange) -> Self {
pub fn new_dynamic(text: String, position: TextSize) -> Self {
debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text);
Self::Dynamic { text, source }
Self::Dynamic {
text: text.into_boxed_str(),
source_position: position,
}
}

/// Get the range of the input source covered by this token,
/// or None if the token was synthesized by the formatter
pub fn source(&self) -> Option<&TextRange> {
pub fn source(&self) -> Option<&TextSize> {
match self {
Token::Static { .. } => None,
Token::Dynamic { source, .. } => Some(source),
Token::Dynamic {
source_position, ..
} => Some(source_position),
}
}
}
Expand All @@ -1190,7 +1180,10 @@ impl<L: Language> From<SyntaxToken<L>> for Token {

impl<'a, L: Language> From<&'a SyntaxToken<L>> for Token {
fn from(token: &'a SyntaxToken<L>) -> Self {
Self::new_dynamic(token.text_trimmed().into(), token.text_trimmed_range())
Self::new_dynamic(
token.text_trimmed().into(),
token.text_trimmed_range().start(),
)
}
}

Expand Down Expand Up @@ -1230,7 +1223,7 @@ impl<L: Language> From<SyntaxTriviaPieceComments<L>> for Token {
fn from(trivia: SyntaxTriviaPieceComments<L>) -> Self {
Self::new_dynamic(
normalize_newlines(trivia.text().trim(), LINE_TERMINATORS).into_owned(),
trivia.text_range(),
trivia.text_range().start(),
)
}
}
Expand Down Expand Up @@ -1398,9 +1391,10 @@ impl From<Option<FormatElement>> for FormatElement {
mod tests {

use crate::format_element::{
empty_element, join_elements, normalize_newlines, List, LINE_TERMINATORS,
empty_element, join_elements, normalize_newlines, ConditionalGroupContent, List,
VerbatimKind, LINE_TERMINATORS,
};
use crate::{concat_elements, space_token, token, FormatElement};
use crate::{concat_elements, space_token, token, FormatElement, TextRange, Token, Verbatim};

#[test]
fn concat_elements_returns_a_list_token_containing_the_passed_in_elements() {
Expand Down Expand Up @@ -1516,4 +1510,19 @@ mod tests {
assert_eq!(normalize_newlines("a\u{2028}b", LINE_TERMINATORS), "a\nb");
assert_eq!(normalize_newlines("a\u{2029}b", LINE_TERMINATORS), "a\nb");
}

#[test]
fn sizes() {
assert_eq!(8, std::mem::size_of::<TextRange>());
assert_eq!(8, std::mem::size_of::<VerbatimKind>());
assert_eq!(16, std::mem::size_of::<Verbatim>());
assert_eq!(24, std::mem::size_of::<Token>());
assert_eq!(16, std::mem::size_of::<ConditionalGroupContent>());
assert_eq!(24, std::mem::size_of::<List>());
// Increasing the size of FormatElement has serious consequences on runtime performance and memory footprint.
// Is there a more efficient way to encode the data to avoid increasing its size? Can the information
// be recomputed at a later point in time?
// You reduced the size of a format element? Excellent work!
assert_eq!(32, std::mem::size_of::<FormatElement>());
}
}
20 changes: 12 additions & 8 deletions crates/rome_formatter/src/lib.rs
Expand Up @@ -203,21 +203,21 @@ pub struct Formatted {
code: String,
range: Option<TextRange>,
sourcemap: Vec<SourceMarker>,
verbatim_source: Vec<(String, TextRange)>,
verbatim_ranges: Vec<TextRange>,
}

impl Formatted {
pub fn new(
code: String,
range: Option<TextRange>,
sourcemap: Vec<SourceMarker>,
verbatim_source: Vec<(String, TextRange)>,
verbatim_source: Vec<TextRange>,
) -> Self {
Self {
code,
range,
sourcemap,
verbatim_source,
verbatim_ranges: verbatim_source,
}
}

Expand All @@ -227,7 +227,7 @@ impl Formatted {
code: String::new(),
range: None,
sourcemap: Vec::new(),
verbatim_source: Vec::new(),
verbatim_ranges: Vec::new(),
}
}

Expand Down Expand Up @@ -259,12 +259,16 @@ impl Formatted {
self.code
}

pub fn verbatim(&self) -> &[(String, TextRange)] {
&self.verbatim_source
/// The text in the formatted code that has been formatted as verbatim.
pub fn verbatim(&self) -> impl Iterator<Item = (TextRange, &str)> {
self.verbatim_ranges
.iter()
.map(|range| (*range, &self.code[*range]))
}

pub fn into_verbatim(self) -> Vec<(String, TextRange)> {
self.verbatim_source
Comment on lines -266 to -267
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any particular reason why we removed this function? It was there mainly to be in line with into_sourcemap and into_code. If it's been removed because not used, then we should do the same with into_sourcemap too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed it because it wasn't used and the struct no longer stores the Vec that it could just take and return. Having the iterator specific method makes it clear that taking the verbatim sources requires a vector allocation.

/// Ranges of the formatted code that have been formatted as verbatim.
pub fn verbatim_ranges(&self) -> &[TextRange] {
&self.verbatim_ranges
}
}

Expand Down
14 changes: 8 additions & 6 deletions crates/rome_formatter/src/printer.rs
Expand Up @@ -171,9 +171,9 @@ impl<'a> Printer<'a> {
self.state.pending_space = false;
}

if let Some(range) = token.source() {
if let Some(source) = token.source() {
self.state.source_markers.push(SourceMarker {
source: range.start(),
source: *source,
dest: TextSize::from(self.state.buffer.len() as u32),
});
}
Expand Down Expand Up @@ -271,8 +271,11 @@ impl<'a> Printer<'a> {
}

FormatElement::Verbatim(verbatim) => {
if let VerbatimKind::Verbatim { range, text } = &verbatim.kind {
self.state.verbatim_markers.push((text.clone(), *range));
if let VerbatimKind::Verbatim { length } = &verbatim.kind {
self.state.verbatim_markers.push(TextRange::at(
TextSize::from(self.state.buffer.len() as u32),
*length,
));
}

queue.enqueue(PrintElementCall::new(&verbatim.element, args));
Expand Down Expand Up @@ -499,9 +502,8 @@ struct PrinterState<'a> {
generated_column: usize,
line_width: usize,
has_empty_line: bool,
// mappings: Mapping[];
line_suffixes: Vec<PrintElementCall<'a>>,
verbatim_markers: Vec<(String, TextRange)>,
verbatim_markers: Vec<TextRange>,
}

impl<'a> PrinterState<'a> {
Expand Down
14 changes: 5 additions & 9 deletions crates/rome_js_formatter/src/formatter.rs
Expand Up @@ -433,7 +433,7 @@ impl Formatter {
let text = &token.text()[relative_skipped_range];
elements.push(FormatElement::from(Token::new_dynamic(
text.to_string(),
skipped_trivia_range,
skipped_trivia_range.start(),
)));

// `print_trailing_trivia_pieces` and `format_leading_trivia_pieces` remove any whitespace except
Expand Down Expand Up @@ -604,17 +604,13 @@ impl Formatter {
/// "mess up" the developers, yet incomplete, work or accidentally introduce new syntax errors.
///
/// You may be inclined to call `node.text` directly. However, using `text` doesn't track the nodes
///nor its children source mapping information, resulting in incorrect source maps for this subtree.
/// nor its children source mapping information, resulting in incorrect source maps for this subtree.
///
/// These nodes and tokens get tracked as [FormatElement::Verbatim], useful to understand
/// if these nodes still need to have their own implementation.
pub fn format_verbatim(&self, node: &JsSyntaxNode) -> FormatElement {
let verbatim = self.format_verbatim_node_or_token(node);
FormatElement::Verbatim(Verbatim::new_verbatim(
verbatim,
node.to_string(),
node.text_range(),
))
FormatElement::Verbatim(Verbatim::new_verbatim(verbatim, node.text_range().len()))
}

/// Formats unknown nodes. The difference between this method and `format_verbatim` is that this method
Expand Down Expand Up @@ -653,7 +649,7 @@ impl Formatter {
fn trivia_token<L: Language>(piece: SyntaxTriviaPiece<L>) -> Token {
Token::new_dynamic(
normalize_newlines(piece.text(), LINE_TERMINATORS).into_owned(),
piece.text_range(),
piece.text_range().start(),
)
}

Expand All @@ -666,7 +662,7 @@ impl Formatter {

let content = Token::new_dynamic(
normalize_newlines(&node.text_trimmed().to_string(), LINE_TERMINATORS).into_owned(),
node.text_trimmed_range(),
node.text_trimmed_range().start(),
);

// Clippy false positive: SkipWhile does not implement DoubleEndedIterator
Expand Down
8 changes: 4 additions & 4 deletions crates/rome_js_formatter/src/lib.rs
Expand Up @@ -222,13 +222,13 @@ pub fn format_range(
let input_range = TextRange::new(start_source, end_source);
let output_range = TextRange::new(start_dest, end_dest);
let sourcemap = Vec::from(formatted.sourcemap());
let verbatim = Vec::from(formatted.verbatim());
let verbatim_ranges = Vec::from(formatted.verbatim_ranges());
let code = &formatted.into_code()[output_range];
Ok(Formatted::new(
code.into(),
Some(input_range),
sourcemap,
verbatim,
verbatim_ranges,
))
}

Expand Down Expand Up @@ -294,12 +294,12 @@ pub fn format_node(options: FormatOptions, root: &JsSyntaxNode) -> FormatResult<
let element = Formatter::new(options).format_root(root)?;
let formatted = Printer::new(options).print_with_indent(&element, initial_indent);
let sourcemap = Vec::from(formatted.sourcemap());
let verbatim = Vec::from(formatted.verbatim());
let verbatim_ranges = Vec::from(formatted.verbatim_ranges());
Ok(Formatted::new(
formatted.into_code(),
Some(root.text_range()),
sourcemap,
verbatim,
verbatim_ranges,
))
}

Expand Down
2 changes: 1 addition & 1 deletion crates/rome_js_formatter/src/ts/types/intersection_type.rs
Expand Up @@ -21,7 +21,7 @@ impl ToFormatElement for TsIntersectionType {
// if_group_breaks block to avoid removing comments when the
// group does not break
let replaced =
if_group_breaks(format_elements![Token::from(token.clone()), space_token()]);
if_group_breaks(format_elements![Token::from(&token), space_token()]);
formatter.format_replaced(&token, replaced)
}
None => if_group_breaks(format_elements![token("&"), space_token()]),
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_js_formatter/src/ts/types/union_type.rs
Expand Up @@ -21,7 +21,7 @@ impl ToFormatElement for TsUnionType {
// if_group_breaks block to avoid removing comments when the
// group does not break
let replaced =
if_group_breaks(format_elements![Token::from(token.clone()), space_token()]);
if_group_breaks(format_elements![Token::from(&token), space_token()]);
formatter.format_replaced(&token, replaced)
}
None => if_group_breaks(format_elements![token("|"), space_token()]),
Expand Down