Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

Commit

Permalink
perf(rome_js_formatter): Reduce FormatElement size (#2456)
Browse files Browse the repository at this point in the history
This PR reduces the size of a `FormatElement` from 56 to 32 bytes by:

* Using a `Box<str>` to store a dynamic token's string as it doesn't need to be mutable (safes 8 bytes for the `capacity`)
* Only storing the start position for a `DynamicToken` because the length isn't used by the printer
* Change the semantics of `verbatim_ranges` to store the ranges in the **formatted** string. The ranges in the formatted output should be sufficient for debugging and it allows to resolve the `String` rather than having to store it on the `FormatElement` (range can be computed in the printer).

This improves the formatter's performance by about 10%
  • Loading branch information
MichaReiser committed Apr 20, 2022
1 parent 7358ffa commit f52bb37
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 84 deletions.
108 changes: 59 additions & 49 deletions crates/rome_formatter/src/format_element.rs
@@ -1,6 +1,6 @@
use crate::format_elements;
use crate::intersperse::{Intersperse, IntersperseFn};
use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments, TextRange};
use crate::{format_elements, TextSize};
use rome_rowan::{Language, SyntaxNode, SyntaxToken, SyntaxTriviaPieceComments};
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
Expand Down Expand Up @@ -823,46 +823,29 @@ where
L: Language,
{
/// Get the number of line breaks between two consecutive SyntaxNodes in the tree
fn get_lines_between_nodes<L: Language>(
prev_node: &SyntaxNode<L>,
next_node: &SyntaxNode<L>,
) -> usize {
// Ensure the two nodes are actually siblings on debug
debug_assert_eq!(prev_node.next_sibling().as_ref(), Some(next_node));
debug_assert_eq!(next_node.prev_sibling().as_ref(), Some(prev_node));

// Count the lines separating the two statements,
// starting with the trailing trivia of the previous node
let mut line_count = prev_node
.last_trailing_trivia()
.and_then(|prev_token| {
// Newline pieces can only come last in trailing trivias, skip to it directly
prev_token.pieces().next_back()?.as_newline()
})
.is_some() as usize;

// Then add the newlines in the leading trivia of the next node
fn get_lines_before<L: Language>(next_node: &SyntaxNode<L>) -> usize {
// Count the newlines in the leading trivia of the next node
if let Some(leading_trivia) = next_node.first_leading_trivia() {
for piece in leading_trivia.pieces() {
if piece.is_newline() {
line_count += 1;
} else if piece.is_comments() {
leading_trivia
.pieces()
.take_while(|piece| {
// Stop at the first comment piece, the comment printer
// will handle newlines between the comment and the node
break;
}
}
!piece.is_comments()
})
.filter(|piece| piece.is_newline())
.count()
} else {
0
}

line_count
}

concat_elements(IntersperseFn::new(
elements.into_iter(),
|prev_node, next_node, next_elem| {
|_, next_node, next_elem| {
if next_elem.is_empty() {
empty_element()
} else if get_lines_between_nodes(prev_node, next_node) > 1 {
} else if get_lines_before(next_node) > 1 {
empty_line()
} else {
separator()
Expand Down Expand Up @@ -928,10 +911,8 @@ pub enum VerbatimKind {
Unknown,
Suppressed,
Verbatim {
/// the range that belongs to the node/token formatted verbatim
range: TextRange,
/// the text that belongs to the node/token formatted verbatim
text: String,
/// the length of the formatted node
length: TextSize,
},
}

Expand All @@ -945,10 +926,10 @@ pub struct Verbatim {
}

impl Verbatim {
pub fn new_verbatim(element: FormatElement, text: String, range: TextRange) -> Self {
pub fn new_verbatim(element: FormatElement, length: TextSize) -> Self {
Self {
element: Box::new(element),
kind: VerbatimKind::Verbatim { range, text },
kind: VerbatimKind::Verbatim { length },
}
}

Expand Down Expand Up @@ -1139,16 +1120,21 @@ pub enum Token {
Static { text: &'static str },
/// Token constructed from the input source as a dynamics
/// string and a range of the input source
Dynamic { text: String, source: TextRange },
Dynamic {
// There's no need for the text to be mutable, using `Box<str>` safes 8 bytes over `String`.
text: Box<str>,
// The position of the dynamic token in the unformatted source code
source_position: TextSize,
},
}

impl Debug for Token {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
// This does not use debug_tuple so the tokens are
// written on a single line even when pretty-printing
match self {
Token::Static { text } => write!(fmt, "Token({:?})", text),
Token::Dynamic { text, source } => write!(fmt, "Token({:?}, {:?})", text, source),
Token::Static { text } => write!(fmt, "StaticToken({:?})", text),
Token::Dynamic { text, .. } => write!(fmt, "DynamicToken({:?})", text),
}
}
}
Expand All @@ -1160,17 +1146,22 @@ impl Token {
}

/// Create a token from a dynamic string and a range of the input source
pub fn new_dynamic(text: String, source: TextRange) -> Self {
pub fn new_dynamic(text: String, position: TextSize) -> Self {
debug_assert!(!text.contains('\r'), "The content '{}' contains an unsupported '\\r' line terminator character but string tokens must only use line feeds '\\n' as line separator. Use '\\n' instead of '\\r' and '\\r\\n' to insert a line break in strings.", text);
Self::Dynamic { text, source }
Self::Dynamic {
text: text.into_boxed_str(),
source_position: position,
}
}

/// Get the range of the input source covered by this token,
/// or None if the token was synthesized by the formatter
pub fn source(&self) -> Option<&TextRange> {
pub fn source(&self) -> Option<&TextSize> {
match self {
Token::Static { .. } => None,
Token::Dynamic { source, .. } => Some(source),
Token::Dynamic {
source_position, ..
} => Some(source_position),
}
}
}
Expand All @@ -1190,7 +1181,10 @@ impl<L: Language> From<SyntaxToken<L>> for Token {

impl<'a, L: Language> From<&'a SyntaxToken<L>> for Token {
fn from(token: &'a SyntaxToken<L>) -> Self {
Self::new_dynamic(token.text_trimmed().into(), token.text_trimmed_range())
Self::new_dynamic(
token.text_trimmed().into(),
token.text_trimmed_range().start(),
)
}
}

Expand Down Expand Up @@ -1230,7 +1224,7 @@ impl<L: Language> From<SyntaxTriviaPieceComments<L>> for Token {
fn from(trivia: SyntaxTriviaPieceComments<L>) -> Self {
Self::new_dynamic(
normalize_newlines(trivia.text().trim(), LINE_TERMINATORS).into_owned(),
trivia.text_range(),
trivia.text_range().start(),
)
}
}
Expand Down Expand Up @@ -1398,9 +1392,10 @@ impl From<Option<FormatElement>> for FormatElement {
mod tests {

use crate::format_element::{
empty_element, join_elements, normalize_newlines, List, LINE_TERMINATORS,
empty_element, join_elements, normalize_newlines, ConditionalGroupContent, List,
VerbatimKind, LINE_TERMINATORS,
};
use crate::{concat_elements, space_token, token, FormatElement};
use crate::{concat_elements, space_token, token, FormatElement, TextRange, Token, Verbatim};

#[test]
fn concat_elements_returns_a_list_token_containing_the_passed_in_elements() {
Expand Down Expand Up @@ -1516,4 +1511,19 @@ mod tests {
assert_eq!(normalize_newlines("a\u{2028}b", LINE_TERMINATORS), "a\nb");
assert_eq!(normalize_newlines("a\u{2029}b", LINE_TERMINATORS), "a\nb");
}

#[test]
fn sizes() {
assert_eq!(8, std::mem::size_of::<TextRange>());
assert_eq!(8, std::mem::size_of::<VerbatimKind>());
assert_eq!(16, std::mem::size_of::<Verbatim>());
assert_eq!(24, std::mem::size_of::<Token>());
assert_eq!(16, std::mem::size_of::<ConditionalGroupContent>());
assert_eq!(24, std::mem::size_of::<List>());
// Increasing the size of FormatElement has serious consequences on runtime performance and memory footprint.
// Is there a more efficient way to encode the data to avoid increasing its size? Can the information
// be recomputed at a later point in time?
// You reduced the size of a format element? Excellent work!
assert_eq!(32, std::mem::size_of::<FormatElement>());
}
}
20 changes: 12 additions & 8 deletions crates/rome_formatter/src/lib.rs
Expand Up @@ -203,21 +203,21 @@ pub struct Formatted {
code: String,
range: Option<TextRange>,
sourcemap: Vec<SourceMarker>,
verbatim_source: Vec<(String, TextRange)>,
verbatim_ranges: Vec<TextRange>,
}

impl Formatted {
pub fn new(
code: String,
range: Option<TextRange>,
sourcemap: Vec<SourceMarker>,
verbatim_source: Vec<(String, TextRange)>,
verbatim_source: Vec<TextRange>,
) -> Self {
Self {
code,
range,
sourcemap,
verbatim_source,
verbatim_ranges: verbatim_source,
}
}

Expand All @@ -227,7 +227,7 @@ impl Formatted {
code: String::new(),
range: None,
sourcemap: Vec::new(),
verbatim_source: Vec::new(),
verbatim_ranges: Vec::new(),
}
}

Expand Down Expand Up @@ -259,12 +259,16 @@ impl Formatted {
self.code
}

pub fn verbatim(&self) -> &[(String, TextRange)] {
&self.verbatim_source
/// The text in the formatted code that has been formatted as verbatim.
pub fn verbatim(&self) -> impl Iterator<Item = (TextRange, &str)> {
self.verbatim_ranges
.iter()
.map(|range| (*range, &self.code[*range]))
}

pub fn into_verbatim(self) -> Vec<(String, TextRange)> {
self.verbatim_source
/// Ranges of the formatted code that have been formatted as verbatim.
pub fn verbatim_ranges(&self) -> &[TextRange] {
&self.verbatim_ranges
}
}

Expand Down
14 changes: 8 additions & 6 deletions crates/rome_formatter/src/printer.rs
Expand Up @@ -171,9 +171,9 @@ impl<'a> Printer<'a> {
self.state.pending_space = false;
}

if let Some(range) = token.source() {
if let Some(source) = token.source() {
self.state.source_markers.push(SourceMarker {
source: range.start(),
source: *source,
dest: TextSize::from(self.state.buffer.len() as u32),
});
}
Expand Down Expand Up @@ -271,8 +271,11 @@ impl<'a> Printer<'a> {
}

FormatElement::Verbatim(verbatim) => {
if let VerbatimKind::Verbatim { range, text } = &verbatim.kind {
self.state.verbatim_markers.push((text.clone(), *range));
if let VerbatimKind::Verbatim { length } = &verbatim.kind {
self.state.verbatim_markers.push(TextRange::at(
TextSize::from(self.state.buffer.len() as u32),
*length,
));
}

queue.enqueue(PrintElementCall::new(&verbatim.element, args));
Expand Down Expand Up @@ -499,9 +502,8 @@ struct PrinterState<'a> {
generated_column: usize,
line_width: usize,
has_empty_line: bool,
// mappings: Mapping[];
line_suffixes: Vec<PrintElementCall<'a>>,
verbatim_markers: Vec<(String, TextRange)>,
verbatim_markers: Vec<TextRange>,
}

impl<'a> PrinterState<'a> {
Expand Down
14 changes: 5 additions & 9 deletions crates/rome_js_formatter/src/formatter.rs
Expand Up @@ -433,7 +433,7 @@ impl Formatter {
let text = &token.text()[relative_skipped_range];
elements.push(FormatElement::from(Token::new_dynamic(
text.to_string(),
skipped_trivia_range,
skipped_trivia_range.start(),
)));

// `print_trailing_trivia_pieces` and `format_leading_trivia_pieces` remove any whitespace except
Expand Down Expand Up @@ -604,17 +604,13 @@ impl Formatter {
/// "mess up" the developers, yet incomplete, work or accidentally introduce new syntax errors.
///
/// You may be inclined to call `node.text` directly. However, using `text` doesn't track the nodes
///nor its children source mapping information, resulting in incorrect source maps for this subtree.
/// nor its children source mapping information, resulting in incorrect source maps for this subtree.
///
/// These nodes and tokens get tracked as [FormatElement::Verbatim], useful to understand
/// if these nodes still need to have their own implementation.
pub fn format_verbatim(&self, node: &JsSyntaxNode) -> FormatElement {
let verbatim = self.format_verbatim_node_or_token(node);
FormatElement::Verbatim(Verbatim::new_verbatim(
verbatim,
node.to_string(),
node.text_range(),
))
FormatElement::Verbatim(Verbatim::new_verbatim(verbatim, node.text_range().len()))
}

/// Formats unknown nodes. The difference between this method and `format_verbatim` is that this method
Expand Down Expand Up @@ -653,7 +649,7 @@ impl Formatter {
fn trivia_token<L: Language>(piece: SyntaxTriviaPiece<L>) -> Token {
Token::new_dynamic(
normalize_newlines(piece.text(), LINE_TERMINATORS).into_owned(),
piece.text_range(),
piece.text_range().start(),
)
}

Expand All @@ -666,7 +662,7 @@ impl Formatter {

let content = Token::new_dynamic(
normalize_newlines(&node.text_trimmed().to_string(), LINE_TERMINATORS).into_owned(),
node.text_trimmed_range(),
node.text_trimmed_range().start(),
);

// Clippy false positive: SkipWhile does not implement DoubleEndedIterator
Expand Down
8 changes: 4 additions & 4 deletions crates/rome_js_formatter/src/lib.rs
Expand Up @@ -222,13 +222,13 @@ pub fn format_range(
let input_range = TextRange::new(start_source, end_source);
let output_range = TextRange::new(start_dest, end_dest);
let sourcemap = Vec::from(formatted.sourcemap());
let verbatim = Vec::from(formatted.verbatim());
let verbatim_ranges = Vec::from(formatted.verbatim_ranges());
let code = &formatted.into_code()[output_range];
Ok(Formatted::new(
code.into(),
Some(input_range),
sourcemap,
verbatim,
verbatim_ranges,
))
}

Expand Down Expand Up @@ -294,12 +294,12 @@ pub fn format_node(options: FormatOptions, root: &JsSyntaxNode) -> FormatResult<
let element = Formatter::new(options).format_root(root)?;
let formatted = Printer::new(options).print_with_indent(&element, initial_indent);
let sourcemap = Vec::from(formatted.sourcemap());
let verbatim = Vec::from(formatted.verbatim());
let verbatim_ranges = Vec::from(formatted.verbatim_ranges());
Ok(Formatted::new(
formatted.into_code(),
Some(root.text_range()),
sourcemap,
verbatim,
verbatim_ranges,
))
}

Expand Down
2 changes: 1 addition & 1 deletion crates/rome_js_formatter/src/ts/types/intersection_type.rs
Expand Up @@ -21,7 +21,7 @@ impl ToFormatElement for TsIntersectionType {
// if_group_breaks block to avoid removing comments when the
// group does not break
let replaced =
if_group_breaks(format_elements![Token::from(token.clone()), space_token()]);
if_group_breaks(format_elements![Token::from(&token), space_token()]);
formatter.format_replaced(&token, replaced)
}
None => if_group_breaks(format_elements![token("&"), space_token()]),
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_js_formatter/src/ts/types/union_type.rs
Expand Up @@ -21,7 +21,7 @@ impl ToFormatElement for TsUnionType {
// if_group_breaks block to avoid removing comments when the
// group does not break
let replaced =
if_group_breaks(format_elements![Token::from(token.clone()), space_token()]);
if_group_breaks(format_elements![Token::from(&token), space_token()]);
formatter.format_replaced(&token, replaced)
}
None => if_group_breaks(format_elements![token("|"), space_token()]),
Expand Down

0 comments on commit f52bb37

Please sign in to comment.