Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid allocating before we have checked if it is in the cache + don't rehash subtree #59

Closed
wants to merge 9 commits into from
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ edition = "2018"
[dependencies]
rustc-hash = "1.0.1"
text-size = "1.0.0"
smallvec = "1.4.0"
smol_str = "0.1.10"
serde = { version = "1.0.89", optional = true, default-features = false }
thin-dst = "1.0.0"
Expand Down
94 changes: 78 additions & 16 deletions src/green/builder.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,67 @@
use rustc_hash::FxHashSet;
use rustc_hash::{FxHashMap, FxHasher};

use super::token::GreenTokenData;
use crate::{
green::{GreenElement, GreenNode, GreenToken, SyntaxKind},
NodeOrToken, SmolStr,
};
use smallvec::SmallVec;
use std::{
fmt::Debug,
hash::{Hash, Hasher},
};

/// Cache of already-built green nodes and tokens, used to deduplicate them.
// NOTE(review): this span is a rendered diff hunk, so two pairs of `nodes` /
// `tokens` fields appear. Presumably the `FxHashSet` pair is the removed
// version and the `FxHashMap` pair (keyed by `GreenNodeHash` and
// `GreenTokenData`) is the added one — confirm against the real file.
#[derive(Default, Debug)]
pub struct NodeCache {
nodes: FxHashSet<GreenNode>,
tokens: FxHashSet<GreenToken>,
nodes: FxHashMap<GreenNodeHash, GreenNode>,
tokens: FxHashMap<GreenTokenData, GreenToken>,
}

/// Key type of the node cache: a running hash over a node's kind and children.
///
/// `hasher` keeps the in-progress hashing state so that further children can be
/// fed in, while `inner_hash` stores the value of `hasher.finish()` taken after
/// the most recent update.
struct GreenNodeHash {
    /// Hashing state accumulated so far.
    hasher: FxHasher,
    /// Snapshot of `hasher.finish()` after the latest update.
    inner_hash: u64,
}

// NOTE(review): equality looks only at `inner_hash`, so two keys whose hash
// values collide compare equal even if they were built from different kinds
// or children — confirm that this is acceptable for the cache.
impl PartialEq for GreenNodeHash {
    fn eq(&self, rhs: &Self) -> bool {
        let (lhs_hash, rhs_hash) = (self.inner_hash, rhs.inner_hash);
        lhs_hash == rhs_hash
    }
}

impl Eq for GreenNodeHash {}

/// Debug output is just the current hash value, formatted like a plain `u64`.
impl Debug for GreenNodeHash {
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { inner_hash, .. } = self;
        Debug::fmt(inner_hash, out)
    }
}

impl GreenNodeHash {
    /// Starts a hash for a node of the given `kind` with no children added yet.
    ///
    /// The kind is fed into a fresh `FxHasher`, and `inner_hash` is set to the
    /// hasher's value at that point.
    fn new(kind: SyntaxKind) -> Self {
        let mut hasher = FxHasher::default();
        kind.hash(&mut hasher);
        let inner_hash = hasher.finish();
        Self { hasher, inner_hash }
    }

    /// Mixes `child` into the running hash and refreshes `inner_hash` so it
    /// always reflects everything hashed so far.
    fn add_child(&mut self, child: &GreenElement) {
        child.hash(&mut self.hasher);
        self.inner_hash = self.hasher.finish();
    }
}

impl Hash for GreenNodeHash {
    /// Feeds only the precomputed `inner_hash` into `state`.
    fn hash<H>(&self, state: &mut H)
    where
        H: Hasher,
    {
        state.write_u64(self.inner_hash);
    }
}

impl NodeCache {
/// Returns a green node of `kind` built from `children`, sharing an existing
/// node from `self.nodes` when a small node (at most `MAX_CHILDREN` children)
/// with the same hash key is already cached; larger nodes are always built
/// fresh and are not cached.
// NOTE(review): this span is a rendered diff, so lines from the previous
// implementation (build the node first, then look it up in a hash set)
// appear interleaved with the new lines and with page UI text — confirm
// which lines are live before relying on this description.
fn node<I>(&mut self, kind: SyntaxKind, children: I) -> GreenNode
where
I: IntoIterator<Item = GreenElement>,
I::IntoIter: ExactSizeIterator,
I: ExactSizeIterator<Item = GreenElement>,
simonvandel marked this conversation as resolved.
Show resolved Hide resolved
{
let mut node = GreenNode::new(kind, children);
let num_children = children.len();
const MAX_CHILDREN: usize = 3;
// Green nodes are fully immutable, so it's ok to deduplicate them.
// This is the same optimization that Roslyn does
// https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
Expand All @@ -26,22 +70,40 @@ impl NodeCache {
// For `libsyntax/parse/parser.rs`, measurements show that deduping saves
// 17% of the memory for green nodes!
// Future work: make hashing faster by avoiding rehashing of subtrees.
if node.children().len() <= 3 {
match self.nodes.get(&node) {
Some(existing) => node = existing.clone(),
None => assert!(self.nodes.insert(node.clone())),
if num_children <= MAX_CHILDREN {
let mut hash = GreenNodeHash::new(kind);
let mut collected_children =
SmallVec::<[GreenElement; MAX_CHILDREN]>::with_capacity(MAX_CHILDREN);
for child in children {
collected_children.push(child);
}
simonvandel marked this conversation as resolved.
Show resolved Hide resolved
for child in collected_children.iter() {
hash.add_child(child);
}
// NOTE(review): the lookup key compares by hash value only, so a hash
// collision would hand back a cached node with different contents —
// confirm this is acceptable.
match self.nodes.get(&hash) {
simonvandel marked this conversation as resolved.
Show resolved Hide resolved
Some(existing) => existing.clone(),
None => {
let node = GreenNode::new(kind, collected_children.into_iter());
self.nodes.insert(hash, node.clone());
node
}
}
} else {
GreenNode::new(kind, children)
}
node
}

/// Returns the token for `(kind, text)`, reusing the one stored in
/// `self.tokens` when present and otherwise creating and caching a new one.
fn token(&mut self, kind: SyntaxKind, text: SmolStr) -> GreenToken {
let mut token = GreenToken::new(kind, text);
match self.tokens.get(&token) {
Some(existing) => token = existing.clone(),
None => assert!(self.tokens.insert(token.clone())),
let token_data = GreenTokenData::new(kind, text.clone());

match self.tokens.get(&token_data) {
Some(existing) => existing.clone(),
None => {
let token = GreenToken::new(kind, text.clone());
self.tokens.insert(token_data, token.clone());
token
}
}
token
}
}

Expand Down
8 changes: 7 additions & 1 deletion src/green/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,17 @@ use crate::{green::SyntaxKind, SmolStr, TextSize};

/// Data stored for a token: its syntax kind and its text.
// NOTE(review): two struct header lines appear because this is a rendered
// diff; presumably the `pub(crate)` one is the added line — confirm.
#[repr(align(2))] // NB: this is an at-least annotation
#[derive(Debug, PartialEq, Eq, Hash)]
struct GreenTokenData {
pub(crate) struct GreenTokenData {
kind: SyntaxKind,
text: SmolStr,
}

impl GreenTokenData {
pub(crate) fn new(kind: SyntaxKind, text: SmolStr) -> GreenTokenData {
GreenTokenData { kind, text }
}
}

/// Leaf node in the immutable tree.
pub struct GreenToken {
ptr: ptr::NonNull<GreenTokenData>,
Expand Down