Skip to content

Commit

Permalink
Add --report-states flag for reporting state counts for each rule
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbrunsfeld committed Jun 20, 2019
1 parent 438c7c1 commit d4ca2c8
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 76 deletions.
58 changes: 20 additions & 38 deletions cli/src/generate/build_tables/build_parse_table.rs
Expand Up @@ -26,10 +26,11 @@ struct AuxiliarySymbolInfo {
// The sequence of grammar symbols encountered on the path leading to a parse state.
type SymbolSequence = Vec<Symbol>;
// Auxiliary (repeat/helper) symbol info accumulated along the same path.
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;

pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);

struct ParseStateQueueEntry {
preceding_symbols: SymbolSequence,
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
state_id: ParseStateId,
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
}

struct ParseTableBuilder<'a> {
Expand All @@ -39,14 +40,13 @@ struct ParseTableBuilder<'a> {
variable_info: &'a Vec<VariableInfo>,
core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
parse_table: ParseTable,
state_ids_to_log: Vec<ParseStateId>,
}

impl<'a> ParseTableBuilder<'a> {
fn build(mut self) -> Result<ParseTable> {
fn build(mut self) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
// Ensure that the empty alias sequence has index 0.
self.parse_table
.production_infos
Expand All @@ -72,27 +72,10 @@ impl<'a> ParseTableBuilder<'a> {
while let Some(entry) = self.parse_state_queue.pop_front() {
let item_set = self
.item_set_builder
.transitive_closure(&self.item_sets_by_state_id[entry.state_id]);

if self.state_ids_to_log.contains(&entry.state_id) {
eprintln!(
"state: {}\n\ninitial item set:\n\n{}closed item set:\n\n{}",
entry.state_id,
super::item::ParseItemSetDisplay(
&self.item_sets_by_state_id[entry.state_id],
self.syntax_grammar,
self.lexical_grammar,
),
super::item::ParseItemSetDisplay(
&item_set,
self.syntax_grammar,
self.lexical_grammar,
)
);
}
.transitive_closure(&self.parse_state_info_by_id[entry.state_id].1);

self.add_actions(
entry.preceding_symbols,
self.parse_state_info_by_id[entry.state_id].0.clone(),
entry.preceding_auxiliary_symbols,
entry.state_id,
item_set,
Expand All @@ -101,7 +84,7 @@ impl<'a> ParseTableBuilder<'a> {

self.remove_precedences();

Ok(self.parse_table)
Ok((self.parse_table, self.parse_state_info_by_id))
}

fn add_parse_state(
Expand All @@ -124,7 +107,9 @@ impl<'a> ParseTableBuilder<'a> {
};

let state_id = self.parse_table.states.len();
self.item_sets_by_state_id.push(v.key().clone());
self.parse_state_info_by_id
.push((preceding_symbols.clone(), v.key().clone()));

self.parse_table.states.push(ParseState {
id: state_id,
lex_state_id: 0,
Expand All @@ -135,7 +120,6 @@ impl<'a> ParseTableBuilder<'a> {
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
state_id,
preceding_symbols: preceding_symbols.clone(),
preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
});
v.insert(state_id);
Expand Down Expand Up @@ -768,13 +752,12 @@ fn populate_following_tokens(
}
}

pub(crate) fn build_parse_table(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
inlines: &InlinedProductionMap,
variable_info: &Vec<VariableInfo>,
state_ids_to_log: Vec<usize>,
) -> Result<(ParseTable, Vec<TokenSet>)> {
pub(crate) fn build_parse_table<'a>(
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
variable_info: &'a Vec<VariableInfo>,
) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
populate_following_tokens(
Expand All @@ -784,15 +767,14 @@ pub(crate) fn build_parse_table(
&item_set_builder,
);

let table = ParseTableBuilder {
let (table, item_sets) = ParseTableBuilder {
syntax_grammar,
lexical_grammar,
state_ids_to_log,
item_set_builder,
variable_info,
state_ids_by_item_set: HashMap::new(),
core_ids_by_core: HashMap::new(),
item_sets_by_state_id: Vec::new(),
parse_state_info_by_id: Vec::new(),
parse_state_queue: VecDeque::new(),
parse_table: ParseTable {
states: Vec::new(),
Expand All @@ -804,5 +786,5 @@ pub(crate) fn build_parse_table(
}
.build()?;

Ok((table, following_tokens))
Ok((table, following_tokens, item_sets))
}
34 changes: 20 additions & 14 deletions cli/src/generate/build_tables/item.rs
@@ -1,4 +1,6 @@
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::generate::grammars::{
LexicalGrammar, Production, ProductionStep, SyntaxGrammar,
};
use crate::generate::rules::Associativity;
use crate::generate::rules::{Symbol, SymbolType};
use lazy_static::lazy_static;
Expand Down Expand Up @@ -302,12 +304,14 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
for (i, step) in self.0.production.steps.iter().enumerate() {
if i == self.0.step_index as usize {
write!(f, " •")?;
if step.precedence != 0 || step.associativity.is_some() {
write!(
f,
" (prec {:?} assoc {:?})",
step.precedence, step.associativity
)?;
if let Some(associativity) = step.associativity {
if step.precedence != 0 {
write!(f, " ({} {:?})", step.precedence, associativity)?;
} else {
write!(f, " ({:?})", associativity)?;
}
} else if step.precedence != 0 {
write!(f, " ({})", step.precedence)?;
}
}

Expand All @@ -325,19 +329,21 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
}

if let Some(alias) = &step.alias {
write!(f, " (alias {})", alias.value)?;
write!(f, "@{}", alias.value)?;
}
}

if self.0.is_done() {
write!(f, " •")?;
if let Some(step) = self.0.production.steps.last() {
if step.precedence != 0 || step.associativity.is_some() {
write!(
f,
" (prec {:?} assoc {:?})",
step.precedence, step.associativity
)?;
if let Some(associativity) = step.associativity {
if step.precedence != 0 {
write!(f, " ({} {:?})", step.precedence, associativity)?;
} else {
write!(f, " ({:?})", associativity)?;
}
} else if step.precedence != 0 {
write!(f, " ({})", step.precedence)?;
}
}
}
Expand Down
109 changes: 100 additions & 9 deletions cli/src/generate/build_tables/mod.rs
Expand Up @@ -7,7 +7,7 @@ mod minimize_parse_table;
mod token_conflicts;

use self::build_lex_table::build_lex_table;
use self::build_parse_table::build_parse_table;
use self::build_parse_table::{build_parse_table, ParseStateInfo};
use self::coincident_tokens::CoincidentTokenIndex;
use self::item::TokenSet;
use self::minimize_parse_table::minimize_parse_table;
Expand All @@ -20,6 +20,7 @@ use crate::generate::rules::{AliasMap, Symbol, SymbolType};
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
use hashbrown::{HashMap, HashSet};
use log::info;
use std::collections::btree_set::BTreeSet;

pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,
Expand All @@ -28,15 +29,10 @@ pub(crate) fn build_tables(
variable_info: &Vec<VariableInfo>,
inlines: &InlinedProductionMap,
minimize: bool,
state_ids_to_log: Vec<usize>,
report_symbol_name: Option<&str>,
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
let (mut parse_table, following_tokens) = build_parse_table(
syntax_grammar,
lexical_grammar,
inlines,
variable_info,
state_ids_to_log,
)?;
let (mut parse_table, following_tokens, parse_state_info) =
build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?;
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
let keywords = identify_keywords(
Expand Down Expand Up @@ -76,6 +72,16 @@ pub(crate) fn build_tables(
);
populate_external_lex_states(&mut parse_table, syntax_grammar);
mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map);

if let Some(report_symbol_name) = report_symbol_name {
report_state_info(
&syntax_grammar,
&lexical_grammar,
&parse_table,
&parse_state_info,
report_symbol_name,
);
}
Ok((
parse_table,
main_lex_table,
Expand Down Expand Up @@ -383,6 +389,91 @@ fn mark_fragile_tokens(
}
}

/// Prints state-count statistics to stderr for the `--report-states` flag.
///
/// First, for every non-terminal rule, prints the number of parse states
/// whose item set contains an item for that rule (sorted by descending
/// count). Then, if `report_symbol_name` names a rule — or is `"*"`,
/// meaning all states — prints each matching state's index, id, preceding
/// symbol sequence, and item set.
fn report_state_info<'a>(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    parse_table: &ParseTable,
    parse_state_info: &Vec<ParseStateInfo<'a>>,
    report_symbol_name: &'a str,
) {
    // For each non-terminal, collect the set of state indices whose item
    // set mentions it. BTreeSets keep the indices ordered for stable output.
    let mut all_state_indices = BTreeSet::new();
    let mut symbols_with_state_indices = (0..syntax_grammar.variables.len())
        .map(|i| (Symbol::non_terminal(i), BTreeSet::new()))
        .collect::<Vec<_>>();

    for (i, state) in parse_table.states.iter().enumerate() {
        all_state_indices.insert(i);
        let item_set = &parse_state_info[state.id];
        for (item, _) in item_set.1.entries.iter() {
            if !item.is_augmented() {
                symbols_with_state_indices[item.variable_index as usize]
                    .1
                    .insert(i);
            }
        }
    }

    // Largest state counts first.
    symbols_with_state_indices.sort_unstable_by_key(|(_, states)| -(states.len() as i32));

    // Column width for aligned output; 0 when the grammar has no variables
    // (avoids a panic on `max()` of an empty iterator).
    let max_symbol_name_length = syntax_grammar
        .variables
        .iter()
        .map(|v| v.name.len())
        .max()
        .unwrap_or(0);
    for (symbol, states) in &symbols_with_state_indices {
        eprintln!(
            "{:width$}\t{}",
            syntax_grammar.variables[symbol.index].name,
            states.len(),
            width = max_symbol_name_length
        );
    }
    eprintln!();

    // Choose which states to describe in detail: every state for "*",
    // otherwise the states recorded for the named rule (None if no rule
    // with that name exists, in which case nothing more is printed).
    let state_indices = if report_symbol_name == "*" {
        Some(&all_state_indices)
    } else {
        symbols_with_state_indices
            .iter()
            .find_map(|(symbol, state_indices)| {
                if syntax_grammar.variables[symbol.index].name == report_symbol_name {
                    Some(state_indices)
                } else {
                    None
                }
            })
    };

    if let Some(state_indices) = state_indices {
        // Sort by core id (then index) so states sharing a core are
        // listed together.
        let mut state_indices = state_indices.iter().copied().collect::<Vec<_>>();
        state_indices
            .sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i));

        for state_index in state_indices {
            let id = parse_table.states[state_index].id;
            let (preceding_symbols, item_set) = &parse_state_info[id];
            eprintln!("state index: {}", state_index);
            eprintln!("state id: {}", id);
            eprint!("symbol sequence:");
            for symbol in preceding_symbols {
                // Look up the display name in whichever grammar section
                // owns this kind of symbol.
                let name = if symbol.is_terminal() {
                    &lexical_grammar.variables[symbol.index].name
                } else if symbol.is_external() {
                    &syntax_grammar.external_tokens[symbol.index].name
                } else {
                    &syntax_grammar.variables[symbol.index].name
                };
                eprint!(" {}", name);
            }
            eprintln!(
                "\nitems:\n{}",
                self::item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar),
            );
        }
    }
}

fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
cursor.transition_chars().all(|(chars, is_sep)| {
if is_sep {
Expand Down
10 changes: 5 additions & 5 deletions cli/src/generate/mod.rs
Expand Up @@ -38,7 +38,7 @@ pub fn generate_parser_in_directory(
repo_path: &PathBuf,
grammar_path: Option<&str>,
minimize: bool,
state_ids_to_log: Vec<usize>,
report_symbol_name: Option<&str>,
) -> Result<()> {
let repo_src_path = repo_path.join("src");
let repo_header_path = repo_src_path.join("tree_sitter");
Expand All @@ -62,7 +62,7 @@ pub fn generate_parser_in_directory(
name: language_name,
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?;
} = generate_parser_for_grammar_with_opts(&grammar_json, minimize, report_symbol_name)?;

write_file(&repo_src_path.join("parser.c"), c_code)?;
write_file(&repo_src_path.join("node-types.json"), node_types_json)?;
Expand All @@ -85,14 +85,14 @@ pub fn generate_parser_in_directory(

pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
let parser = generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())?;
let parser = generate_parser_for_grammar_with_opts(&grammar_json, true, None)?;
Ok((parser.name, parser.c_code))
}

fn generate_parser_for_grammar_with_opts(
grammar_json: &str,
minimize: bool,
state_ids_to_log: Vec<usize>,
report_symbol_name: Option<&str>,
) -> Result<GeneratedParser> {
let input_grammar = parse_grammar(grammar_json)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
Expand All @@ -111,7 +111,7 @@ fn generate_parser_for_grammar_with_opts(
&variable_info,
&inlines,
minimize,
state_ids_to_log,
report_symbol_name,
)?;
let name = input_grammar.name;
let c_code = render_c_code(
Expand Down

0 comments on commit d4ca2c8

Please sign in to comment.