Only generate the new parse table format if --next-abi flag is used
maxbrunsfeld committed Aug 30, 2019
1 parent aeb2f89 commit 8037607
Showing 3 changed files with 105 additions and 20 deletions.
32 changes: 31 additions & 1 deletion cli/src/generate/mod.rs
@@ -33,6 +33,16 @@ lazy_static! {
.unwrap();
}

const NEW_HEADER_PARTS: [&'static str; 2] = [
"
uint32_t large_state_count;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;",
"
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
",
];

struct GeneratedParser {
c_code: String,
node_types_json: String,
@@ -42,6 +52,7 @@ pub fn generate_parser_in_directory(
repo_path: &PathBuf,
grammar_path: Option<&str>,
properties_only: bool,
next_abi: bool,
report_symbol_name: Option<&str>,
) -> Result<()> {
let src_path = repo_path.join("src");
@@ -103,12 +114,28 @@ pub fn generate_parser_in_directory(
lexical_grammar,
inlines,
simple_aliases,
next_abi,
report_symbol_name,
)?;

write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;

if next_abi {
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
} else {
let mut header = tree_sitter::PARSER_HEADER.to_string();

for part in &NEW_HEADER_PARTS {
let pos = header
.find(part)
.expect("Missing expected part of parser.h header");
header.replace_range(pos..(pos + part.len()), "");
}

write_file(&header_path.join("parser.h"), header)?;
}

ensure_file(&repo_path.join("index.js"), || {
npm_files::index_js(&language_name)
})?;
@@ -134,6 +161,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String
lexical_grammar,
inlines,
simple_aliases,
true,
None,
)?;
Ok((input_grammar.name, parser.c_code))
@@ -145,6 +173,7 @@ fn generate_parser_for_grammar_with_opts(
lexical_grammar: LexicalGrammar,
inlines: InlinedProductionMap,
simple_aliases: AliasMap,
next_abi: bool,
report_symbol_name: Option<&str>,
) -> Result<GeneratedParser> {
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?;
@@ -171,6 +200,7 @@ fn generate_parser_for_grammar_with_opts(
syntax_grammar,
lexical_grammar,
simple_aliases,
next_abi,
);
Ok(GeneratedParser {
c_code,
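
To make the header handling above concrete, here is a small, self-contained sketch of the same technique: each known substring is located in a header template with find and deleted with replace_range, so the generated parser.h exposes only the stable declarations when --next-abi is not passed. The HEADER and PARTS_TO_REMOVE values below are hypothetical stand-ins for tree_sitter::PARSER_HEADER and NEW_HEADER_PARTS, not the real constants.

// Sketch only: HEADER stands in for tree_sitter::PARSER_HEADER, and
// PARTS_TO_REMOVE stands in for NEW_HEADER_PARTS above.
const HEADER: &str = "\
typedef struct {
  uint32_t state_count;
  uint32_t large_state_count;
  const uint16_t *small_parse_table;
} TSLanguage;
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
";

const PARTS_TO_REMOVE: [&str; 2] = [
    "  uint32_t large_state_count;\n  const uint16_t *small_parse_table;\n",
    "#define SMALL_STATE(id) id - LARGE_STATE_COUNT\n",
];

fn strip_new_abi_parts(mut header: String) -> String {
    for part in &PARTS_TO_REMOVE {
        // Panic if the template and the removal list ever drift apart,
        // mirroring the .expect(...) call in generate_parser_in_directory.
        let pos = header
            .find(part)
            .expect("Missing expected part of header");
        header.replace_range(pos..(pos + part.len()), "");
    }
    header
}

fn main() {
    let stripped = strip_new_abi_parts(HEADER.to_string());
    assert!(!stripped.contains("large_state_count"));
    assert!(!stripped.contains("SMALL_STATE"));
    print!("{}", stripped);
}

With these stand-ins, the program prints a header containing only the stable field, which is the same shape of edit the generator applies to the real header.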
90 changes: 71 additions & 19 deletions cli/src/generate/render.rs
@@ -9,7 +9,10 @@ use std::cmp;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Write;
use std::mem::swap;
use tree_sitter::LANGUAGE_VERSION;

// Currently, the library supports a new ABI version that has not yet been
// stabilized, and the parser generation does not use it by default.
const STABLE_LANGUAGE_VERSION: usize = tree_sitter::LANGUAGE_VERSION - 1;

macro_rules! add {
($this: tt, $($arg: tt)*) => {{
@@ -65,6 +68,7 @@ struct Generator {
alias_ids: HashMap<Alias, String>,
alias_map: BTreeMap<Alias, Option<Symbol>>,
field_names: Vec<String>,
next_abi: bool,
}

impl Generator {
@@ -149,23 +153,30 @@ impl Generator {
}
}

let threshold = cmp::min(
SMALL_STATE_THRESHOLD,
self.parse_table.symbols.len() / 2 - 1,
);
self.large_state_count = self
.parse_table
.states
.iter()
.enumerate()
.take_while(|(i, s)| {
*i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
})
.count();

field_names.sort_unstable();
field_names.dedup();
self.field_names = field_names.into_iter().cloned().collect();

// If we are opting in to the new unstable language ABI, then use the concept of
// "small parse states". Otherwise, use the same representation for all parse
// states.
if self.next_abi {
let threshold = cmp::min(
SMALL_STATE_THRESHOLD,
self.parse_table.symbols.len() / 2 - 1,
);
self.large_state_count = self
.parse_table
.states
.iter()
.enumerate()
.take_while(|(i, s)| {
*i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
})
.count();
} else {
self.large_state_count = self.parse_table.states.len();
}
}

fn add_includes(&mut self) {
@@ -216,13 +227,26 @@ impl Generator {
})
.count();

add_line!(self, "#define LANGUAGE_VERSION {}", LANGUAGE_VERSION);
if self.next_abi {
add_line!(
self,
"#define LANGUAGE_VERSION {}",
tree_sitter::LANGUAGE_VERSION
);
} else {
add_line!(self, "#define LANGUAGE_VERSION {}", STABLE_LANGUAGE_VERSION);
}

add_line!(
self,
"#define STATE_COUNT {}",
self.parse_table.states.len()
);
add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);

if self.next_abi {
add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);
}

add_line!(
self,
"#define SYMBOL_COUNT {}",
@@ -755,7 +779,12 @@ impl Generator {

add_line!(
self,
"static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{"
"static uint16_t ts_parse_table[{}][SYMBOL_COUNT] = {{",
if self.next_abi {
"LARGE_STATE_COUNT"
} else {
"STATE_COUNT"
}
);
indent!(self);

@@ -959,7 +988,11 @@ impl Generator {
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
add_line!(self, ".alias_count = ALIAS_COUNT,");
add_line!(self, ".token_count = TOKEN_COUNT,");
add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");

if self.next_abi {
add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
}

add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
add_line!(
self,
@@ -1217,6 +1250,23 @@ impl Generator {
}
}

/// Returns a String of C code for the given components of a parser.
///
/// # Arguments
///
/// * `name` - A string slice containing the name of the language
/// * `parse_table` - The generated parse table for the language
/// * `main_lex_table` - The generated lexing table for the language
/// * `keyword_lex_table` - The generated keyword lexing table for the language
/// * `keyword_capture_token` - A symbol indicating which token is used
/// for keyword capture, if any.
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
/// * `simple_aliases` - A map describing the global rename rules that should apply.
The keys are symbols that are *always* aliased in the same way, and the values
/// are the aliases that are applied to those symbols.
/// * `next_abi` - A boolean indicating whether to opt into the new, unstable parse
/// table format. This is mainly used for testing, when developing Tree-sitter itself.
pub(crate) fn render_c_code(
name: &str,
parse_table: ParseTable,
@@ -1226,6 +1276,7 @@ pub(crate) fn render_c_code(
syntax_grammar: SyntaxGrammar,
lexical_grammar: LexicalGrammar,
simple_aliases: AliasMap,
next_abi: bool,
) -> String {
Generator {
buffer: String::new(),
Expand All @@ -1244,6 +1295,7 @@ pub(crate) fn render_c_code(
alias_ids: HashMap::new(),
alias_map: BTreeMap::new(),
field_names: Vec::new(),
next_abi,
}
.generate()
}
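
The large_state_count logic above measures the leading run of parse states that keep a full-width row in ts_parse_table: states 0 and 1 always count, and the run continues while a state's combined terminal and nonterminal entries exceed the threshold; every state after that prefix is a candidate for the compact small-state representation that the new header fields describe. Below is a minimal sketch of that prefix measurement, using made-up per-state entry counts (entry_counts, threshold) instead of real ParseState values.

// Sketch only: entry_counts is a hypothetical stand-in for the number of
// terminal + nonterminal entries in each parse state.
fn large_state_count(entry_counts: &[usize], threshold: usize) -> usize {
    entry_counts
        .iter()
        .enumerate()
        // States 0 and 1 are always treated as large; after that, the prefix
        // extends only while states stay denser than the threshold.
        .take_while(|(i, count)| *i <= 1 || **count > threshold)
        .count()
}

fn main() {
    let counts = [40, 35, 30, 12, 3, 2, 50];
    // With a threshold of 16, the dense prefix is states 0, 1, and 2. State 3
    // ends the run, so state 6 falls outside the prefix even though it is
    // dense again.
    assert_eq!(large_state_count(&counts, 16), 3);
}

Without --next-abi, the else branch simply sets large_state_count to the total number of states, so every row stays in ts_parse_table and the small-state tables are never emitted.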
3 changes: 3 additions & 0 deletions cli/src/main.rs
@@ -38,6 +38,7 @@ fn run() -> error::Result<()> {
.about("Generate a parser")
.arg(Arg::with_name("grammar-path").index(1))
.arg(Arg::with_name("log").long("log"))
.arg(Arg::with_name("next-abi").long("next-abi"))
.arg(Arg::with_name("properties-only").long("properties"))
.arg(
Arg::with_name("report-states-for-rule")
@@ -137,10 +138,12 @@ fn run() -> error::Result<()> {
if matches.is_present("log") {
logger::init();
}
let next_abi = matches.is_present("next-abi");
generate::generate_parser_in_directory(
&current_dir,
grammar_path,
properties_only,
next_abi,
report_symbol_name,
)?;
} else if let Some(matches) = matches.subcommand_matches("test") {
