Skip to content

Commit

Permalink
Merge pull request #1952 from tree-sitter/tree-included-ranges
Browse files Browse the repository at this point in the history
Allow retrieving a tree's list of included ranges, fix some included range bugs
  • Loading branch information
maxbrunsfeld committed Nov 15, 2022
2 parents fb5fbdd + 8e3dc7c commit 36b5b6c
Show file tree
Hide file tree
Showing 13 changed files with 369 additions and 104 deletions.
116 changes: 88 additions & 28 deletions cli/src/tests/corpus_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
util,
};
use std::fs;
use tree_sitter::{LogType, Node, Parser, Tree};
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};

#[test]
fn test_bash_corpus() {
Expand Down Expand Up @@ -79,40 +79,49 @@ fn test_rust_corpus() {
fn test_language_corpus(language_name: &str) {
let grammars_dir = fixtures_dir().join("grammars");
let error_corpus_dir = fixtures_dir().join("error_corpus");
let template_corpus_dir = fixtures_dir().join("template_corpus");
let mut corpus_dir = grammars_dir.join(language_name).join("corpus");
if !corpus_dir.is_dir() {
corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
}

let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name));
let template_corpus_file =
template_corpus_dir.join(&format!("{}_templates.txt", language_name));
let main_tests = parse_tests(&corpus_dir).unwrap();
let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default());
let template_tests = parse_tests(&template_corpus_file).unwrap_or(TestEntry::default());
let mut tests = flatten_tests(main_tests);
tests.extend(flatten_tests(error_tests));
tests.extend(flatten_tests(template_tests).into_iter().map(|mut t| {
t.template_delimiters = Some(("<%", "%>"));
t
}));

let language = get_language(language_name);
let mut failure_count = 0;
for (example_name, input, expected_output, has_fields) in tests {
println!(" {} example - {}", language_name, example_name);
for test in tests {
println!(" {} example - {}", language_name, test.name);

let passed = allocations::record(|| {
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
set_included_ranges(&mut parser, &test.input, test.template_delimiters);

let tree = parser.parse(&input, None).unwrap();
let tree = parser.parse(&test.input, None).unwrap();
let mut actual_output = tree.root_node().to_sexp();
if !has_fields {
if !test.has_fields {
actual_output = strip_sexp_fields(actual_output);
}

if actual_output != expected_output {
if actual_output != test.output {
println!(
"Incorrect initial parse for {} - {}",
language_name, example_name,
language_name, test.name,
);
print_diff_key();
print_diff(&actual_output, &expected_output);
print_diff(&actual_output, &test.output);
println!("");
return false;
}
Expand All @@ -127,7 +136,7 @@ fn test_language_corpus(language_name: &str) {

let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(&input, None).unwrap();
let tree = parser.parse(&test.input, None).unwrap();
drop(parser);

for trial in 0..*ITERATION_COUNT {
Expand All @@ -138,7 +147,7 @@ fn test_language_corpus(language_name: &str) {
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let mut tree = tree.clone();
let mut input = input.clone();
let mut input = test.input.clone();

if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
Expand All @@ -158,6 +167,7 @@ fn test_language_corpus(language_name: &str) {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}

set_included_ranges(&mut parser, &input, test.template_delimiters);
let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();

// Check that the new tree is consistent.
Expand All @@ -178,21 +188,22 @@ fn test_language_corpus(language_name: &str) {
eprintln!("{}\n", String::from_utf8_lossy(&input));
}

set_included_ranges(&mut parser, &test.input, test.template_delimiters);
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();

// Verify that the final tree matches the expectation from the corpus.
let mut actual_output = tree3.root_node().to_sexp();
if !has_fields {
if !test.has_fields {
actual_output = strip_sexp_fields(actual_output);
}

if actual_output != expected_output {
if actual_output != test.output {
println!(
"Incorrect parse for {} - {} - seed {}",
language_name, example_name, seed
language_name, test.name, seed
);
print_diff_key();
print_diff(&actual_output, &expected_output);
print_diff(&actual_output, &test.output);
println!("");
return false;
}
Expand Down Expand Up @@ -293,23 +304,23 @@ fn test_feature_corpus_files() {
eprintln!("test language: {:?}", language_name);
}

for (name, input, expected_output, has_fields) in tests {
eprintln!(" example: {:?}", name);
for test in tests {
eprintln!(" example: {:?}", test.name);

let passed = allocations::record(|| {
let mut log_session = None;
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let tree = parser.parse(&input, None).unwrap();
let tree = parser.parse(&test.input, None).unwrap();
let mut actual_output = tree.root_node().to_sexp();
if !has_fields {
if !test.has_fields {
actual_output = strip_sexp_fields(actual_output);
}
if actual_output == expected_output {
if actual_output == test.output {
true
} else {
print_diff_key();
print_diff(&actual_output, &expected_output);
print_diff(&actual_output, &test.output);
println!("");
false
}
Expand Down Expand Up @@ -390,6 +401,7 @@ fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Re

let old_range = old_tree.root_node().range();
let new_range = new_tree.root_node().range();

let byte_range =
old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
let point_range = old_range.start_point.min(new_range.start_point)
Expand All @@ -407,6 +419,45 @@ fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Re
old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges)
}

/// Limits `parser` to the delimited template regions of `input`.
///
/// When `delimiters` is `Some((start, end))`, each region that begins right after an
/// occurrence of `start` and runs up to (but not including) the next occurrence of `end`
/// becomes one included `Range`. A region with no closing `end` extends to the end of
/// `input`. When `delimiters` is `None`, an empty range list is handed to the parser
/// (per the tree-sitter API this means the entire document is parsed).
///
/// Delimiters of any byte length are supported. An empty delimiter never matches, so an
/// empty `start` yields no ranges and an empty `end` leaves the region open-ended.
///
/// # Panics
///
/// Panics if the parser rejects the computed ranges.
fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
    /// Returns the offset of the first occurrence of `needle` within `haystack`.
    fn find_delimiter(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        // `windows` panics on a zero window size, so an empty needle is "not found".
        if needle.is_empty() {
            return None;
        }
        haystack
            .windows(needle.len())
            .position(|window| window == needle)
    }

    let Some((start, end)) = delimiters else {
        parser.set_included_ranges(&[]).unwrap();
        return;
    };

    let (start, end) = (start.as_bytes(), end.as_bytes());
    let mut ranges = Vec::new();
    let mut ix = 0;
    while ix < input.len() {
        let Some(found) = find_delimiter(&input[ix..], start) else {
            break;
        };
        // The included region starts immediately after the opening delimiter.
        let start_ix = ix + found + start.len();
        let end_ix = find_delimiter(&input[start_ix..], end)
            .map_or(input.len(), |offset| start_ix + offset);
        // Resume scanning at the closing delimiter; `start_ix > ix` guarantees progress.
        ix = end_ix;
        ranges.push(Range {
            start_byte: start_ix,
            end_byte: end_ix,
            start_point: point_for_offset(input, start_ix),
            end_point: point_for_offset(input, end_ix),
        });
    }

    parser.set_included_ranges(&ranges).unwrap();
}

fn point_for_offset(text: &[u8], offset: usize) -> Point {
let mut point = Point::default();
for byte in &text[..offset] {
if *byte == b'\n' {
point.row += 1;
point.column = 0;
} else {
point.column += 1;
}
}
point
}

fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
let mut parser = Parser::new();

Expand All @@ -425,13 +476,16 @@ fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Par
parser
}

fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
fn helper(
test: TestEntry,
is_root: bool,
prefix: &str,
result: &mut Vec<(String, Vec<u8>, String, bool)>,
) {
/// A single corpus example produced by `flatten_tests`, with its group hierarchy flattened
/// into one entry.
struct FlattenedTest {
/// Name of the example, printed when the example runs or fails.
name: String,
/// Source bytes handed to the parser.
input: Vec<u8>,
/// Expected S-expression that the parsed tree's `to_sexp()` output is compared against.
output: String,
/// When `false`, field names are stripped from the actual S-expression before comparing.
has_fields: bool,
/// Optional `(start, end)` delimiters; when `Some`, they are used to restrict the parser's
/// included ranges to the delimited regions of `input`. `flatten_tests` initializes this
/// to `None`.
template_delimiters: Option<(&'static str, &'static str)>,
}

fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
fn helper(test: TestEntry, is_root: bool, prefix: &str, result: &mut Vec<FlattenedTest>) {
match test {
TestEntry::Example {
mut name,
Expand All @@ -448,7 +502,13 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
return;
}
}
result.push((name, input, output, has_fields));
result.push(FlattenedTest {
name,
input,
output,
has_fields,
template_delimiters: None,
});
}
TestEntry::Group {
mut name, children, ..
Expand Down
2 changes: 1 addition & 1 deletion cli/src/tests/helpers/random.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use rand::{
};

const OPERATORS: &[char] = &[
'+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.',
'+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%',
];

pub struct Rand(StdRng);
Expand Down
16 changes: 3 additions & 13 deletions cli/src/tests/helpers/scope_sequence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,10 @@ impl ScopeSequence {
text: &Vec<u8>,
known_changed_ranges: &Vec<Range>,
) -> Result<(), String> {
if self.0.len() != text.len() {
panic!(
"Inconsistent scope sequence: {:?}",
self.0
.iter()
.zip(text.iter().map(|c| *c as char))
.collect::<Vec<_>>()
);
}

assert_eq!(self.0.len(), other.0.len());
let mut position = Point { row: 0, column: 0 };
for (i, stack) in self.0.iter().enumerate() {
let other_stack = &other.0[i];
for i in 0..(self.0.len().max(other.0.len())) {
let stack = &self.0.get(i);
let other_stack = &other.0.get(i);
if *stack != *other_stack && ![b'\r', b'\n'].contains(&text[i]) {
let containing_range = known_changed_ranges
.iter()
Expand Down

0 comments on commit 36b5b6c

Please sign in to comment.