Skip to content

Commit

Permalink
feat: allow specifying an external scanner's files
Browse files Browse the repository at this point in the history
  • Loading branch information
amaanq committed Feb 16, 2024
1 parent b6c75cc commit 4342efd
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 26 deletions.
2 changes: 1 addition & 1 deletion cli/benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) ->
fn get_language(path: &Path) -> Language {
let src_dir = GRAMMARS_DIR.join(path).join("src");
TEST_LOADER
.load_language_at_path(&src_dir, &[&src_dir])
.load_language_at_path(&src_dir, &[&src_dir], None)
.with_context(|| format!("Failed to load language at path {src_dir:?}"))
.unwrap()
}
80 changes: 58 additions & 22 deletions cli/loader/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ pub struct LanguageConfiguration<'a> {

pub struct Loader {
parser_lib_path: PathBuf,
languages_by_id: Vec<(PathBuf, OnceCell<Language>)>,
languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
language_configurations: Vec<LanguageConfiguration<'static>>,
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
language_configuration_in_current_path: Option<usize>,
Expand Down Expand Up @@ -347,11 +347,11 @@ impl Loader {
}

fn language_for_id(&self, id: usize) -> Result<Language> {
let (path, language) = &self.languages_by_id[id];
let (path, language, externals) = &self.languages_by_id[id];
language
.get_or_try_init(|| {
let src_path = path.join("src");
self.load_language_at_path(&src_path, &[&src_path])
self.load_language_at_path(&src_path, &[&src_path], externals.as_deref())
})
.cloned()
}
Expand All @@ -360,6 +360,7 @@ impl Loader {
&self,
src_path: &Path,
header_paths: &[&Path],
external_files: Option<&[PathBuf]>,
) -> Result<Language> {
let grammar_path = src_path.join("grammar.json");

Expand All @@ -372,14 +373,20 @@ impl Loader {
let grammar_json: GrammarJSON = serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;

self.load_language_at_path_with_name(src_path, header_paths, &grammar_json.name)
self.load_language_at_path_with_name(
src_path,
header_paths,
&grammar_json.name,
external_files,
)
}

pub fn load_language_at_path_with_name(
&self,
src_path: &Path,
header_paths: &[&Path],
name: &str,
external_files: Option<&[PathBuf]>,
) -> Result<Language> {
let mut lib_name = name.to_string();
let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name));
Expand All @@ -395,12 +402,26 @@ impl Loader {
let parser_path = src_path.join("parser.c");
let scanner_path = self.get_scanner_path(src_path);

// Files whose modification times are compared against the compiled library.
// The generated parser and the scanner (when one exists) are always part of
// the list; `external_files` only adds further paths on top of that baseline.
// Building the list only when `external_files` was `Some` left it empty for
// every grammar without an `external-files` entry, so changes to parser.c or
// the scanner no longer triggered a recompile.
let paths_to_check = {
    let mut files = vec![parser_path.clone()];
    if let Some(scanner_path) = scanner_path.as_ref() {
        files.push(scanner_path.to_path_buf());
    }
    // `None` is treated the same as an empty list of extra files.
    files.extend(
        external_files
            .unwrap_or_default()
            .iter()
            .map(|path| src_path.join(path)),
    );
    files
};

#[cfg(feature = "wasm")]
if self.wasm_store.lock().unwrap().is_some() {
library_path.set_extension("wasm");
}

let mut recompile = needs_recompile(&library_path, &parser_path, scanner_path.as_deref())
let mut recompile = needs_recompile(&library_path, &paths_to_check)
.with_context(|| "Failed to compare source and binary timestamps")?;

#[cfg(feature = "wasm")]
Expand Down Expand Up @@ -808,7 +829,7 @@ impl Loader {
parser_path: &Path,
set_current_path_config: bool,
) -> Result<&[LanguageConfiguration]> {
#[derive(Default, Deserialize)]
#[derive(Deserialize, Clone, Default)]
#[serde(untagged)]
enum PathsJSON {
#[default]
Expand Down Expand Up @@ -848,6 +869,8 @@ impl Loader {
locals: PathsJSON,
#[serde(default)]
tags: PathsJSON,
#[serde(default, rename = "external-files")]
external_files: PathsJSON,
}

#[derive(Deserialize)]
Expand Down Expand Up @@ -883,7 +906,7 @@ impl Loader {
// Determine if a previous language configuration in this package.json file
// already uses the same language.
let mut language_id = None;
for (id, (path, _)) in
for (id, (path, _, _)) in
self.languages_by_id.iter().enumerate().skip(language_count)
{
if language_path == *path {
Expand All @@ -892,10 +915,29 @@ impl Loader {
}

// If not, add a new language path to the list.
let language_id = language_id.unwrap_or_else(|| {
self.languages_by_id.push((language_path, OnceCell::new()));
let language_id = if let Some(language_id) = language_id {
language_id
} else {
self.languages_by_id.push((
language_path,
OnceCell::new(),
config_json.external_files.clone().into_vec().map(|files| {
files.into_iter()
.map(|path| {
let path = parser_path.join(path);
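// Prevent `path` from being above/outside of `parser_path`.
// NOTE(review): `starts_with` compares path components lexically, so this rejects absolute paths (which replace the base in `join`) but not `..` segments - confirm whether those should be rejected too.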

if path.starts_with(parser_path) {
Ok(path)
} else {
Err(anyhow!("External file path {path:?} is outside of parser directory {parser_path:?}"))
}
})
.collect::<Result<Vec<_>>>()
}).transpose()?,
));
self.languages_by_id.len() - 1
});
};

let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
Expand Down Expand Up @@ -972,7 +1014,7 @@ impl Loader {
self.language_configurations
.push(unsafe { mem::transmute(configuration) });
self.languages_by_id
.push((parser_path.to_owned(), OnceCell::new()));
.push((parser_path.to_owned(), OnceCell::new(), None));
}

Ok(&self.language_configurations[initial_language_configuration_count..])
Expand Down Expand Up @@ -1254,20 +1296,14 @@ impl<'a> LanguageConfiguration<'a> {
}
}

fn needs_recompile(
lib_path: &Path,
parser_c_path: &Path,
scanner_path: Option<&Path>,
) -> Result<bool> {
fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result<bool> {
if !lib_path.exists() {
return Ok(true);
}
let lib_mtime = mtime(lib_path)?;
if mtime(parser_c_path)? > lib_mtime {
return Ok(true);
}
if let Some(scanner_path) = scanner_path {
if mtime(scanner_path)? > lib_mtime {
let lib_mtime =
mtime(lib_path).with_context(|| format!("Failed to read mtime of {lib_path:?}"))?;
for path in paths_to_check {
if mtime(path)? > lib_mtime {
return Ok(true);
}
}
Expand Down
18 changes: 15 additions & 3 deletions cli/src/tests/helpers/fixtures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub fn get_language(name: &str) -> Language {
.load_language_at_path(
&GRAMMARS_DIR.join(name).join("src"),
&[&HEADER_DIR, &GRAMMARS_DIR.join(name).join("src")],
None,
)
.unwrap()
}
Expand Down Expand Up @@ -86,7 +87,7 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
fs::write(&parser_path, parser_code).unwrap();
}

if let Some(path) = path {
let scanner_path = if let Some(path) = path {
let scanner_path = path.join("scanner.c");
if scanner_path.exists() {
let scanner_code = fs::read_to_string(&scanner_path).unwrap();
Expand All @@ -96,8 +97,13 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
{
fs::write(&scanner_copy_path, scanner_code).unwrap();
}
Some(scanner_copy_path)
} else {
None
}
}
} else {
None
};

let header_path = src_dir.join("tree_sitter");
fs::create_dir_all(&header_path).unwrap();
Expand All @@ -110,7 +116,13 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
})
.unwrap();

let paths_to_check = if let Some(scanner_path) = &scanner_path {
vec![parser_path.clone(), scanner_path.to_path_buf()]
} else {
vec![parser_path.clone()]
};

TEST_LOADER
.load_language_at_path_with_name(&src_dir, &[&HEADER_DIR], name)
.load_language_at_path_with_name(&src_dir, &[&HEADER_DIR], name, Some(&paths_to_check))
.unwrap()
}
5 changes: 5 additions & 0 deletions docs/section-4-syntax-highlighting.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ These keys specify basic information about the parser:

* `path` (optional) - A relative path from the directory containing `package.json` to another directory containing the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same folder as `package.json`), and this very rarely needs to be overridden.

* `external-files` (optional) - A list of paths, relative to the root directory of the
parser, to additional files that are checked for modifications when deciding whether the
parser needs to be recompiled. This is useful during development, so that changes to files
other than `scanner.c` are picked up by the CLI.

### Language Detection

These keys help to decide whether the language applies to a given file:
Expand Down

0 comments on commit 4342efd

Please sign in to comment.