Skip to content

Commit

Permalink
feat: expose the allocator and array header files for external scanners
Browse files Browse the repository at this point in the history
  • Loading branch information
amaanq committed Feb 26, 2024
1 parent 9e5bf65 commit 514e5d3
Show file tree
Hide file tree
Showing 12 changed files with 319 additions and 83 deletions.
13 changes: 13 additions & 0 deletions cli/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,19 @@ fn main() {
.unwrap()
.as_secs_f64();
println!("cargo:rustc-env=BUILD_TIME={build_time}");

#[cfg(any(
target_os = "linux",
target_os = "android",
target_os = "freebsd",
target_os = "openbsd",
target_os = "netbsd",
target_os = "dragonfly",
))]
println!("cargo:rustc-link-arg=-Wl,--dynamic-list=cli/dynamic-symbols.txt");

#[cfg(any(target_os = "macos", target_os = "ios"))]
println!("cargo:rustc-link-arg=-Wl,-exported_symbols_list,cli/dynamic-symbols-darwin.txt");
}

fn web_playground_files_present() -> bool {
Expand Down
4 changes: 4 additions & 0 deletions cli/dynamic-symbols-darwin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
_ts_current_malloc
_ts_current_calloc
_ts_current_realloc
_ts_current_free
6 changes: 6 additions & 0 deletions cli/dynamic-symbols.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
ts_current_malloc;
ts_current_calloc;
ts_current_realloc;
ts_current_free;
};
13 changes: 9 additions & 4 deletions cli/loader/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ impl Loader {
.cpp(true)
.opt_level(2)
.cargo_metadata(false)
.cargo_warnings(false)
.target(BUILD_TARGET)
.host(BUILD_TARGET)
.flag_if_supported("-Werror=implicit-function-declaration");
Expand Down Expand Up @@ -584,10 +585,6 @@ impl Loader {
command.arg("-O2");
}

// For conditional compilation of external scanner code when
// used internally by `tree-siteer parse` and other sub commands.
command.arg("-DTREE_SITTER_INTERNAL_BUILD");

if let Some(scanner_path) = scanner_path.as_ref() {
if scanner_path.extension() == Some("c".as_ref()) {
command.arg("-xc").arg("-std=c99").arg(scanner_path);
Expand All @@ -599,6 +596,14 @@ impl Loader {
command.arg("-xc").arg(parser_path);
}

// For conditional compilation of external scanner code when
// used internally by `tree-sitter parse` and other sub commands.
command.arg("-DTREE_SITTER_INTERNAL_BUILD");

// Always use the same allocator in the CLI as any scanner, useful for debugging and
// tracking memory leaks in tests.
command.arg("-DTS_REUSE_ALLOCATOR");

let output = command.output().with_context(|| {
format!("Failed to execute the C compiler with the following command:\n{command:?}")
})?;
Expand Down
2 changes: 2 additions & 0 deletions cli/src/generate/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ pub fn generate_parser_in_directory(

write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
write_file(&header_path.join("alloc.h"), tree_sitter::ALLOC_HEADER)?;
write_file(&header_path.join("array.h"), tree_sitter::ARRAY_HEADER)?;
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;

if !path_in_ignore(&repo_path) {
Expand Down
60 changes: 60 additions & 0 deletions cli/src/generate/templates/alloc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Allocation helpers for external scanners.
//
// Scanner code should call ts_malloc / ts_calloc / ts_realloc / ts_free
// instead of the libc functions directly. What those names expand to is
// decided at compile time by the macros below.
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// TS_PUBLIC marks a symbol as visible outside the object it is defined in:
// `dllexport` when _WIN32 is defined, default ELF/Mach-O visibility otherwise.
#ifdef _WIN32
#define TS_PUBLIC __declspec(dllexport)
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif

// Function pointers holding the currently selected allocator. They are only
// declared here; the definitions must be provided by whatever this scanner is
// linked or loaded into.
TS_PUBLIC extern void *(*ts_current_malloc)(size_t);
TS_PUBLIC extern void *(*ts_current_calloc)(size_t, size_t);
TS_PUBLIC extern void *(*ts_current_realloc)(void *, size_t);
TS_PUBLIC extern void (*ts_current_free)(void *);

// Allow clients to override allocation functions
//
// Each ts_* macro is wrapped in #ifndef, so a definition supplied before this
// header is included (e.g. via -Dts_malloc=my_malloc) takes precedence.
#ifdef TS_REUSE_ALLOCATOR

// TS_REUSE_ALLOCATOR defined: route every allocation through the
// ts_current_* function pointers declared above.
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif

#else

// TS_REUSE_ALLOCATOR not defined: fall back to the libc allocator.
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif

#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ALLOC_H_
19 changes: 19 additions & 0 deletions cli/src/tests/helpers/fixtures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,25 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->

let header_path = src_dir.join("tree_sitter");
fs::create_dir_all(&header_path).unwrap();

// Each helper header must be written with its own contents. Writing
// `PARSER_HEADER` under the `alloc.h` / `array.h` names would give test
// scanners a copy of parser.h instead of the allocator and array macros.
fs::write(header_path.join("alloc.h"), tree_sitter::ALLOC_HEADER)
    .with_context(|| {
        format!(
            "Failed to write {:?}",
            header_path.join("alloc.h").file_name().unwrap()
        )
    })
    .unwrap();

fs::write(header_path.join("array.h"), tree_sitter::ARRAY_HEADER)
    .with_context(|| {
        format!(
            "Failed to write {:?}",
            header_path.join("array.h").file_name().unwrap()
        )
    })
    .unwrap();

fs::write(header_path.join("parser.h"), tree_sitter::PARSER_HEADER)
.with_context(|| {
format!(
Expand Down
103 changes: 101 additions & 2 deletions docs/section-3-creating-parsers.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ The first time you run `tree-sitter generate`, it will also generate a few other
* `bindings/c/tree-sitter-language.h` - This file provides the C interface of your language.
* `bindings/c/tree-sitter-language.pc` - This file provides pkg-config metadata about your language's C library.
* `src/tree_sitter/parser.h` - This file provides some basic C definitions that are used in your generated `parser.c` file.
* `src/tree_sitter/alloc.h` - This file provides some memory allocation macros that are to be used in your external scanner, if you have one.
* `src/tree_sitter/array.h` - This file provides some array macros that are to be used in your external scanner, if you have one.

#### Go

Expand Down Expand Up @@ -144,7 +146,6 @@ The first time you run `tree-sitter generate`, it will also generate a few other
* `Package.swift` - This file tells Swift how to compile your language.
* `bindings/swift/TreeSitterLanguage/language.h` - This file wraps your language in a Swift module when used in Swift.


If there is an ambiguity or *local ambiguity* in your grammar, Tree-sitter will detect it during parser generation, and it will exit with an `Unresolved conflict` error message. See below for more information on these errors.

### Command: `test`
Expand Down Expand Up @@ -690,7 +691,9 @@ Then, add another C or C++ source file to your project. Currently, its path must
In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array; the actual names do not matter.

```c
#include <tree_sitter/parser.h>
#include "tree_sitter/parser.h"
#include "tree_sitter/alloc.h"
#include "tree_sitter/array.h"

enum TokenType {
INDENT,
Expand Down Expand Up @@ -788,6 +791,102 @@ if (valid_symbols[INDENT] || valid_symbols[DEDENT]) {
return true;
}
}

#### External Scanner Helpers

As briefly mentioned earlier, `tree-sitter generate` provides some allocator and array macros to be used in your external scanner, if you need either of these features.

##### Allocation

Instead of using libc's `malloc`, `calloc`, `realloc`, and `free`, you should use the versions prefixed with `ts_` from `tree_sitter/alloc.h`.
These macros can allow a potential consumer to override the default allocator with their own implementation, but by default will use the libc functions.
For example, assuming you wanted to allocate 100 bytes for your scanner, you'd do so like the following example:

```c
#include "tree_sitter/parser.h"
#include "tree_sitter/alloc.h"
#include "tree_sitter/array.h"

// ...

void* tree_sitter_my_language_external_scanner_create() {
return ts_calloc(100, 1); // or ts_malloc(100)
}

// ...

```

##### Arrays

If you need to use array-like types in your scanner, such as tracking a stack of indentations or tags, you should use the array macros from `tree_sitter/array.h`.

There are quite a few of them provided for you, but here's how you could get started tracking a stack of indentation levels and the contents of a string. Check out the header itself for more detailed documentation.

```c
#include "tree_sitter/parser.h"
#include "tree_sitter/alloc.h"
#include "tree_sitter/array.h"

// External token kinds; the order must match the grammar's `externals` array.
enum TokenType {
  INDENT,
  DEDENT,
  NEWLINE,
  STRING,
};

// Create the array in your create function

void* tree_sitter_my_language_external_scanner_create() {
  // Option 1: ts_calloc returns zeroed memory, which is already a valid
  // empty array (size 0, capacity 0, contents NULL).
  return ts_calloc(1, sizeof(Array(int)));

  // or if you want to zero out the memory yourself
  // (an alternative to the `return` above -- use one or the other)

  Array(int) *stack = ts_malloc(sizeof(Array(int)));
  // `stack` is already a pointer to the array, so pass it directly;
  // `&stack` would be a pointer to the pointer.
  array_init(stack);
  return stack;
}

// Scans for INDENT, DEDENT and STRING tokens.
//
// `payload` is the Array(int) allocated by the create function and is used as
// a stack of indentation columns. Returns true and sets `lexer->result_symbol`
// when a token was recognized, false otherwise.
bool tree_sitter_my_language_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  Array(int) *stack = payload;
  if (valid_symbols[INDENT]) {
    array_push(stack, lexer->get_column(lexer));
    lexer->result_symbol = INDENT;
    return true;
  }
  if (valid_symbols[DEDENT]) {
    array_pop(stack); // this returns the popped element by value, but we don't need it
    lexer->result_symbol = DEDENT;
    return true;
  }

  // we can also use an array on the stack to keep track of a string

  Array(char) next_string = array_new();

  if (valid_symbols[STRING] && lexer->lookahead == '"') {
    lexer->advance(lexer, false);
    while (lexer->lookahead != '"' && lexer->lookahead != '\n' && !lexer->eof(lexer)) {
      array_push(&next_string, lexer->lookahead);
      lexer->advance(lexer, false);
    }

    // assume we have some arbitrary constraint of not having more than 100 characters in a string
    if (lexer->lookahead == '"' && next_string.size <= 100) {
      lexer->advance(lexer, false);
      lexer->result_symbol = STRING;
      // only the Array struct lives on the stack; its contents are heap
      // allocated by array_push and must be released before returning
      array_delete(&next_string);
      return true;
    }
  }

  // release the string buffer on the failure path as well
  array_delete(&next_string);
  return false;
}

```

#### Other External Scanner Details
Expand Down
2 changes: 2 additions & 0 deletions lib/binding_rust/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION as usize;
pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize =
ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION as usize;

pub const ALLOC_HEADER: &str = include_str!("../../cli/src/generate/templates/alloc.h");
pub const ARRAY_HEADER: &str = include_str!("../src/array.h");
pub const PARSER_HEADER: &str = include_str!("../src/parser.h");

/// An opaque object that defines how to parse a particular language. The code for each
Expand Down
8 changes: 4 additions & 4 deletions lib/src/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ static void *ts_realloc_default(void *buffer, size_t size) {
}

// Allow clients to override allocation functions dynamically
void *(*ts_current_malloc)(size_t) = ts_malloc_default;
void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
void (*ts_current_free)(void *) = free;
TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default;
TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
TS_PUBLIC void (*ts_current_free)(void *) = free;

void ts_set_allocator(
void *(*new_malloc)(size_t size),
Expand Down
20 changes: 12 additions & 8 deletions lib/src/alloc.h
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_

#include "tree_sitter/api.h"

#ifdef __cplusplus
extern "C" {
#endif

#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#ifdef _WIN32
#define TS_PUBLIC __declspec(dllexport)
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif

extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);
TS_PUBLIC extern void *(*ts_current_malloc)(size_t);
TS_PUBLIC extern void *(*ts_current_calloc)(size_t, size_t);
TS_PUBLIC extern void *(*ts_current_realloc)(void *, size_t);
TS_PUBLIC extern void (*ts_current_free)(void *);

// Allow clients to override allocation functions
#ifndef ts_malloc
Expand All @@ -34,4 +38,4 @@ extern void (*ts_current_free)(void *);
}
#endif

#endif // TREE_SITTER_ALLOC_H_
#endif // TREE_SITTER_ALLOC_H_

0 comments on commit 514e5d3

Please sign in to comment.