#!/bin/bash

# Clone (or refresh) the tree-sitter-python grammar used as a test fixture.
#
# The checkout lives under tests/fixtures because tests/__init__.py builds
# the grammar from "tests/fixtures/tree-sitter-python".

# Abort on the first failing command. Without this, a failed `cd` below would
# let `git reset --hard` run against the enclosing repository instead.
set -e

language_dir=tests/fixtures/tree-sitter-python
language_url=https://github.com/tree-sitter/tree-sitter-python

# First run: create a shallow clone of the grammar repository.
if [ ! -d "$language_dir" ]; then
  git clone "$language_url" "$language_dir" --depth=1
fi

# Every run: move the checkout to the current upstream master.
# The subshell keeps the directory change from leaking to the caller.
(
  cd "$language_dir"
  git fetch origin master --depth 1
  git reset --hard FETCH_HEAD
)
class Language:
    """A tree-sitter grammar loaded from a compiled shared library.

    Attributes:
        path: Filesystem path of the shared library that was loaded.
        name: Grammar name; the library must export ``tree_sitter_<name>``.
        lib: The ``ctypes`` handle returned by ``cdll.LoadLibrary``.
        language_id: Value returned by ``tree_sitter_<name>()``, read as a
            ``c_void_p`` (an integer address, or ``None`` for a null pointer).
    """

    @staticmethod
    def build(repo_path, output_path):
        """Compile the grammar found in *repo_path* into a shared library.

        ``src/parser.c`` is always compiled. An external scanner is added
        when present: ``src/scanner.cc`` (as C++) takes precedence over
        ``src/scanner.c`` (as C).

        Args:
            repo_path: Root directory of a grammar repository.
            output_path: Path of the shared library to write.

        Raises:
            FileNotFoundError: If ``src/parser.c`` does not exist, or the
                compiler executable cannot be found.
            subprocess.CalledProcessError: If the compiler exits with a
                non-zero status.
        """
        src_path = path.join(repo_path, "src")
        parser_path = path.join(src_path, "parser.c")

        # Fail early with a clear message rather than a compiler diagnostic.
        if not path.exists(parser_path):
            raise FileNotFoundError(
                "No grammar source found at %s" % parser_path
            )

        command = [
            "clang++",
            "-shared",
            # Shared objects need position-independent code on most platforms.
            "-fPIC",
            "-o",
            output_path,
            "-I",
            INCLUDE_PATH,
            # The generated parser is C even though the driver is clang++.
            "-xc",
            parser_path,
        ]

        cc_scanner = path.join(src_path, "scanner.cc")
        c_scanner = path.join(src_path, "scanner.c")
        if path.exists(cc_scanner):
            # Switch the input language for the files that follow.
            command.append("-xc++")
            command.append(cc_scanner)
        elif path.exists(c_scanner):
            # Still covered by the preceding "-xc".
            command.append(c_scanner)

        # check=True surfaces compiler failures instead of silently leaving
        # the output library missing or stale.
        subprocess.run(command, check=True)

    def __init__(self, path, name):
        """Load the shared library at *path* and look up grammar *name*.

        Args:
            path: Path of a library produced by :meth:`build`. Note that this
                parameter shadows the module-level ``os.path`` alias inside
                this method.
            name: Grammar name, e.g. ``"python"``.

        Raises:
            OSError: If the library cannot be loaded.
            AttributeError: If the library does not export
                ``tree_sitter_<name>``.
        """
        self.path = path
        self.name = name
        self.lib = cdll.LoadLibrary(path)
        function = getattr(self.lib, "tree_sitter_%s" % name)
        # Without an explicit restype ctypes assumes a C int, which would
        # truncate the returned pointer on 64-bit platforms.
        function.restype = c_void_p
        self.language_id = function()
constructor"); + return NULL; +} + +static void tree_dealloc(Tree *self) { + ts_tree_delete(self->tree); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject *tree_get_root_node(Tree *self, void *payload) { + return node_new_internal(ts_tree_root_node(self->tree)); +} + +static PyMethodDef tree_methods[] = { + {NULL}, +}; + +static PyGetSetDef tree_accessors[] = { + {"root_node", (getter)tree_get_root_node, NULL, "root node", NULL}, + {NULL} +}; + +static PyTypeObject tree_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "tree_sitter.Tree", + .tp_doc = "A Syntax Tree", + .tp_basicsize = sizeof(Tree), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_new = tree_new, + .tp_dealloc = (destructor)tree_dealloc, + .tp_methods = tree_methods, + .tp_getset = tree_accessors, +}; + +static PyObject *tree_new_internal(TSTree *tree) { + Tree *self = (Tree *)tree_type.tp_alloc(&tree_type, 0); + if (self != NULL) self->tree = tree; + return (PyObject *)self; +} + +// Parser + +static PyObject *parser_new( + PyTypeObject *type, + PyObject *args, + PyObject *kwds +) { + Parser *self = (Parser *)type->tp_alloc(type, 0); + if (self != NULL) self->parser = ts_parser_new(); + return (PyObject *)self; +} + +static void parser_dealloc(Parser *self) { + ts_parser_delete(self->parser); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject *parser_parse(Parser *self, PyObject *args) { + PyObject *source_code = NULL; + PyObject *old_tree_arg = NULL; + if (!PyArg_UnpackTuple(args, "ref", 1, 2, &source_code, &old_tree_arg)) { + return NULL; + } + + if (!PyUnicode_Check(source_code)) { + PyErr_SetString(PyExc_TypeError, "First argument to parse must be a string"); + return NULL; + } + + const TSTree *old_tree = NULL; + if (old_tree_arg) { + if (!PyObject_IsInstance(old_tree_arg, (PyObject *)&tree_type)) { + PyErr_SetString(PyExc_TypeError, "Second argument to parse must be a Tree"); + return NULL; + } + + old_tree = ((Tree *)old_tree_arg)->tree; + } + + 
TSTree *new_tree = NULL; + + PyUnicode_READY(source_code); + size_t length = PyUnicode_GET_LENGTH(source_code); + int kind = PyUnicode_KIND(source_code); + if (kind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *source_bytes = PyUnicode_1BYTE_DATA(source_code); + new_tree = ts_parser_parse_string(self->parser, old_tree, (char *)source_bytes, length); + } else if (kind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *source_bytes = PyUnicode_2BYTE_DATA(source_code); + new_tree = ts_parser_parse_string_encoding(self->parser, old_tree, (char *)source_bytes, length, TSInputEncodingUTF16); + } else if (kind == PyUnicode_4BYTE_KIND) { + PyErr_SetString(PyExc_ValueError, "4 byte strings are not yet supported"); + return NULL; + } else { + PyErr_SetString(PyExc_ValueError, "Unknown string kind"); + return NULL; + } + + if (!new_tree) { + PyErr_SetString(PyExc_ValueError, "Parsing failed"); + return NULL; + } + + return (PyObject *)tree_new_internal(new_tree); +} + +static PyObject *parser_set_language(Parser *self, PyObject *arg) { + PyObject *language_id = PyObject_GetAttrString(arg, "language_id"); + if (!language_id) { + PyErr_SetString(PyExc_TypeError, "Argument to set_language must be a Language"); + return NULL; + } + + if (!PyLong_Check(language_id)) { + PyErr_SetString(PyExc_TypeError, "Language ID must be an integer"); + return NULL; + } + + TSLanguage *language = (TSLanguage *)PyLong_AsLong(language_id); + if (!language) { + PyErr_SetString(PyExc_ValueError, "Language ID must not be null"); + return NULL; + } + + ts_parser_set_language(self->parser, language); + return Py_None; +} + +static PyMethodDef parser_methods[] = { + { + .ml_name = "parse", + .ml_meth = (PyCFunction)parser_parse, + .ml_flags = METH_VARARGS, + .ml_doc = "Parse source code, creating a syntax tree", + }, + { + .ml_name = "set_language", + .ml_meth = (PyCFunction)parser_set_language, + .ml_flags = METH_O, + .ml_doc = "Parse source code, creating a syntax tree", + }, + {NULL}, +}; + +static PyTypeObject 
parser_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "tree_sitter.Parser", + .tp_doc = "A Parser", + .tp_basicsize = sizeof(Parser), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_new = parser_new, + .tp_dealloc = (destructor)parser_dealloc, + .tp_methods = parser_methods, +}; + +// Module + +static struct PyModuleDef module_definition = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "tree_sitter", + .m_doc = NULL, + .m_size = -1, +}; + +PyMODINIT_FUNC PyInit_tree_sitter_binding(void) { + PyObject *module = PyModule_Create(&module_definition); + if (module == NULL) return NULL; + + if (PyType_Ready(&parser_type) < 0) return NULL; + Py_INCREF(&parser_type); + PyModule_AddObject(module, "Parser", (PyObject *)&parser_type); + + if (PyType_Ready(&tree_type) < 0) return NULL; + Py_INCREF(&tree_type); + PyModule_AddObject(module, "Tree", (PyObject *)&tree_type); + + if (PyType_Ready(&node_type) < 0) return NULL; + Py_INCREF(&node_type); + PyModule_AddObject(module, "Node", (PyObject *)&node_type); + + return module; +} diff --git a/tree_sitter/core b/tree_sitter/core new file mode 160000 index 0000000..60265e8 --- /dev/null +++ b/tree_sitter/core @@ -0,0 +1 @@ +Subproject commit 60265e807cf5bd6a4e3b41f8ca9ddcf3a592eec1