Skip to content

Commit

Permalink
python3.pkgs.tree-sitter-languages: use prebuilt grammars
Browse files (browse the repository at this point in the history)
  • Loading branch information
milahu committed Feb 20, 2024
1 parent 48f3c13 commit 85dccc2
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 83 deletions.
102 changes: 19 additions & 83 deletions pkgs/python3/pkgs/tree-sitter-languages/tree-sitter-languages.nix
Original file line number Diff line number Diff line change
@@ -1,45 +1,9 @@
# FIXME use parser binaries from tree-sitter-grammars
# compiling all grammars into one binary is a waste of build time

{ lib
, python3
, fetchFromGitHub
, tree-sitter-grammars
}:

/*
# debug: build faster
let old-tree-sitter-grammars = tree-sitter-grammars; in
let
tree-sitter-grammars = {
tree-sitter-html = old-tree-sitter-grammars.tree-sitter-html;
};
in
*/

# update grammars to fix build errors: multiple definition of ...
# helper functions must be declared as "static"
# see also
# https://github.com/tree-sitter/tree-sitter-html/pull/64
# https://github.com/grantjenks/py-tree-sitter-languages/issues/55
let old-tree-sitter-grammars = tree-sitter-grammars; in
let
tree-sitter-grammars = old-tree-sitter-grammars // {
# https://github.com/Himujjal/tree-sitter-svelte/issues/56
tree-sitter-svelte = null;
# https://github.com/ikatyang/tree-sitter-vue/issues/27
tree-sitter-vue = null;
tree-sitter-rst = old-tree-sitter-grammars.tree-sitter-rst.overrideAttrs (oldAttrs: {
src = fetchFromGitHub {
owner = "stsewd";
repo = "tree-sitter-rst";
rev = "3ba9eb9b5a47aadb1f2356a3cab0dd3d2bd00b4b";
hash = "sha256-0w11mtDcIc2ol9Alg4ukV33OzXADOeJDx+3uxV1hGfs=";
};
});
};
in

python3.pkgs.buildPythonPackage rec {
pname = "tree-sitter-languages";
version = "1.10.2";
Expand All @@ -52,6 +16,14 @@ python3.pkgs.buildPythonPackage rec {
hash = "sha256-AuPK15xtLiQx6N2OATVJFecsL8k3pOagrWu1GascbwM=";
};

patches = [
# using the prebuilt grammars has 2 benefits:
# 1. this package builds 1000x faster
# 2. no more symbol conflicts between parsers, see
# https://github.com/grantjenks/py-tree-sitter-languages/issues/55
./use-prebuilt-grammars.patch
];

buildInputs = [
python3.pkgs.cython
];
Expand All @@ -65,62 +37,26 @@ python3.pkgs.buildPythonPackage rec {
python3.pkgs.tree-sitter
];

postUnpack = ''
cd $sourceRoot
mkdir vendor
${
builtins.concatStringsSep "" (
builtins.attrValues (
builtins.mapAttrs
(n: p:
"ln -v -s ${p.src.outPath} vendor/${n}\n"
)
(lib.filterAttrs (k: v: v ? src) tree-sitter-grammars)
)
)
}
cd ..
'';

postBuild = ''
echo creating $out/${python3.sitePackages}/tree_sitter_languages/languages.so
repo_paths=(
languages=$out/${python3.sitePackages}/tree_sitter_languages/languages
echo creating $languages
mkdir -p $languages
${
builtins.concatStringsSep "" (
builtins.attrValues (
builtins.mapAttrs
(n: p:
" 'vendor/${n}'\n"
(_n: p:
# strip the "tree-sitter-" prefix from the attribute name: 12 == builtins.stringLength "tree-sitter-"
let n = builtins.substring 12 999 _n; in
''
echo adding language ${n}
ln -s ${p.outPath}/parser $languages/${n}
''
)
(lib.filterAttrs (k: v: v ? src) tree-sitter-grammars)
(lib.filterAttrs (k: v: v ? outPath) tree-sitter-grammars)
)
)
}
)
# get actual repo paths
# fix: No such file or directory: 'vendor/tree-sitter-markdown/src/parser.c
for idx in ''${!repo_paths[@]}; do
dir=''${repo_paths[$idx]}
[ -e $dir/src/parser.c ] && continue
parser=$(find $dir -path '*/src/parser.c')
dir=''${parser%/src/parser.c}
repo_paths[$idx]=$dir
done
#mkdir -p $out/${python3.sitePackages}/tree_sitter_languages
build_py=$(
echo "import tree_sitter"
echo "repo_paths = ["
for dir in ''${repo_paths[@]}; do
echo " '$dir',"
done
echo "]"
echo "output_path = '$out/${python3.sitePackages}/tree_sitter_languages/languages.so'"
echo "tree_sitter.Language.build_library(output_path, repo_paths)"
)
echo "$build_py" | grep -n "" # debug
python3 -c "$build_py"
'';

pythonImportsCheck = [ "tree_sitter_languages" ];
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
diff --git a/tree_sitter_languages/core.pyx b/tree_sitter_languages/core.pyx
index a27377c..f01480b 100644
--- a/tree_sitter_languages/core.pyx
+++ b/tree_sitter_languages/core.pyx
@@ -5,12 +5,7 @@ from tree_sitter import Language, Parser


def get_language(language):
- if sys.platform == 'win32':
- filename = 'languages.dll'
- else:
- filename = 'languages.so'
-
- binary_path = str(pathlib.Path(__file__).parent / filename)
+ binary_path = str(pathlib.Path(__file__).parent / "languages" / language)
language = Language(binary_path, language)
return language

0 comments on commit 85dccc2

Please sign in to comment.