Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python API typing, lint, config/makefile #3023

Merged
merged 4 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions .github/workflows/ci-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ concurrency:
jobs:
gcc-build-test:
name: gcc build & test
needs: [clang-formatting-check, sanity-checks]
needs: [clang-formatting-check, sanity-checks, python-lint-check]
runs-on: kuzu-self-hosted-testing
env:
NUM_THREADS: 32
Expand Down Expand Up @@ -95,10 +95,10 @@ jobs:

- name: Start Docker container
run: |
docker run -d --name kuzu-x86 \
-v $PWD:/kuzu -w /kuzu \
-e NUM_THREADS=2 -e GEN=ninja -e CC=gcc -e CXX=g++ \
i386/debian:latest tail -f /dev/null
docker run -d --name kuzu-x86 \
-v $PWD:/kuzu -w /kuzu \
-e NUM_THREADS=2 -e GEN=ninja -e CC=gcc -e CXX=g++ \
i386/debian:latest tail -f /dev/null

- name: Install dependencies
run: |
Expand Down Expand Up @@ -161,7 +161,7 @@ jobs:

clang-build-test:
name: clang build and test
needs: [clang-formatting-check, sanity-checks]
needs: [clang-formatting-check, sanity-checks, python-lint-check]
runs-on: kuzu-self-hosted-testing
env:
NUM_THREADS: 32
Expand Down Expand Up @@ -209,7 +209,7 @@ jobs:

msvc-build-test:
name: msvc build & test
needs: [clang-formatting-check, sanity-checks]
needs: [clang-formatting-check, sanity-checks, python-lint-check]
runs-on: self-hosted-windows
env:
# Shorten build path as much as possible
Expand Down Expand Up @@ -343,6 +343,23 @@ jobs:
- name: Check extension format
run: python3 scripts/run-clang-format.py --clang-format-executable /usr/bin/clang-format-11 -r extension/

python-lint-check:
  name: python lint check
  runs-on: macos-14
  steps:
    - uses: actions/checkout@v3

    - name: Setup Python
      uses: actions/setup-python@v2
      with:
        python-version: "3.11"

    - name: Run Python lint
      working-directory: tools/python_api
      # tools/python_api/pyproject.toml sets `fix = true` for ruff, and ruff
      # exits 0 when every violation it found was auto-fixed. Pass --no-fix so
      # CI reports violations instead of fixing them in the throwaway checkout
      # and passing.
      run: |
        make requirements
        ./.venv/bin/ruff check src_py test --no-fix --verbose

rustfmt-check:
name: rustfmt check
runs-on: ubuntu-22.04
Expand Down Expand Up @@ -399,7 +416,7 @@ jobs:

macos-build-test:
name: apple clang build & test
needs: [clang-formatting-check, sanity-checks, rustfmt-check]
needs: [clang-formatting-check, sanity-checks, rustfmt-check, python-lint-check]
runs-on: self-hosted-mac-x64
env:
NUM_THREADS: 32
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ compile_commands.json
### Python
# Byte-compiled / optimized / DLL files
__pycache__/
.venv/
*.py[cod]
*$py.class
cmake-build-debug/
Expand Down
28 changes: 22 additions & 6 deletions scripts/pip-package/package_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,23 @@

base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def _get_kuzu_version():
    """
    Read the Kuzu version from the CMakeLists.txt one level above ``base_dir``.

    The first line starting with ``project(Kuzu VERSION`` is used; its third
    space-separated token is taken as the raw version string.

    Returns
    -------
    str or None
        The raw version when it has at most three dot-separated components.
        Otherwise the first three components joined by dots, followed by
        ``.dev`` and the fourth component. ``None`` when no matching line
        exists in the file.
    """
    cmake_file = os.path.abspath(os.path.join(base_dir, "..", "CMakeLists.txt"))
    # Explicit encoding keeps the read independent of the platform locale.
    with open(cmake_file, encoding="utf-8") as f:
        for line in f:
            if not line.startswith("project(Kuzu VERSION"):
                continue
            raw_version = line.split(" ")[2].strip()
            version_nums = raw_version.split(".")
            if len(version_nums) <= 3:
                return raw_version
            # A fourth component marks a development build.
            release = ".".join(version_nums[:3])
            return "%s.dev%s" % (release, version_nums[3])
    return None


if __name__ == "__main__":
if len(sys.argv) == 2:
file_name = sys.argv[1]
Expand Down Expand Up @@ -53,8 +55,22 @@ def _get_kuzu_version():
os.makedirs(os.path.join(tempdir, "kuzu"))
for path in ["setup.py", "setup.cfg", "MANIFEST.in"]:
shutil.copy2(path, os.path.join(tempdir, path))
shutil.copy2("../../LICENSE", os.path.join(tempdir, "LICENSE.txt"))
shutil.copy2("../../LICENSE", os.path.join(tempdir, "LICENSE"))
shutil.copy2("../../README.md", os.path.join(tempdir, "README.md"))

shutil.copy2(
"../../tools/python_api/pyproject.toml",
os.path.join(tempdir, "pyproject.toml"),
)
# Update the version in pyproject.toml
with open(os.path.join(tempdir, "pyproject.toml"), "r") as f:
lines = f.readlines()
with open(os.path.join(tempdir, "pyproject.toml"), "w") as f:
for line in lines:
if line.startswith("version ="):
f.write('version = "%s"\n' % _get_kuzu_version())
else:
f.write(line)
shutil.copy2("README.md", os.path.join(tempdir, "README_PYTHON_BUILD.md"))

subprocess.check_call([sys.executable, "setup.py", "egg_info"], cwd=tempdir)
Expand Down
42 changes: 42 additions & 0 deletions tools/python_api/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Running `make` with no target prints the help text.
.DEFAULT_GOAL := help

# PYTHONPATH is blanked for this makefile.
# NOTE(review): presumably so outside packages cannot leak into the venv tools — confirm.
PYTHONPATH=
# Recipes run under bash (the `help` target uses `echo -e`).
SHELL=/bin/bash
VENV=.venv

# The venv's executables are looked up under Scripts/ when OS is Windows_NT, bin/ otherwise.
ifeq ($(OS),Windows_NT)
VENV_BIN=$(VENV)/Scripts
else
VENV_BIN=$(VENV)/bin
endif

# Create the virtual environment, then install the dev packages through a
# recursive make of the `requirements` target.
# NOTE(review): `requirements` lists `.venv` as a prerequisite, so on a fresh
# checkout `make requirements` looks like it runs the install step twice — confirm.
.venv: ## Set up a Python virtual environment and install dev packages
	python3 -m venv $(VENV)
	$(MAKE) requirements

# Install or upgrade `uv` inside the venv, then use it to install everything
# listed in requirements_dev.txt. CONDA_PREFIX is unset for the duration of
# the command chain. The chain is one logical recipe line, so the final
# command must not end in a continuation backslash.
.PHONY: requirements
requirements: .venv ## Install/update Python dev packages
	@unset CONDA_PREFIX \
	&& $(VENV_BIN)/python -m pip install -U uv \
	&& $(VENV_BIN)/uv pip install --upgrade -r requirements_dev.txt

# Run the ruff linter and the ruff formatter, then mypy, over src_py and test.
# The leading `-` on the mypy line makes make ignore a non-zero exit status
# from that command, so type errors are shown but do not fail the target.
.PHONY: lint
lint: ## Apply autoformatting and linting rules
	$(VENV_BIN)/ruff check src_py test
	$(VENV_BIN)/ruff format src_py test
	-$(VENV_BIN)/mypy src_py test

# Invoke the `python` target of the makefile two directories up, then copy the
# Python sources from src_py into build/kuzu/.
.PHONY: build
build: ## Compile kuzu (and install in 'build') for Python
	$(MAKE) -C ../../ python
	cp src_py/*.py build/kuzu/

# Refresh the Python sources in build/kuzu/, then run pytest on the test
# directory from this directory. Each recipe line runs in its own shell, so a
# `cd` on the copy line would never carry over to the pytest line.
.PHONY: test
test: ## Run the Python unit tests
	cp src_py/*.py build/kuzu/
	$(VENV_BIN)/pytest test

# Print every target that carries a trailing `## description` comment.
# grep selects the "target: ... ## text" lines from the makefiles in
# MAKEFILE_LIST; awk splits each on ":...## " and prints the target name and
# its description with ANSI colors; the output lines are then sorted.
.PHONY: help
help: ## Display this help information
	@echo -e "\033[1mAvailable commands:\033[0m"
	@grep -E '^[a-z.A-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-18s\033[0m %s\n", $$1, $$2}' | sort
97 changes: 97 additions & 0 deletions tools/python_api/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
[project]
name = "kuzu"
description = "Highly scalable, extremely fast, easy-to-use embeddable graph database"
readme = "README.md"
license = { file = "LICENSE" }
keywords = ["graph", "database"]
version = "0.0.1"

[project.urls]
Homepage = "https://kuzudb.com/"
Documentation = "https://docs.kuzudb.com/"
Repository = "https://github.com/kuzudb/kuzu"
Changelog = "https://github.com/kuzudb/kuzu/releases"

[tool.mypy]
files = ["src_py", "test"]
strict = true
enable_error_code = [
"redundant-expr",
"truthy-bool",
"ignore-without-code",
]
disable_error_code = [
"import-not-found",
"no-any-return",
]

[[tool.mypy.overrides]]
module = [
"_kuzu.*",
"networkx.*",
"pandas.*",
"pyarrow.*",
"pytz.*",
"torch_geometric.*",
]
ignore_missing_imports = true

[tool.ruff]
line-length = 119
preview = true
fix = true

[tool.ruff.lint]
select = [
"E", # pycodestyle
"W", # pycodestyle
"F", # Pyflakes
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"D", # pydocstyle
"D213", # Augment NumPy docstring convention: Multi-line docstring summary should start at the second line
"D417", # Augment NumPy docstring convention: Missing argument descriptions
"I", # isort
"SIM", # flake8-simplify
"TCH", # flake8-type-checking
"TID", # flake8-tidy-imports
"UP", # pyupgrade
"PT", # flake8-pytest-style
"RUF", # Ruff-specific rules
"PTH", # flake8-use-pathlib
"FA", # flake8-future-annotations
"PIE", # flake8-pie
"TD", # flake8-todos
"TRY", # tryceratops
"EM", # flake8-errmsg
"FBT001", # flake8-boolean-trap
]
ignore = [
"D401", # First line should be in imperative mood
"D105", # Missing docstring in magic method
"D100", # Missing docstring in public module
"D104", # Missing docstring in public package
"D205", # 1 blank line required between summary line and description
"TD002", # Missing author in TODO
"TD003", # Missing issue link on the line following TODO
]

[tool.ruff.lint.per-file-ignores]
"test/**/*.py" = ["D100", "D102", "D103", "E501", "F841", "TCH002"]
"src_py/torch_geo*.py" = ["E501", "FBT001"]

[tool.ruff.lint.pycodestyle]
max-doc-length = 119

[tool.ruff.lint.pydocstyle]
convention = "numpy"

[tool.ruff.lint.flake8-type-checking]
strict = true

[tool.ruff.format]
docstring-code-format = true

[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
18 changes: 13 additions & 5 deletions tools/python_api/requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
pybind11>=2.6.0
pytest
# required for tests
networkx~=3.0
numpy
pandas
polars
networkx~=3.0.0
numpy
pyarrow==14.0.1
pyarrow>=14
pybind11>=2.6.0
pytest
torch
torch-geometric~=2.3.0
setuptools~=69.0

# required for lint/formatting
ruff==0.3.2
mypy==1.9.0
uv==0.1.17
2 changes: 1 addition & 1 deletion tools/python_api/src_cpp/py_connection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ std::unordered_map<std::string, std::unique_ptr<Value>> transformPythonParameter
if (!py::isinstance<py::str>(key)) {
// TODO(Chang): remove ROLLBACK once we can guarantee database is deleted after conn
conn->query("ROLLBACK");
throw std::runtime_error("Parameter name must be of type string but get " +
throw std::runtime_error("Parameter name must be of type string but got " +
py::str(key.get_type()).cast<std::string>());
}
auto name = key.cast<std::string>();
Expand Down
37 changes: 26 additions & 11 deletions tools/python_api/src_py/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
# Kùzu Python API bindings
# Kùzu Python API bindings.

This package provides a Python API for Kùzu graph database management system.

Expand All @@ -12,7 +12,7 @@
```python
import kuzu

db = kuzu.Database('./test')
db = kuzu.Database("./test")
conn = kuzu.Connection(db)

# Define the schema
Expand All @@ -28,7 +28,7 @@
conn.execute('COPY LivesIn FROM "lives-in.csv"')

# Query the data
results = conn.execute('MATCH (u:User) RETURN u.name, u.age;')
results = conn.execute("MATCH (u:User) RETURN u.name, u.age;")
while results.has_next():
print(results.get_next())
```
Expand All @@ -37,29 +37,44 @@

"""

import sys
from __future__ import annotations

import os
import sys

# Set RTLD_GLOBAL and RTLD_LAZY flags on Linux to fix the issue with loading
# extensions
if sys.platform == "linux":
original_dlopen_flags = sys.getdlopenflags()
sys.setdlopenflags(os.RTLD_GLOBAL | os.RTLD_LAZY)

from .database import *
from .connection import *
from .query_result import *
from .types import *
from .connection import Connection
from .database import Database
from .prepared_statement import PreparedStatement
from .query_result import QueryResult
from .types import Type

def __getattr__(name: str) -> str | int:
    """
    Resolve the module attributes ``version`` and ``storage_version`` on access.

    Python calls a module-level ``__getattr__`` only when normal attribute
    lookup on the module fails, so both values are fetched from ``Database``
    each time they are read.

    Parameters
    ----------
    name : str
        Name of the attribute being looked up on the module.

    Returns
    -------
    str | int
        The result of ``Database.get_version()`` for ``version`` or
        ``Database.get_storage_version()`` for ``storage_version``.

    Raises
    ------
    AttributeError
        If ``name`` is neither ``version`` nor ``storage_version``.
    """
    if name == "version":
        return Database.get_version()
    if name == "storage_version":
        return Database.get_storage_version()

    msg = f"module {__name__!r} has no attribute {name!r}"
    raise AttributeError(msg)


# Restore the original dlopen flags
if sys.platform == "linux":
sys.setdlopenflags(original_dlopen_flags)

__all__ = [
"Connection",
"Database",
"PreparedStatement",
"QueryResult",
"Type",
"storage_version",
"version",
]
Loading
Loading