diff --git a/.flake8 b/.flake8
index 6244ca21..10cfc5a3 100644
--- a/.flake8
+++ b/.flake8
@@ -10,35 +10,48 @@ ignore =
     E501
     # whitespace before : -> black does not adhere to PEP8
     E203
+    # line break before binary operator -> black does not adhere to PEP8
+    W503
     # missing whitespace after ',', ';', or ':' -> black does not adhere to PEP8
     E231
     # continuation line over-indented for hanging indent -> black does not adhere to PEP8
     E126
-    # E266 too many leading '#' for block comment -> this is fine for indicating sections
+    # too many leading '#' for block comment -> this is fine for indicating sections
     E262
     # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
     E731
-    # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation
+    # allow I, O, l as variable names -> I is the identity matrix
    E741
    # Missing docstring in public package
    D104
-    # ... imported but unused
-    F401
    # Missing docstring in public module
    D100
    # Missing docstring in __init__
    D107
-    # Do not perform function calls in argument defaults.
+    # Errors from function calls in argument defaults. These are fine when the result is immutable.
    B008
-    # line break before binary operator
-    W503
    # Missing docstring in magic method
    D105
-    # whitespace before ':'
-    E203
    # format string does contain unindexed parameters
    P101
+    # first line should end with a period [Bug: doesn't work with single-line docstrings]
+    D400
+    # First line should be in imperative mood; try rephrasing
+    D401
 exclude = .git,__pycache__,build,docs/_build,dist
 per-file-ignores =
     tests/*: D
     */__init__.py: F401
+rst-roles =
+    class,
+    func,
+    ref,
+    cite:p,
+    cite:t,
+rst-directives =
+    envvar,
+    exception,
+rst-substitutions =
+    version,
+extend-ignore =
+    RST307,
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 2f0ffd7a..195d4d44 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ jobs:
         strategy:
             fail-fast: false
            matrix:
-                python: ["3.8", "3.10"]
+                python: ["3.9", "3.10"]
                os: [ubuntu-latest]

        env:
diff --git a/.gitignore b/.gitignore
index 7bb0bd5e..2baa8042 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,7 @@ __pycache__/
 # IDEs
 /.idea/
 /.vscode/
+
+# docs
+/docs/generated/
+/docs/_build/
diff --git a/.mypy.ini b/.mypy.ini
new file mode 100644
index 00000000..4da7a91d
--- /dev/null
+++ b/.mypy.ini
@@ -0,0 +1,28 @@
+[mypy]
+python_version = 3.9
+plugins = numpy.typing.mypy_plugin
+
+ignore_errors = False
+warn_redundant_casts = True
+warn_unused_configs = True
+warn_unused_ignores = True
+
+disallow_untyped_calls = False
+disallow_untyped_defs = True
+disallow_incomplete_defs = True
+disallow_any_generics = True
+
+strict_optional = True
+strict_equality = True
+warn_return_any = True
+warn_unreachable = False
+check_untyped_defs = True
+; because of docrep
+allow_untyped_decorators = True
+no_implicit_optional = True
+no_implicit_reexport = True
+no_warn_no_return = True
+
+show_error_codes = True
+show_column_numbers = True
+error_summary = True
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 50cb10a3..b34c35a1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,11 +7,11 @@ default_stages:
 minimum_pre_commit_version: 2.16.0
 repos:
     - repo: https://github.com/psf/black
-      rev: 22.8.0
+      rev: 22.12.0
      hooks:
          - id: black
    - repo: https://github.com/pre-commit/mirrors-prettier
-      rev: v3.0.0-alpha.0
+      rev: v3.0.0-alpha.4
      hooks:
          - id: prettier
    - repo: https://github.com/asottile/blacken-docs
@@ -19,9 +19,15 @@ repos:
      hooks:
          - id: blacken-docs
    - repo: https://github.com/PyCQA/isort
-      rev: 5.10.1
+      rev: v5.11.3
      hooks:
          - id: isort
+    - repo: https://github.com/pre-commit/mirrors-mypy
+      rev: v0.991
+      hooks:
+          - id: mypy
+            additional_dependencies: [numpy==1.23.0]
+            exclude: tests/|docs/|src/spatialdata_io/xenium.py|src/spatialdata_io/visium.py|src/spatialdata_io/resolve.py
    - repo: https://github.com/asottile/yesqa
      rev: v1.4.0
      hooks:
@@ -34,7 +40,7 @@ repos:
              - flake8-bugbear
              - flake8-blind-except
    - repo: https://github.com/pre-commit/pre-commit-hooks
-      rev: v4.3.0
+      rev: v4.4.0
      hooks:
          - id: detect-private-key
          - id: check-ast
@@ -44,7 +50,7 @@
          - id: trailing-whitespace
          - id: check-case-conflict
    - repo: https://github.com/myint/autoflake
-      rev: v1.6.1
+      rev: v2.0.0
      hooks:
          - id: autoflake
            args:
@@ -53,7 +59,7 @@
                - --remove-unused-variable
                - --ignore-init-module-imports
    - repo: https://github.com/PyCQA/flake8
-      rev: 5.0.4
+      rev: 6.0.0
      hooks:
          - id: flake8
            additional_dependencies:
@@ -64,7 +70,20 @@
              - flake8-bugbear
              - flake8-blind-except
    - repo: https://github.com/asottile/pyupgrade
-      rev: v2.38.2
+      rev: v3.3.1
      hooks:
          - id: pyupgrade
-            args: [--py3-plus, --py38-plus, --keep-runtime-typing]
+            args: [--py3-plus, --py39-plus, --keep-runtime-typing]
+    - repo: local
+      hooks:
+          - id: forbid-to-commit
+            name: Don't commit rej files
+            entry: |
+                Cannot commit .rej files. These indicate merge conflicts that arise during automated template updates.
+                Fix the merge conflicts manually and remove the .rej files.
+            language: fail
+            files: '.*\.rej$'
+    - repo: https://github.com/PyCQA/doc8
+      rev: v1.0.0
+      hooks:
+          - id: doc8
diff --git a/docs/Makefile b/docs/Makefile
index d4bb2cbb..fbe1adc5 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -18,3 +18,7 @@ help:
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+clean:
+	rm -r "$(BUILDDIR)"
+	rm -r "generated"
diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst
new file mode 100644
index 00000000..ee6d05f5
--- /dev/null
+++ b/docs/_templates/autosummary/class.rst
@@ -0,0 +1,75 @@
+{{ fullname | escape | underline}}
+
+.. currentmodule:: {{ module }}
+
+.. add toctree option to make autodoc generate the pages
+
+.. autoclass:: {{ objname }}
+
+{% block attributes %}
+{% if attributes %}
+Attributes table
+~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+
+{% for item in attributes %}
+
+    ~{{ fullname }}.{{ item }}
+
+{%- endfor %}
+{% endif %}
+{% endblock %}
+
+{% block methods %}
+{% if methods %}
+Methods table
+~~~~~~~~~~~~~
+
+.. autosummary::
+
+{% for item in methods %}
+
+    {%- if item != '__init__' %}
+    ~{{ fullname }}.{{ item }}
+    {%- endif -%}
+
+{%- endfor %}
+{% endif %}
+{% endblock %}
+
+{% block attributes_documentation %}
+{% if attributes %}
+Attributes
+~~~~~~~~~~~
+
+{% for item in attributes %}
+
+{{ item }}
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoattribute:: {{ [objname, item] | join(".") }}
+
+{%- endfor %}
+
+{% endif %}
+{% endblock %}
+
+{% block methods_documentation %}
+{% if methods %}
+Methods
+~~~~~~~
+
+{% for item in methods %}
+{%- if item != '__init__' %}
+
+{{ item }}
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. automethod:: {{ [objname, item] | join(".") }}
+
+{%- endif -%}
+{%- endfor %}
+
+{% endif %}
+{% endblock %}
diff --git a/docs/api.md b/docs/api.md
index 9a34848d..0ac5d97e 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -1,37 +1,7 @@
 # API

-## Preprocessing
+## Coming soon

 ```{eval-rst}
-.. module:: spatialdata_io.pp
-.. currentmodule:: spatialdata_io
-
-.. autosummary::
-    :toctree: generated
-
-    pp.basic_preproc
-```
-
-## Tools
-
-```{eval-rst}
-.. module:: spatialdata_io.tl
-.. currentmodule:: spatialdata_io
-
-.. autosummary::
-    :toctree: generated
-
-    tl.basic_tool
-```
-
-## Plotting
-
-```{eval-rst}
-.. module:: spatialdata_io.pl
-.. currentmodule:: spatialdata_io
-
-.. autosummary::
-    :toctree: generated
-
-    pl.basic_plot
+
 ```
diff --git a/docs/conf.py b/docs/conf.py
index 006ff254..e34c5298 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -17,10 +17,11 @@
 # -- Project information -----------------------------------------------------

 info = metadata("spatialdata-io")
-project = info["Name"]
+project_name = info["Name"]
 author = info["Author"]
 copyright = f"{datetime.now():%Y}, {author}."
 version = info["Version"]
+repository_url = f"https://github.com/scverse/{project_name}"

 # The full version, including alpha/beta/rc tags
 release = info["Version"]
@@ -33,7 +34,7 @@
 html_context = {
     "display_github": True,  # Integrate GitHub
     "github_user": "scverse",  # Username
-    "github_repo": project,  # Repo name
+    "github_repo": project_name,  # Repo name
     "github_version": "main",  # Version
     "conf_py_path": "/docs/",  # Path in the checkout to the docs root
 }
@@ -43,16 +44,16 @@
 # Add any Sphinx extension module names here, as strings.
 # They can be extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
-    "myst_parser",
+    "myst_nb",
+    "sphinx_copybutton",
     "sphinx.ext.autodoc",
     "sphinx.ext.intersphinx",
     "sphinx.ext.autosummary",
     "sphinx.ext.napoleon",
     "sphinxcontrib.bibtex",
     "sphinx_autodoc_typehints",
-    "scanpydoc.definition_list_typed_field",
-    "nbsphinx",
     "sphinx.ext.mathjax",
+    "IPython.sphinxext.ipython_console_highlighting",
     *[p.stem for p in (HERE / "extensions").glob("*.py")],
 ]
@@ -65,14 +66,31 @@
 napoleon_use_rtype = True  # having a separate entry generally helps readability
 napoleon_use_param = True
 myst_heading_anchors = 3  # create anchors for h1-h3
+myst_enable_extensions = [
+    "amsmath",
+    "colon_fence",
+    "deflist",
+    "dollarmath",
+    "html_image",
+    "html_admonition",
+]
+myst_url_schemes = ("http", "https", "mailto")
+nb_output_stderr = "remove"
+nb_execution_mode = "off"
+nb_merge_streams = True
+typehints_defaults = "braces"
+
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".ipynb": "myst-nb",
+    ".myst": "myst-nb",
+}

 intersphinx_mapping = {
     "anndata": ("https://anndata.readthedocs.io/en/stable/", None),
     "numpy": ("https://numpy.org/doc/stable/", None),
 }

-nbsphinx_execute = "never"
-
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
@@ -84,10 +102,16 @@
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = "furo"
+html_theme = "sphinx_book_theme"
 html_static_path = ["_static"]
+html_title = project_name
+
+html_theme_options = {
+    "repository_url": repository_url,
+    "use_repository_button": True,
+}

-pygments_style = "sphinx"
+pygments_style = "default"

 nitpick_ignore = [
     # If building the documentation fails because of a missing link that is outside your control,
diff --git a/docs/contributing.md b/docs/contributing.md
new file mode 100644
index 00000000..b678c149
--- /dev/null
+++ b/docs/contributing.md
@@ -0,0 +1,197 @@
+# Contributing guide
+
+Scanpy provides extensive [developer documentation][scanpy developer guide], most of which applies to this repo, too.
+This document will not reproduce the entire content from there. Instead, it aims at summarizing the most important
+information to get you started on contributing.
+
+We assume that you are already familiar with git and with making pull requests on GitHub. If not, please refer
+to the [scanpy developer guide][].
+
+## Installing dev dependencies
+
+In addition to the packages needed to _use_ this package, you need additional python packages to _run tests_ and _build
+the documentation_. It's easy to install them using `pip`:
+
+```bash
+cd spatialdata-io
+pip install -e ".[dev,test,doc]"
+```
+
+## Code-style
+
+This project uses [pre-commit][] to enforce consistent code styles. On every commit, pre-commit checks will either
+automatically fix issues with the code, or raise an error message. See [pre-commit checks](template_usage.md#pre-commit-checks) for
+a full list of checks enabled for this repository.
+
+To enable pre-commit locally, simply run
+
+```bash
+pre-commit install
+```
+
+in the root of the repository. Pre-commit will automatically download all dependencies when it is run for the first time.
+
+Alternatively, you can rely on the [pre-commit.ci][] service enabled on GitHub. If you didn't run `pre-commit` before
+pushing changes to GitHub, it will automatically commit fixes to your pull request, or show an error message.
+
+If pre-commit.ci added a commit on a branch you are still working on locally, simply use
+
+```bash
+git pull --rebase
+```
+
+to integrate the changes into yours.
+While [pre-commit.ci][] is useful, we strongly encourage installing and running pre-commit locally first to understand its usage.
+
+Finally, most editors have an _autoformat on save_ feature. Consider enabling this option for [black][black-editors]
+and [prettier][prettier-editors].
+
+[black-editors]: https://black.readthedocs.io/en/stable/integrations/editors.html
+[prettier-editors]: https://prettier.io/docs/en/editors.html
+
+## Writing tests
+
+```{note}
+Remember to first install the package with `pip install -e ".[dev,test]"`
+```
+
+This package uses [pytest][] for automated testing. Please [write tests][scanpy-test-docs] for every function added
+to the package.
+
+Most IDEs integrate with pytest and provide a GUI to run tests. Alternatively, you can run all tests from the
+command line by executing
+
+```bash
+pytest
+```
+
+in the root of the repository. Continuous integration will automatically run the tests on all pull requests.
+
+[scanpy-test-docs]: https://scanpy.readthedocs.io/en/latest/dev/testing.html#writing-tests
+
+## Publishing a release
+
+### Updating the version number
+
+Before making a release, you need to update the version number. Please adhere to [Semantic Versioning][semver], in brief
+
+> Given a version number MAJOR.MINOR.PATCH, increment the:
+>
+> 1. MAJOR version when you make incompatible API changes,
+> 2. MINOR version when you add functionality in a backwards compatible manner, and
+> 3. PATCH version when you make backwards compatible bug fixes.
+>
+> Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
+
+We use [bump2version][] to automatically update the version number in all places and to create a git tag.
+Run one of the following commands in the root of the repository:
+
+```bash
+bump2version patch
+bump2version minor
+bump2version major
+```
+
+Once you are done, run
+
+```
+git push --tags
+```
+
+to publish the created tag on GitHub.
+
+[bump2version]: https://github.com/c4urself/bump2version
+
+### Building and publishing the package on PyPI
+
+Python packages are not distributed as source code, but as _distributions_. The most common distribution format is the so-called _wheel_. To build a _wheel_, run
+
+```bash
+python -m build
+```
+
+This command creates a _source archive_ and a _wheel_, which are required for publishing your package to [PyPI][]. These files are created directly in the root of the repository.
+
+Before uploading them to [PyPI][] you can check that your _distribution_ is valid by running:
+
+```bash
+twine check dist/*
+```
+
+and finally publishing it with:
+
+```bash
+twine upload dist/*
+```
+
+Provide your username and password when requested and then go check out your package on [PyPI][]!
+
+For more information, follow the [Python packaging tutorial][].
+
+It is possible to automate this with GitHub actions, see also [this feature request][pypi-feature-request]
+in the cookiecutter-scverse template.
+
+[python packaging tutorial]: https://packaging.python.org/en/latest/tutorials/packaging-projects/#generating-distribution-archives
+[pypi-feature-request]: https://github.com/scverse/cookiecutter-scverse/issues/88
+
+## Writing documentation
+
+Please write documentation for new or changed features and use-cases. This project uses [sphinx][] with the following features:
+
+- the [myst][] extension allows writing documentation in markdown/Markedly Structured Text
+- [Numpy-style docstrings][numpydoc] (through the [napoleon][numpydoc-napoleon] extension).
+- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks))
+- [Sphinx autodoc typehints][], to automatically reference annotated input and output types
+
+See the [scanpy developer docs](https://scanpy.readthedocs.io/en/latest/dev/documentation.html) for more information
+on how to write documentation.
+
+### Tutorials with myst-nb and jupyter notebooks
+
+The documentation is set up to render jupyter notebooks stored in the `docs/notebooks` directory using [myst-nb][].
+Currently, only notebooks in `.ipynb` format are supported; they will be included with both their input and output cells.
+It is your responsibility to update and re-run the notebook whenever necessary.
+
+If you are interested in automatically running notebooks as part of the continuous integration, please check
+out [this feature request](https://github.com/scverse/cookiecutter-scverse/issues/40) in the `cookiecutter-scverse`
+repository.
+
+#### Hints
+
+- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only
+  if you do so can sphinx automatically create a link to the external documentation.
+- If building the documentation fails because of a missing link that is outside your control, you can add an entry to
+  the `nitpick_ignore` list in `docs/conf.py`.
+
+#### Building the docs locally
+
+```bash
+cd docs
+make html
+open _build/html/index.html
+```
+
+
+
+[scanpy developer guide]: https://scanpy.readthedocs.io/en/latest/dev/index.html
+[spatialdata-io]: https://spatialdata-io.readthedocs.io/en/latest/template_usage.html
+[github quickstart guide]: https://docs.github.com/en/get-started/quickstart/create-a-repo?tool=webui
+[codecov]: https://about.codecov.io/sign-up/
+[codecov docs]: https://docs.codecov.com/docs
+[codecov bot]: https://docs.codecov.com/docs/team-bot
+[codecov app]: https://github.com/apps/codecov
+[pre-commit.ci]: https://pre-commit.ci/
+[readthedocs.org]: https://readthedocs.org/
+[myst-nb]: https://myst-nb.readthedocs.io/en/latest/
+[jupytext]: https://jupytext.readthedocs.io/en/latest/
+[pre-commit]: https://pre-commit.com/
+[anndata]: https://github.com/scverse/anndata
+[mudata]: https://github.com/scverse/mudata
+[pytest]: https://docs.pytest.org/
+[semver]: https://semver.org/
+[sphinx]: https://www.sphinx-doc.org/en/master/
+[myst]: https://myst-parser.readthedocs.io/en/latest/intro.html
+[numpydoc-napoleon]: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html
+[numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
+[sphinx autodoc typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
+[pypi]: https://pypi.org/
diff --git a/docs/developer_docs.md b/docs/developer_docs.md
deleted file mode 100644
index 793493d4..00000000
--- a/docs/developer_docs.md
+++ /dev/null
@@ -1,343 +0,0 @@
-# Developer documentation
-
-Welcome to the developer guidelines! This document is split into two parts:
-
-1. The [repository setup](#setting-up-the-repository). This section is relevant primarily for the repository maintainer and shows how to connect
-   continuous integration services and documents initial set-up of the repository.
-2. The [contributor guide](#contributing-guide). It contains information relevant to all developers who want to make a contribution.
-
-## Setting up the repository
-
-### Documentation on _readthedocs_
-
-We recommend using [readthedocs.org][] (RTD) to build and host the documentation for your project.
-To enable readthedocs, head over to [their webiste][readthedocs.org] and sign in with your GitHub account.
-On the RTD dashboard choose "Import a Project" and follow the instructions to add your repository.
-
-- Make sure to choose the correct name of the default branch. On GitHub, the default name of the default branch has
-  recently changed from `master` to `main`.
-- We recommend to enable documentation builds for pull requests (PRs). This ensures that a PR doesn't introduce changes
-  that break the documentation. To do so, got to `Admin -> Advanced Settings`, check the
-  `Build pull requests for this projects` option, and click `Save`. For more information, please refer to
-  the [official RTD documentation](https://docs.readthedocs.io/en/stable/pull-requests.html).
-
-### Coverage tests with _Codecov_
-
-Coverage tells what fraction of the code is "covered" by unit tests, thereby encouraging contributors to
-[write tests](#writing-tests).
-To enable coverage checks, head over to [codecov][] and sign in with your GitHub account.
-You'll find more information in "getting started" section of the [codecov docs][].
-
-In brief, you need to:
-
-1. Generate a Codecov Token by clicking _setup repo_ in the codecov dashboard.
-2. Go to the _Settings_ of your newly created repository on GitHub.
-3. Go to _Security > Secrets > Actions_.
-4. Create new repository secret with name `CODECOV_TOKEN` and paste the token generated by codecov
-5. Go back to Github Actions page an re-run previously failed jobs.
-
-### Pre-commit checks
-
-[Pre-commit][] checks are fast programs that
-check code for errors, inconsistencies and code styles, before the code
-is committed.
-
-We recommend setting up [pre-commit.ci][] to enforce consistency checks on every commit
-and pull-request.
-
-To do so, head over to [pre-commit.ci][] and click "Sign In With GitHub". Follow
-the instructions to enable pre-commit.ci for your account or your organization. You
-may choose to enable the service for an entire organization or on a per-repository basis.
-
-Once authorized, pre-commit.ci should automatically be activated.
-
-#### Overview of pre-commit hooks used by the template
-
-The following pre-commit checks are for code style and format:
-
-- [black](https://black.readthedocs.io/en/stable/): standard code
-  formatter in Python.
-- [isort](https://pycqa.github.io/isort/): sort module imports into
-  sections and types.
-- [prettier](https://prettier.io/docs/en/index.html): standard code
-  formatter for non-Python files (e.g. YAML).
-- [blacken-docs](https://github.com/asottile/blacken-docs): black on
-  python code in docs.
-
-The following pre-commit checks are for errors and inconsistencies:
-
-- [flake8](https://flake8.pycqa.org/en/latest/): standard check for errors in Python files.
-    - [flake8-tidy-imports](https://github.com/adamchainz/flake8-tidy-imports):
-      tidy module imports.
-    - [flake8-docstrings](https://github.com/PyCQA/flake8-docstrings):
-      pydocstyle extension of flake8.
-    - [flake8-rst-docstrings](https://github.com/peterjc/flake8-rst-docstrings):
-      extension of `flake8-docstrings` for `rst` docs.
-    - [flake8-comprehensions](https://github.com/adamchainz/flake8-comprehensions):
-      write better list/set/dict comprehensions.
-    - [flake8-bugbear](https://github.com/PyCQA/flake8-bugbear):
-      find possible bugs and design issues in program.
-    - [flake8-blind-except](https://github.com/elijahandrews/flake8-blind-except):
-      checks for blind, catch-all `except` statements.
-- [yesqa](https://github.com/asottile/yesqa):
-  remove unneccesary `# noqa` comments, follows additional dependencies listed above.
-- [autoflake](https://github.com/PyCQA/autoflake):
-  remove unused imports and variables.
-- [pre-commit-hooks](https://github.com/pre-commit/pre-commit-hooks): generic pre-commit hooks.
-    - **detect-private-key**: checks for the existence of private keys.
-    - **check-ast**: check whether files parse as valid python.
-    - **end-of-file-fixer**:check files end in a newline and only a newline.
-    - **mixed-line-ending**: checks mixed line ending.
-    - **trailing-whitespace**: trims trailing whitespace.
-    - **check-case-conflict**: check files that would conflict with case-insensitive file systems.
-- [pyupgrade](https://github.com/asottile/pyupgrade):
-  upgrade syntax for newer versions of the language.
-
-#### Notes on pre-commit checks
-
-- To ignore lint warnigs from **flake8**, see [Ignore certain lint warnings](#ignore-certain-lint-warnings).
-- You can add or remove pre-commit checks by simply deleting relevant lines in the `.pre-commit-config.yaml` file.
-  Some pre-commit checks have additional options that can be specified either in the `pyproject.toml` or tool-specific
-  config files, such as `.prettierrc.yml` for **prettier** and `.flake8` for **flake8**.
-
-### API design
-
-Scverse ecosystem packages should operate on [AnnData][] and/or [MuData][] datastructures and typically use an API
-as originally [introduced by scanpy][scanpy-api] with the following submodules:
-
-- `pp` for preprocessing
-- `tl` for tools (that, compared to `pp` generate interpretable output, often associated with a corresponding plotting
-  function)
-- `pl` for plotting functions
-
-You may add additional submodules as appropriate. While we encourage to follow a scanpy-like API for ecosystem packages,
-there may also be good reasons to choose a different approach, e.g. using an object-oriented API.
-
-[scanpy-api]: https://scanpy.readthedocs.io/en/stable/usage-principles.html
-
-### Ignore certain lint warnings
-
-The [pre-commit checks](#pre-commit-checks) include [flake8](https://flake8.pycqa.org/en/latest/) which checks
-for errors in Python files, including stylistic errors.
-
-In some cases it might overshoot and you may have good reasons to ignore certain warnings.
-
-To ignore an specific error on a per-case basis, you can add a comment `# noqa` to the offending line. You can also
-specify the error ID to ignore, with e.g. `# noqa: E731`. Check the [flake8 guide][] for reference.
-
-Alternatively, you can disable certain error messages for the entire project. To do so, edit the `.flake8`
-file in the root of the repository. Add one line per linting code you wish to ignore and don't forget to add a comment.
-
-```toml
-...
-# line break before a binary operator -> black does not adhere to PEP8
-W503
-# line break occured after a binary operator -> black does not adhere to PEP8
-W504
-...
-```
-
-[flake8 guide]: https://flake8.pycqa.org/en/3.1.1/user/ignoring-errors.html
-
-### Using VCS-based versioning
-
-By default, the template uses hard-coded version numbers that are set in `pyproject.toml` and [managed with
-bump2version](#making-a-release). If you prefer to have your project automatically infer version numbers from git
-tags, it is straightforward to switch to vcs-based versioning using [hatch-vcs][].
-
-In `pyproject.toml` add the following changes, and you are good to go!
-
-```diff
---- a/pyproject.toml
-+++ b/pyproject.toml
-@@ -1,11 +1,11 @@
- [build-system]
- build-backend = "hatchling.build"
--requires = ["hatchling"]
-+requires = ["hatchling", "hatch-vcs"]
-
-
- [project]
- name = "spatialdata-io"
--version = "0.3.1dev"
-+dynamic = ["version"]
-
-@@ -60,6 +60,9 @@
-+[tool.hatch.version]
-+source = "vcs"
-+
- [tool.coverage.run]
- source = ["spatialdata-io"]
- omit = [
-```
-
-Don't forget to update the [Making a release section](#making-a-release) in this document accordingly, after you are done!
-
-[hatch-vcs]: https://pypi.org/project/hatch-vcs/
-
-## Contributing guide
-
-Scanpy provides extensive [developer documentation][scanpy developer guide], most of which applies to this repo, too.
-This document will not reproduce the entire content from there. Instead, it aims at summarizing the most important
-information to get you started on contributing.
-
-We assume that you are already familiar with git and with making pull requests on GitHub. If not, please refer
-to the [scanpy developer guide][].
-
-### Installing dev dependencies
-
-In addition to the packages needed to _use_ this package, you need additional python packages to _run tests_ and _build
-the documentation_. It's easy to install them using `pip`:
-
-```bash
-pip install "spatialdata-io[dev,test,doc]"
-```
-
-### Code-style
-
-This template uses [pre-commit][] to enforce consistent code-styles. On every commit, pre-commit checks will either
-automatically fix issues with the code, or raise an error message. See [pre-commit checks](#pre-commit-checks) for
-a full list of checks enabled for this repository.
-
-To enable pre-commit locally, simply run
-
-```bash
-pre-commit install
-```
-
-in the root of the repository. Pre-commit will automatically download all dependencies when it is run for the first time.
-
-Alternatively, you can rely on the [pre-commit.ci][] service enabled on GitHub. If you didn't run `pre-commit` before
-pushing changes to GitHub it will automatically commit fixes to your pull request, or show an error message.
-
-If pre-commit.ci added a commit on a branch you still have been working on locally, simply use
-
-```bash
-git pull --rebase
-```
-
-to integrate the changes into yours.
-
-Finally, most editors have an _autoformat on save_ feature. Consider enabling this option for [black][black-editors]
-and [prettier][prettier-editors].
-
-[black-editors]: https://black.readthedocs.io/en/stable/integrations/editors.html
-[prettier-editors]: https://prettier.io/docs/en/editors.html
-
-### Writing tests
-
-This package uses the [pytest][] for automated testing. Please [write tests][scanpy-test-docs] for every function added
-to the package.
-
-Most IDEs integrate with pytest and provide a GUI to run tests. Alternatively, you can run all tests from the
-command line by executing
-
-```bash
-pytest
-```
-
-in the root of the repository. Continuous integration will automatically run the tests on all pull requests.
-
-[scanpy-test-docs]: https://scanpy.readthedocs.io/en/latest/dev/testing.html#writing-tests
-
-### Making a release
-
-#### Updating the version number
-
-Before making a release, you need to update the version number. Please adhere to [Semantic Versioning][semver], in brief
-
-> Given a version number MAJOR.MINOR.PATCH, increment the:
->
-> 1. MAJOR version when you make incompatible API changes,
-> 2. MINOR version when you add functionality in a backwards compatible manner, and
-> 3. PATCH version when you make backwards compatible bug fixes.
->
-> Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
-
-We use [bump2version][] to automatically update the version number in all places and automatically create a git tag.
-Run one of the following commands in the root of the repository
-
-```bash
-bump2version patch
-bump2version minor
-bump2version major
-```
-
-Once you are done, run
-
-```
-git push --tags
-```
-
-to publish the created tag on GitHub.
-
-[bump2version]: https://github.com/c4urself/bump2version
-
-#### Upload on PyPI
-
-Please follow the [Python packaging tutorial][].
-
-It is possible to automate this with GitHub actions, see also [this feature request][pypi-feature-request]
-in the cookiecutter-scverse template.
-
-[python packaging tutorial]: https://packaging.python.org/en/latest/tutorials/packaging-projects/#generating-distribution-archives
-[pypi-feature-request]: https://github.com/scverse/cookiecutter-scverse/issues/88
-
-### Writing documentation
-
-Please write documentation for your package. This project uses [sphinx][] with the following features:
-
-- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text
-- [Numpy-style docstrings][numpydoc] (through the [napoloen][numpydoc-napoleon] extension).
-- Jupyter notebooks as tutorials through [nbsphinx][] (See [Tutorials with nbsphinx](#tutorials-with-nbsphinx-and-jupyter-notebooks))
-- [Sphinx autodoc typehints][], to automatically reference annotated input and output types
-
-See the [scanpy developer docs](https://scanpy.readthedocs.io/en/latest/dev/documentation.html) for more information
-on how to write documentation.
-
-### Tutorials with nbsphinx and jupyter notebooks
-
-The documentation is set-up to render jupyter notebooks stored in the `docs/notebooks` directory using [nbsphinx][].
-Currently, only notebooks in `.ipynb` format are supported that will be included with both their input and output cells.
-It is your reponsibility to update and re-run the notebook whenever necessary.
-
-If you are interested in automatically running notebooks as part of the continuous integration, please check
-out [this feature request](https://github.com/scverse/cookiecutter-scverse/issues/40) in the `cookiecutter-scverse`
-repository.
-
-[nbsphinx]: https://github.com/spatialaudio/nbsphinx
-
-#### Hints
-
-- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only
-  if you do so can sphinx automatically create a link to the external documentation.
-- If building the documentation fails because of a missing link that is outside your control, you can add an entry to
-  the `nitpick_ignore` list in `docs/conf.py`
-
-#### Building the docs locally
-
-```bash
-cd docs
-make html
-open _build/html/index.html
-```
-
-
-
-[scanpy developer guide]: https://scanpy.readthedocs.io/en/latest/dev/index.html
-[codecov]: https://about.codecov.io/sign-up/
-[codecov docs]: https://docs.codecov.com/docs
-[pre-commit.ci]: https://pre-commit.ci/
-[readthedocs.org]: https://readthedocs.org/
-[nbshpinx]: https://github.com/spatialaudio/nbsphinx
-[jupytext]: https://jupytext.readthedocs.io/en/latest/
-[pre-commit]: https://pre-commit.com/
-[anndata]: https://github.com/scverse/anndata
-[mudata]: https://github.com/scverse/mudata
-[pytest]: https://docs.pytest.org/
-[semver]: https://semver.org/
-[sphinx]: https://www.sphinx-doc.org/en/master/
-[myst]: https://myst-parser.readthedocs.io/en/latest/intro.html
-[numpydoc-napoleon]: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html
-[numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
-[sphinx autodoc typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
diff --git a/docs/extensions/typed_returns.py b/docs/extensions/typed_returns.py
new file mode 100644
index 00000000..94478130
--- /dev/null
+++ b/docs/extensions/typed_returns.py
@@ -0,0 +1,29 @@
+# code from https://github.com/theislab/scanpy/blob/master/docs/extensions/typed_returns.py
+# with some minor adjustment
+import re
+
+from sphinx.application import Sphinx
+from sphinx.ext.napoleon import NumpyDocstring
+
+
+def _process_return(lines):
+    for line in lines:
+        m = re.fullmatch(r"(?P<param>\w+)\s+:\s+(?P<type>[\w.]+)", line)
+        if m:
+            # Once this is in scanpydoc, we can use the fancy hover stuff
+            yield f'-{m["param"]} (:class:`~{m["type"]}`)'
+        else:
+            yield line
+
+
+def _parse_returns_section(self, section):
+    lines_raw = list(_process_return(self._dedent(self._consume_to_next_section())))
+    lines = self._format_block(":returns: ", lines_raw)
+    if lines and lines[-1]:
+        lines.append("")
+    return lines
+
+
+def setup(app: Sphinx):
+    """Set app."""
+    NumpyDocstring._parse_returns_section = _parse_returns_section
diff --git a/docs/index.md b/docs/index.md
index 23a1e198..46aa14d4 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -8,8 +8,7 @@

 api.md
 changelog.md
-developer_docs.md
+template_usage.md
+contributing.md
 references.md
-
-notebooks/example
 ```
diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
deleted file mode 100644
index 49fb681f..00000000
--- a/docs/notebooks/example.ipynb
+++ /dev/null
@@ -1,79 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Example notebook"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "from anndata import AnnData\n",
-    "import spatialdata_io"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "adata = AnnData(np.random.normal(size=(20, 10)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Implement a preprocessing function here."
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "spatialdata_io.pp.basic_preproc(adata)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.13"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/docs/template_usage.md b/docs/template_usage.md
new file mode 100644
index 00000000..324fe4c3
--- /dev/null
+++ b/docs/template_usage.md
@@ -0,0 +1,322 @@
+# Using this template
+
+Welcome to the developer guidelines! This document is split into two parts:
+
+1. The [repository setup](#setting-up-the-repository). This section is relevant primarily for the repository maintainer and shows how to connect
+   continuous integration services and documents the initial set-up of the repository.
+2. The [contributor guide](contributing.md#contributing-guide). It contains information relevant to all developers who want to make a contribution.
+
+## Setting up the repository
+
+### First commit
+
+If you are reading this, you should have just completed the repository creation with:
+
+```bash
+cruft create https://github.com/scverse/cookiecutter-scverse
+```
+
+and you should have run
+
+```
+cd spatialdata-io
+```
+
+to move into the new project directory. Now that you have created a new repository locally, the first step is to push it to GitHub. To do this, you need to create a **new repository** on GitHub.
+You can follow the instructions directly on the [github quickstart guide][].
+Since `cruft` already populated the local repository of your project with all the necessary files, we suggest _not_ initializing the repository with a `README.md` or `.gitignore` file, because you might encounter git conflicts on your first push.
+If you are familiar with git and know how to handle git conflicts, you can go ahead with your preferred choice.
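+
+If you prefer doing this step from the command line, the empty remote can also be created with the [GitHub CLI](https://cli.github.com/). This is a minimal, hypothetical sketch (it assumes `gh` is installed and authenticated; the owner and repository name are placeholders to adapt):
+
+```bash
+# hypothetical sketch: create an empty remote repository on GitHub
+# (no README or .gitignore, so the first push stays conflict-free)
+gh repo create scverse/spatialdata-io --public
+```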
+
+:::{note}
+If you are looking at this document in the [spatialdata-io][] repository documentation, the project name used throughout is `spatialdata-io`. Otherwise, it should be replaced by your new project name.
+:::
+
+Now that your new project repository has been created on GitHub at `https://github.com/scverse/spatialdata-io`, you can push your first commit.
+To do this, simply follow the instructions on your GitHub repository page, or use the more verbose walkthrough here:
+
+Assuming you are in `/your/path/to/spatialdata-io`, add all files and commit:
+
+```bash
+# stage all files of your new repo
+git add --all
+# commit
+git commit -m "first commit"
+```
+
+You'll notice that the command `git commit` installed a bunch of packages and triggered their execution: those are the pre-commit hooks! To read more about what they are and what they do, you can go to the related section [Pre-commit checks](#pre-commit-checks) in this document.
+
+:::{note}
+There is a chance that `git commit -m "first commit"` fails due to the `prettier` pre-commit hook formatting the file `.cruft.json`. No problem, you have just experienced what pre-commit checks do in action. Just go ahead, re-add the modified file, and try to commit again:
+
+```bash
+ git add -u # update all tracked files
+ git commit -m "first commit"
+```
+
+:::
+
+Now that all the files of the newly created project have been committed, go ahead with the remaining steps:
+
+```bash
+# update the `origin` of your local repo with the remote GitHub link
+git remote add origin https://github.com/scverse/spatialdata-io.git
+# rename the default branch to main
+git branch -M main
+# push all your files to remote
+git push -u origin main
+```
+
+Your project should now be available at `https://github.com/scverse/spatialdata-io`. While the repository can be used directly at this point, a few remaining steps need to be done in order to achieve full functionality.
+
+### Coverage tests with _Codecov_
+
+Coverage tells what fraction of the code is "covered" by unit tests, thereby encouraging contributors to
+[write tests](contributing.md#writing-tests).
+To enable coverage checks, head over to [codecov][] and sign in with your GitHub account.
+You'll find more information in the "getting started" section of the [codecov docs][].
+
+In the `Actions` tab of your project's GitHub repository, you can see that the workflows are failing due to the **Upload coverage** step. The error message in the workflow should display something like:
+
+```
+...
+  Retrying 5/5 in 2s..
+  {'detail': ErrorDetail(string='Could not find a repository, try using repo upload token', code='not_found')}
+Error: 404 Client Error: Not Found for url:
+...
+```
+
+While the [codecov docs][] have very extensive documentation on how to get started, _if_ you are using the default settings of this template, we can assume that you are using [codecov][] in a GitHub Actions workflow, and hence you can make use of the [codecov bot][].
+
+To set it up, simply go to the [codecov app][] page and follow the instructions to activate it for your repository.
+Once the activation is completed, go back to the `Actions` tab and re-run the failing workflows.
+
+The workflows should now succeed, and you will be able to find the code coverage at this link: `https://app.codecov.io/gh/scverse/spatialdata-io`. You might have to wait a couple of minutes; the coverage of this repository should be ~60%.
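+
+Independently of the bot, you can reproduce the coverage measurement locally before relying on CI. A minimal sketch, assuming `pytest-cov` is installed in your environment:
+
+```bash
+# run the test suite and print per-file coverage,
+# listing the line numbers that are not exercised by tests
+pytest --cov=spatialdata_io --cov-report=term-missing
+```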
+
+If your repository is private, you will have to specify an additional token in the repository secrets. In brief, you need to:
+
+1. Generate a Codecov Token by clicking _setup repo_ in the codecov dashboard.
+    - If you have already set up codecov in the repository by following the previous steps, you can directly go to the codecov repo webpage.
+2. Go to _Settings_ and copy **only** the token `_______-____-...`.
+3. Go to _Settings_ of your newly created repository on GitHub.
+4. Go to _Security > Secrets > Actions_.
+5. Create new repository secret with name `CODECOV_TOKEN` and paste the token generated by codecov.
+6. Paste these additional lines in `.github/workflows/test.yaml` under the **Upload coverage** step:
+    ```yaml
+    - name: Upload coverage
+      uses: codecov/codecov-action@v3
+      with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+    ```
+7. Go back to the GitHub `Actions` page and re-run previously failed jobs.
+
+### Documentation on _readthedocs_
+
+We recommend using [readthedocs.org][] (RTD) to build and host the documentation for your project.
+To enable readthedocs, head over to [their website][readthedocs.org] and sign in with your GitHub account.
+On the RTD dashboard choose "Import a Project" and follow the instructions to add your repository.
+
+- Make sure to choose the correct name of the default branch. On GitHub, the name of the default branch should be `main` (it has
+  recently changed from `master` to `main`).
+- We recommend enabling documentation builds for pull requests (PRs). This ensures that a PR doesn't introduce changes
+  that break the documentation. To do so, go to `Admin -> Advanced Settings`, check the
+  `Build pull requests for this project` option, and click `Save`. For more information, please refer to
+  the [official RTD documentation](https://docs.readthedocs.io/en/stable/pull-requests.html).
+- If you find the RTD builds are failing, you can disable the `fail_on_warning` option in `.readthedocs.yaml`.
+
+If your project is private, there are ways to enable docs rendering on [readthedocs.org][], but it is more cumbersome and requires a different Read the Docs subscription. See a guide [here](https://docs.readthedocs.io/en/stable/guides/importing-private-repositories.html).
+
+### Pre-commit checks
+
+[Pre-commit][] checks are fast programs that
+check code for errors, inconsistencies and code styles, before the code
+is committed.
+
+We recommend setting up [pre-commit.ci][] to enforce consistency checks on every commit
+and pull-request.
+
+To do so, head over to [pre-commit.ci][] and click "Sign In With GitHub". Follow
+the instructions to enable pre-commit.ci for your account or your organization. You
+may choose to enable the service for an entire organization or on a per-repository basis.
+
+Once authorized, pre-commit.ci should automatically be activated.
+
+#### Overview of pre-commit hooks used by the template
+
+The following pre-commit checks are for code style and format:
+
+- [black](https://black.readthedocs.io/en/stable/): standard code
+  formatter in Python.
+- [isort](https://pycqa.github.io/isort/): sort module imports into
+  sections and types.
+- [prettier](https://prettier.io/docs/en/index.html): standard code
+  formatter for non-Python files (e.g. YAML).
+- [blacken-docs](https://github.com/asottile/blacken-docs): black on
+  python code in docs.
+
+The following pre-commit checks are for errors and inconsistencies:
+
+- [flake8](https://flake8.pycqa.org/en/latest/): standard check for errors in Python files.
+    - [flake8-tidy-imports](https://github.com/adamchainz/flake8-tidy-imports):
+      tidy module imports.
+    - [flake8-docstrings](https://github.com/PyCQA/flake8-docstrings):
+      pydocstyle extension of flake8.
+    - [flake8-rst-docstrings](https://github.com/peterjc/flake8-rst-docstrings):
+      extension of `flake8-docstrings` for `rst` docs.
+    - [flake8-comprehensions](https://github.com/adamchainz/flake8-comprehensions):
+      write better list/set/dict comprehensions.
+    - [flake8-bugbear](https://github.com/PyCQA/flake8-bugbear):
+      find possible bugs and design issues in your program.
+    - [flake8-blind-except](https://github.com/elijahandrews/flake8-blind-except):
+      checks for blind, catch-all `except` statements.
+- [yesqa](https://github.com/asottile/yesqa):
+  remove unnecessary `# noqa` comments; follows additional dependencies listed above.
+- [autoflake](https://github.com/PyCQA/autoflake):
+  remove unused imports and variables.
+- [pre-commit-hooks](https://github.com/pre-commit/pre-commit-hooks): generic pre-commit hooks.
+    - **detect-private-key**: checks for the existence of private keys.
+    - **check-ast**: check whether files parse as valid python.
+    - **end-of-file-fixer**: check files end in a newline and only a newline.
+    - **mixed-line-ending**: checks for mixed line endings.
+    - **trailing-whitespace**: trims trailing whitespace.
+    - **check-case-conflict**: check files that would conflict with case-insensitive file systems.
+- [pyupgrade](https://github.com/asottile/pyupgrade):
+  upgrade syntax for newer versions of the language.
+- **forbid-to-commit**: Make sure that `*.rej` files cannot be committed. These files are created by the
+  [automated template sync](#automated-template-sync) if there's a merge conflict and need to be addressed manually.
+
+### How to disable or add pre-commit checks
+
+- To ignore lint warnings from **flake8**, see [Ignore certain lint warnings](#how-to-ignore-certain-lint-warnings).
+- You can add or remove pre-commit checks by simply deleting relevant lines in the `.pre-commit-config.yaml` file.
+  Some pre-commit checks have additional options that can be specified either in the `pyproject.toml` or tool-specific
+  config files, such as `.prettierrc.yml` for **prettier** and `.flake8` for **flake8**.
+
+### How to ignore certain lint warnings
+
+The [pre-commit checks](#pre-commit-checks) include [flake8](https://flake8.pycqa.org/en/latest/), which checks
+for errors in Python files, including stylistic errors.
+
+In some cases it might overshoot and you may have good reasons to ignore certain warnings.
+
+To ignore a specific error on a per-case basis, you can add a comment `# noqa` to the offending line. You can also
+specify the error ID to ignore, with e.g. `# noqa: E731`. Check the [flake8 guide][] for reference.
+
+Alternatively, you can disable certain error messages for the entire project. To do so, edit the `.flake8`
+file in the root of the repository. Add one line per linting code you wish to ignore and don't forget to add a comment.
+
+```ini
+...
+# line break before a binary operator -> black does not adhere to PEP8
+W503
+# line break occurred after a binary operator -> black does not adhere to PEP8
+W504
+...
+```
+
+[flake8 guide]: https://flake8.pycqa.org/en/3.1.1/user/ignoring-errors.html
+
+### API design
+
+Scverse ecosystem packages should operate on [AnnData][] and/or [MuData][] data structures and typically use an API
+as originally [introduced by scanpy][scanpy-api] with the following submodules:
+
+- `pp` for preprocessing
+- `tl` for tools (that, compared to `pp`, generate interpretable output, often associated with a corresponding plotting
+  function)
+- `pl` for plotting functions
+
+You may add additional submodules as appropriate. While we encourage following a scanpy-like API for ecosystem packages,
+there may also be good reasons to choose a different approach, e.g. using an object-oriented API.
+
+[scanpy-api]: https://scanpy.readthedocs.io/en/stable/usage-principles.html
+
+### Using VCS-based versioning
+
+By default, the template uses hard-coded version numbers that are set in `pyproject.toml` and [managed with
+bump2version](contributing.md#publishing-a-release). If you prefer to have your project automatically infer version numbers from git
+tags, it is straightforward to switch to vcs-based versioning using [hatch-vcs][].
+
+In `pyproject.toml` add the following changes, and you are good to go!
+
+```diff
+--- a/pyproject.toml
++++ b/pyproject.toml
+@@ -1,11 +1,11 @@
+ [build-system]
+ build-backend = "hatchling.build"
+-requires = ["hatchling"]
++requires = ["hatchling", "hatch-vcs"]
+
+
+ [project]
+ name = "spatialdata-io"
+-version = "0.3.1dev"
++dynamic = ["version"]
+
+@@ -60,6 +60,9 @@
++[tool.hatch.version]
++source = "vcs"
++
+ [tool.coverage.run]
+ source = ["spatialdata-io"]
+ omit = [
+```
+
+Don't forget to update the [Publishing a release](contributing.md#publishing-a-release) section of the contributing guide accordingly, after you are done!
+
+[hatch-vcs]: https://pypi.org/project/hatch-vcs/
+
+### Automated template sync
+
+Automated template sync is enabled by default. This means that every night, a GitHub action runs [cruft][] to check
+if a new version of the `scverse-cookiecutter` template got released. If there are any new changes, a pull request
+proposing these changes is created automatically. This helps keep the repository up-to-date with the latest
+coding standards.
+
+It may happen that a template sync results in a merge conflict. If this is the case, a `*.rej` file with the
+diff is created. You need to manually address these changes and remove the `.rej` file when you are done.
+The pull request can only be merged after all `*.rej` files have been removed.
+
+:::{tip}
+The following hints may be useful to work with the template sync:
+
+- GitHub automatically disables scheduled actions if there has been no activity in the repository for 60 days.
+  You can re-enable or manually trigger the sync by navigating to `Actions` -> `Sync Template` in your GitHub repository.
+  You can also run the same check locally; see the sketch in the [Moving forward](#moving-forward) section below.
+- If you want to ignore certain files from the template update, you can add them to the `[tool.cruft]` section in the
+  `pyproject.toml` file in the root of your repository. More details are described in the
+  [cruft documentation][cruft-update-project].
+- To disable the sync entirely, simply remove the file `.github/workflows/sync.yaml`.
+
+:::
+
+[cruft]: https://cruft.github.io/cruft/
+[cruft-update-project]: https://cruft.github.io/cruft/#updating-a-project
+
+## Moving forward
+
+You have reached the end of this document. Congratulations! You have successfully set up your project and are ready to start.
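+
+The template sync mentioned above can also be exercised locally. A minimal sketch, assuming [cruft][] is installed in your environment:
+
+```bash
+# report whether the project has drifted from the cookiecutter template
+cruft check
+# interactively apply any pending template updates
+cruft update
+```
+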
+For everything else related to documentation, code style, testing, and publishing your project to PyPI, please refer to the [contributing docs](contributing.md#contributing-guide).
+
+
+
+[scanpy developer guide]: https://scanpy.readthedocs.io/en/latest/dev/index.html
+[spatialdata-io]: https://spatialdata-io.readthedocs.io/en/latest/template_usage.html
+[github quickstart guide]: https://docs.github.com/en/get-started/quickstart/create-a-repo?tool=webui
+[codecov]: https://about.codecov.io/sign-up/
+[codecov docs]: https://docs.codecov.com/docs
+[codecov bot]: https://docs.codecov.com/docs/team-bot
+[codecov app]: https://github.com/apps/codecov
+[pre-commit.ci]: https://pre-commit.ci/
+[readthedocs.org]: https://readthedocs.org/
+[myst-nb]: https://myst-nb.readthedocs.io/en/latest/
+[jupytext]: https://jupytext.readthedocs.io/en/latest/
+[pre-commit]: https://pre-commit.com/
+[anndata]: https://github.com/scverse/anndata
+[mudata]: https://github.com/scverse/mudata
+[pytest]: https://docs.pytest.org/
+[semver]: https://semver.org/
+[sphinx]: https://www.sphinx-doc.org/en/master/
+[myst]: https://myst-parser.readthedocs.io/en/latest/intro.html
+[numpydoc-napoleon]: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html
+[numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
+[sphinx autodoc typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
diff --git a/pyproject.toml b/pyproject.toml
index 6ea1bd47..8435145b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ name = "spatialdata_io"
 version = "0.0.1"
 description = "SpatialData IO for common techs"
 readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 license = {file = "LICENSE"}
 authors = [
     {name = "scverse"},
@@ -19,7 +19,12 @@ maintainers = [
 urls.Documentation = "https://spatialdata-io.readthedocs.io/"
 urls.Source = "https://github.com/scverse/spatialdata-io"
 urls.Home-page = "https://github.com/scverse/spatialdata-io"
-dependencies = ["spatialdata"]
+dependencies = [
+    "spatialdata @ git+https://github.com/scverse/spatialdata.git@main",
+    "scanpy",
+    "psutil",
+    "loguru",
+]

 [project.optional-dependencies]
 dev = [
@@ -29,13 +34,14 @@ dev = [
 ]
 doc = [
     "sphinx>=4",
-    "furo",
-    "myst-parser",
+    "sphinx-book-theme>=0.3.3",
+    "myst-nb",
     "sphinxcontrib-bibtex>=1.0.0",
-    "scanpydoc[typehints]>=0.7.4",
+    "sphinx-autodoc-typehints",
     # For notebooks
-    "nbsphinx",
-    "ipykernel"
+    "ipykernel",
+    "ipython",
+    "sphinx-copybutton",
 ]
 test = [
     "pytest",
@@ -63,7 +69,7 @@ skip_glob = ["docs/*"]

 [tool.black]
 line-length = 120
-target-version = ['py38']
+target-version = ['py39']
 include = '\.pyi?$'
 exclude = '''
 (
@@ -84,3 +90,6 @@ exclude = '''

 [tool.jupytext]
 formats = "ipynb,md"
+
+[tool.hatch.metadata]
+allow-direct-references = true
diff --git a/src/spatialdata_io/readers/metaspace.py b/src/spatialdata_io/readers/metaspace.py
index 660196b9..1e40048a 100644
--- a/src/spatialdata_io/readers/metaspace.py
+++ b/src/spatialdata_io/readers/metaspace.py
@@ -2,4 +2,4 @@

 def read_metaspace(url: str) -> SpatialData:
-    pass
+    """Read metaspace."""
diff --git a/src/spatialdata_io/readers/resolve.py b/src/spatialdata_io/readers/resolve.py
index f8a796bd..37de68a2 100644
--- a/src/spatialdata_io/readers/resolve.py
+++ b/src/spatialdata_io/readers/resolve.py
@@ -2,4 +2,4 @@

 def read_resolve(path: str) -> SpatialData:
-    pass
+    """Read resolve."""
diff --git a/src/spatialdata_io/readers/xenium.py b/src/spatialdata_io/readers/xenium.py
index 85cd69a4..af93aa25 100644
---
a/src/spatialdata_io/readers/xenium.py
+++ b/src/spatialdata_io/readers/xenium.py
@@ -1,53 +1,51 @@
 # format specification https://cf.10xgenomics.com/supp/xenium/xenium_documentation.html#polygon_vertices
+import json
 import os
 import shutil
 import subprocess
-import psutil
+import time
+from functools import partial
+from itertools import chain
+from multiprocessing import Pool
+from typing import Any, Optional

-from geopandas import GeoDataFrame
-from shapely import Polygon
-from spatialdata._io.write import write_polygons, write_points, write_table, write_shapes
-from spatialdata._core.models import _parse_transform
 import numpy as np
+import pandas as pd
+import psutil
+import pyarrow.parquet as pq
 import scanpy as sc
+import zarr
+from anndata import AnnData
+from geopandas import GeoDataFrame
+from loguru import logger
+from ome_zarr.io import ZarrLocation, parse_url
+from ome_zarr.reader import Label, Multiscales, Reader
+from ome_zarr.writer import write_multiscales_metadata
+from shapely import Polygon
 from spatialdata import (
-    SpatialData,
-    Image2DModel,
+    PointsModel,
+    PolygonsModel,
     Scale,
+    Sequence,
     ShapesModel,
     SpatialData,
     TableModel,
-    PolygonsModel,
-    PointsModel,
-    set_transform,
-    get_transform,
-    Sequence,
     get_dims,
-    MapAxis,
+    get_transform,
+    set_transform,
 )
 from spatialdata._core.core_utils import SpatialElement
-from spatialdata._core.transformations import BaseTransformation
 from spatialdata._core.models import _parse_transform
+from spatialdata._core.transformations import BaseTransformation
+from spatialdata._io.format import SpatialDataFormatV01
 from spatialdata._io.read import _read_multiscale
-from typing import Optional, Dict, Any
-import re
-import json
-import tifffile
-import pandas as pd
+from spatialdata._io.write import (
+    write_points,
+    write_polygons,
+    write_shapes,
+    write_table,
+)
 from tqdm import tqdm
-from multiprocessing import Pool
-from functools import partial
-from itertools import chain
-import time
-import zarr
-from ome_zarr.io import ZarrLocation
-from ome_zarr.reader import Label, Multiscales, Node, Reader
-from ome_zarr.io import parse_url
-from ome_zarr.writer import write_multiscales_metadata
-import pyarrow.parquet as pq
-from loguru import logger
-from anndata import AnnData
-from spatialdata._io.format import SpatialDataFormatV01

 DEBUG = False
 # DEBUG = False
@@ -55,7 +53,7 @@
 __all__ = ["convert_xenium_to_ngff"]


-def _identify_files(in_path: str, library_id: Optional[str] = None) -> Dict[str, Any]:
+def _identify_files(in_path: str, library_id: Optional[str] = None) -> dict[str, Any]:
     files = os.listdir(in_path)
     xenium_files = [f for f in files if f.endswith(".xenium")]
     assert len(xenium_files) == 1
@@ -93,7 +91,7 @@ def _get_zarr_group(out_path: str, element_type: str, name: str, delete_existing


 def _convert_polygons(
-    in_path: str, data: Dict[str, Any], out_path: str, name: str, num_workers: int, pixel_size: float
+    in_path: str, data: dict[str, Any], out_path: str, name: str, num_workers: int, pixel_size: float
 ) -> None:
     df = pd.read_csv(f"{in_path}/{data['run_name']}_{name}.csv.gz")
     if DEBUG:
@@ -105,7 +103,7 @@ def _convert_polygons(
     splits = np.array_split(range(1, n_cells), pool._processes)
     nested = pool.map(partial(_build_polygons, df=df), splits)
     start = time.time()
-    nested_sorted = map(lambda x: x[0], sorted(nested, key=lambda x: x[1]))
+    nested_sorted = map(lambda x: x[0], sorted(nested, key=lambda x: x[1]))  # noqa: C417
     polygons = list(chain.from_iterable(nested_sorted))
     print(f"list flattening: {time.time() - start}")
     start = time.time()
@@ -118,7 +116,7 @@ def _convert_polygons(
     write_polygons(polygons=parsed, group=group, name=name)


-def _convert_points(in_path: str, data: Dict[str, Any], out_path: str, pixel_size: float) -> None:
+def _convert_points(in_path: str, data: dict[str, Any], out_path: str, pixel_size: float) -> None:
     # using parquet is 10 times faster than reading from csv
     start = time.time()
     name = "transcripts"
@@ -126,13 +124,13 @@ def _convert_points(in_path: str, data: Dict[str, Any], out_path: str, pixel_siz
     xyz = table.select(("x_location", "y_location", "z_location")).to_pandas().to_numpy()
     # TODO: the construction of the sparse matrix is slow, optimize by converting to a categorical, the code in the
     # parser needs to be adapted
-    annotations = table.select(('overlaps_nucleus', 'qv', 'cell_id'))
-    annotations = annotations.add_column(3, 'feature_name', table.column("feature_name").cast(
-        "string").dictionary_encode())
+    annotations = table.select(("overlaps_nucleus", "qv", "cell_id"))
+    annotations = annotations.add_column(
+        3, "feature_name", table.column("feature_name").cast("string").dictionary_encode()
+    )
     if DEBUG:
         n = 100000
         xyz = xyz[:n]
-        feature_name = feature_name[:n]
         logger.info(f"DEBUG: only using {n} transcripts")
     print(f"parquet: {time.time() - start}")
     ##
@@ -145,7 +143,7 @@
     write_points(points=parsed, group=group, name=name)


-def _convert_table_and_shapes(in_path: str, data: Dict[str, Any], out_path: str, pixel_size: float) -> None:
+def _convert_table_and_shapes(in_path: str, data: dict[str, Any], out_path: str, pixel_size: float) -> None:
     name = "cells"
     df = pd.read_csv(f"{in_path}/{data['run_name']}_{name}.csv.gz")
     feature_matrix = sc.read_10x_h5(f"{in_path}/{data['run_name']}_cell_feature_matrix.h5")
@@ -258,7 +256,7 @@ def _ome_ngff_dims_workaround(zarr_path: str):
     shutil.rmtree(temp_path)


-def _convert_image(in_path: str, data: Dict[str, Any], out_path: str, name: str, num_workers: int) -> None:
+def _convert_image(in_path: str, data: dict[str, Any], out_path: str, name: str, num_workers: int) -> None:
     image = f"{in_path}/{data['run_name']}_{name}.ome.tif"
     assert os.path.isfile(image)
     _ = _get_zarr_group(out_path, "images", name)
@@ -286,7 +284,7 @@ def _convert_image(in_path: str, data: Dict[str, Any], out_path: str, name: str,
             shutil.move(os.path.join(full_out_path, "temp", f), os.path.join(full_out_path, f))
         shutil.rmtree(os.path.join(full_out_path, "temp"))
         _ome_ngff_dims_workaround(full_out_path)
-    except FileNotFoundError as e:
+    except FileNotFoundError:
         ##
         raise FileNotFoundError(
             "bioformats2raw not found, please check https://github.com/glencoesoftware/bioformats2raw for the "
@@ -309,8 +307,8 @@ def _update_transformation(transform: BaseTransformation, group: zarr.Group, ele
         scale_factors = base_resolution / current_resolution
         dataset = datasets[i]
         assert len(dataset) == 2
-        path = dataset["path"]
-        multiscale_transform = dataset["coordinateTransformations"]
+        dataset["path"]
+        dataset["coordinateTransformations"]
         # this is completely wrong: no idea why but bioformats2raw gives a scale for the first multiscale with value
        # smaller than 1. It should be 1, so we recompute it here
        # transforms = [BaseTransformation.from_dict(t) for t in multiscale_transform]
@@ -365,6 +363,7 @@ def convert_xenium_to_ngff(
     skip_image_morphology_mip: bool = False,
     skip_image_morphology_focus: bool = True,
 ) -> None:
+    """Convert Xenium to NGFF."""
     if num_workers == -1:
         MAX_WORKERS = psutil.cpu_count()
         logger.info(
diff --git a/tests/test_basic.py b/tests/test_basic.py
index d6ed0d54..4539331c 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -1,12 +1,5 @@
-import pytest
-
 import spatialdata_io


 def test_package_has_version():
     spatialdata_io.__version__
-
-
-@pytest.mark.skip(reason="This decorator should be removed when test passes.")
-def test_example():
-    assert 1 == 0  # This test is designed to fail.