diff --git a/Dockerfile b/Dockerfile index 8b87d9aa..918bb6b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,51 +1,37 @@ -FROM ubuntu:bionic - -# Update system -RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections - -# Configure locales -RUN apt-get update -y && \ - apt-get install -y --no-install-recommends locales && \ - apt-get clean -y && \ - rm -rf /var/lib/apt/lists/* -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 -RUN locale-gen en_US.UTF-8 - -# Install necessary packages -RUN apt-get update -y && \ - apt-get install -y --no-install-recommends git pkg-config libtool automake autoconf make g++ liblzma-dev coreutils meson ninja-build wget zlib1g-dev libicu-dev libgumbo-dev libmagic-dev ca-certificates && \ - apt-get clean -y && \ - rm -rf /var/lib/apt/lists/* - -# Update CA certificates -RUN update-ca-certificates - -# Install Xapian (wget zlib1g-dev) -RUN wget https://oligarchy.co.uk/xapian/1.4.14/xapian-core-1.4.14.tar.xz -RUN tar xvf xapian-core-1.4.14.tar.xz -RUN cd xapian-core-1.4.14 && ./configure -RUN cd xapian-core-1.4.14 && make all install -RUN rm -rf xapian - -# Install zimlib (libicu-dev) -RUN git clone https://github.com/openzim/libzim.git -RUN cd libzim && git checkout 6.0.2 -RUN cd libzim && meson . build -RUN cd libzim && ninja -C build install -RUN rm -rf libzim - -RUN ldconfig -ENV LD_LIBRARY_PATH /usr/local/lib/x86_64-linux-gnu/ - -# Install python dependecies - -RUN apt-get update -y && \ - apt-get install -y --no-install-recommends python-dev python3-dev python3-pip && \ - apt-get clean -y && \ - rm -rf /var/lib/apt/lists/* - -# Install Cython - -RUN pip3 install Cython +# A minimal runtime environment for python-libzim using pre-built releases. +# Usage: +# docker build . 
--tag openzim:python-libzim
+# docker run -it openzim:python-libzim
+# >>> from libzim import ZimCreator, ZimArticle, ZimBlob
+# docker run -it openzim:python-libzim ./some_example_script.py
+
+FROM python:3.7-buster
+
+ENV LIBZIM_VERSION=6.1.1
+ENV LIBZIM_RELEASE=libzim_linux-x86_64-$LIBZIM_VERSION
+ENV LIBZIM_LIBRARY_PATH=lib/x86_64-linux-gnu/libzim.so.$LIBZIM_VERSION
+ENV LIBZIM_INCLUDE_PATH=include/zim
+
+# Install libzim from pre-built release, system-wide, so that users can run
+# their dockerized code without needing to muck around with LDFLAGS and
+# CPPFLAGS to find libzim: there is only one copy of libzim, and it is
+# automatically available to all software. The archive is downloaded before it
+# is extracted (no pipe), then it and the leftover tree are removed in-layer.
+RUN wget -q https://download.openzim.org/release/libzim/$LIBZIM_RELEASE.tar.gz \
+    && tar -xzf $LIBZIM_RELEASE.tar.gz \
+    && mv $LIBZIM_RELEASE/$LIBZIM_LIBRARY_PATH /usr/lib/libzim.so \
+    && mv $LIBZIM_RELEASE/$LIBZIM_INCLUDE_PATH /usr/include/zim \
+    && rm -rf $LIBZIM_RELEASE $LIBZIM_RELEASE.tar.gz \
+    && ldconfig
+
+# Install python dependencies
+RUN pip3 install --no-cache-dir --upgrade \
+    pip cython==0.29.6 setuptools wheel pytest
+
+# Install python-libzim from local source (VOLUME stays after the install step)
+COPY . /opt/python-libzim
+WORKDIR /opt/python-libzim
+RUN pip install --no-cache-dir -e .
+VOLUME /opt/python-libzim
+
+ENTRYPOINT ["/usr/bin/env", "python3"]
diff --git a/Dockerfile.dev b/Dockerfile.dev
new file mode 100644
index 00000000..142d863a
--- /dev/null
+++ b/Dockerfile.dev
@@ -0,0 +1,60 @@
+# A full development environment with everything built from source.
+# Usage:
+# docker build . -f Dockerfile.dev --tag openzim:python-libzim-dev
+# docker run -it openzim:python-libzim-dev
+# $ black . && flake8 . && pytest .
+# $ pipenv install --dev
+# $ python setup.py build_ext
+# $ python setup.py sdist bdist_wheel
+# $ python setup.py install
+# $ python -c "from libzim import ZimArticle"
+
+FROM python:3.7-buster
+
+ENV LIBZIM_VERSION=6.1.1
+ENV LIBZIM_REPOSITORY=https://github.com/openzim/libzim.git
+
+ENV XAPIAN_VERSION=1.4.14
+ENV XAPIAN_RELEASE=xapian-core-$XAPIAN_VERSION
+ENV XAPIAN_URL=https://oligarchy.co.uk/xapian/$XAPIAN_VERSION/$XAPIAN_RELEASE.tar.xz
+
+WORKDIR /opt/
+
+# Install C++ build environment
+RUN apt-get -qq update && \
+    apt-get -qq install -y --no-install-recommends \
+    coreutils wget git ca-certificates \
+    g++ pkg-config libtool automake autoconf make meson ninja-build \
+    liblzma-dev zlib1g-dev libicu-dev libgumbo-dev libmagic-dev && \
+    apt-get clean -y && \
+    rm -rf /var/lib/apt/lists/*
+
+# Build & install Xapian from source: /opt/xapian
+RUN wget $XAPIAN_URL && \
+    tar xvf $XAPIAN_RELEASE.tar.xz && \
+    cd $XAPIAN_RELEASE && \
+    ./configure && \
+    make all install && \
+    ldconfig
+
+# Build & install libzim from source: /opt/libzim
+RUN git clone $LIBZIM_REPOSITORY --depth 1 --branch $LIBZIM_VERSION && \
+    cd libzim && \
+    meson . build && \
+    ninja -C build install && \
+    ldconfig
+
+# Install python dependencies
+RUN pip3 install --no-cache-dir --upgrade \
+    pip pipenv cython==0.29.6 wheel pytest tox ipython black flake8 mypy
+
+# Add local source code dir to docker container
+COPY . /opt/python-libzim
+WORKDIR /opt/python-libzim
+
+# Build & install python-libzim from source: /opt/python-libzim
+RUN python3 setup.py build_ext && pip install --no-cache-dir -e .
+# Declared last: the legacy builder discards build-time writes made after VOLUME
+VOLUME /opt/python-libzim
+ +CMD ["/bin/bash"] diff --git a/Pipfile b/Pipfile index fc479fd6..1f288406 100644 --- a/Pipfile +++ b/Pipfile @@ -4,8 +4,14 @@ url = "https://pypi.org/simple" verify_ssl = true [dev-packages] -pytest = "*" cython = "==0.29.6" -e1839a8 = {editable = true, path = "."} +setuptools = "*" +wheel = "*" +ipython = "*" +black = "*" +flake8 = "*" +mypy = "*" +pytest = "*" +twine = "*" [packages] diff --git a/README.md b/README.md index 510e9156..ab9b40e8 100644 --- a/README.md +++ b/README.md @@ -1,76 +1,198 @@ -# Setup +# python-libzim + +> The Python bindings for [`libzim`](https://github.com/openzim/libzim). + +```bash +# Install from PyPI: https://pypi.org/project/libzim/ +pip3 install libzim +``` + +This library allows you to interact with `.zim` files via Python. + +It just provides a shallow Python interface on top of the `libzim` C++ library (maintained by [OpenZIM](https://github.com/openzim)). + +It is primarily used by [`sotoki`](https://github.com/openzim/sotoki). + +## Quickstart + +### Reader API + +```python3 +from libzim.reader import File + +f = File("test.zim") +article = f.get_article("article/url.html") +print(article.url, article.title) +if not article.is_redirect(): + print(article.content) +``` + +### Write API + +See [example](examples/basic_writer.py) for basic usage of the writer API. + + +--- + +## User Documentation + +### Setup: Ubuntu/Debian `x86_64` (Recommended) + +Install the python `libzim` package from PyPI. + +```bash +pip3 install libzim +``` + +The `x86_64` linux wheel automatically includes the `libzim.so` dylib and headers, but other platforms may need to install `libzim` and its headers manually. + + +### Installing the `libzim` dylib and headers manually + +If you are not on a linux `x86_64` platform, you will have to install libzim manually. + +Either get a prebuilt binary from https://download.openzim.org/release/libzim +or [compile `libzim` from source](https://github.com/openzim/libzim). 
+ +If you have not installed libzim in a standard directory, you will have to set `LD_LIBRARY_PATH` to allow python to find the library: + +Assuming you have extracted (or installed) the library in `LIBZIM_DIR`: + ```bash -docker-compose build -docker-compose run libzim /bin/bash +export LD_LIBRARY_PATH="${LIBZIM_DIR}/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH" ``` + +### Setup: Docker (Optional) + ```bash -python setup.py build_ext -i -python tests/test_libzim.py +docker build . --tag openzim:python-libzim -# or +# Run a custom script inside the container +docker run -it openzim:python-libzim ./some_example_script.py -./rebuild.sh -./run_tests +# Or use the python repl interactively +docker run -it openzim:python-libzim +>>> import libzim ``` -Example: +--- -```python3 -from libzim import ZimArticle, ZimBlob, ZimCreator +## Developer Documentation + +**These instructions are for developers working on the `python-libzim` source code itself.** *If you are simply a user of the library and you don't intend to change its internal source code, follow the User Documentation instructions above instead.* + +### Setup: Ubuntu/Debian + +*Note: Make sure you've installed `libzim` dylib + headers first (see above).* + +```bash +apt install coreutils wget git ca-certificates \ + g++ pkg-config libtool automake autoconf make meson ninja-build \ + liblzma-dev zlib1g-dev libicu-dev libgumbo-dev libmagic-dev + +pip3 install --upgrade pip pipenv + +export CFLAGS="-I${LIBZIM_DIR}/include" +export LDFLAGS="-L${LIBZIM_DIR}/lib/x86_64-linux-gnu" +git clone https://github.com/openzim/python-libzim +cd python-libzim +python setup.py build_ext +pipenv install --dev +pipenv run pip install -e . +``` -class ZimTestArticle(ZimArticle): - content = ''' - - - Monadical - -

ñññ Hello, it works ñññ

''' +### Setup: Docker - def __init__(self): - ZimArticle.__init__(self) +```bash +docker build . -f Dockerfile.dev --tag openzim:python-libzim-dev - def is_redirect(self): - return False +docker run -it openzim:python-libzim-dev ./some_example_script.py - def get_url(self): - return "A/Monadical_SAS" +docker run -it openzim:python-libzim-dev +$ black . && flake8 . && pytest . +$ pipenv install --dev +$ python setup.py build_ext +$ python setup.py sdist bdist_wheel +$ python setup.py install +$ python -c "import libzim" - def get_title(self): - return "Monadical SAS" - - def get_mime_type(self): - return "text/html" - - def get_filename(self): - return "" - - def should_compress(self): - return True +``` - def should_index(self): - return True +--- - def get_data(self): - return ZimBlob(self.content.encode('UTF-8')) +## Common Tasks -# Create a ZimTestArticle article +### Run Linters & Tests -article = ZimTestArticle() +```bash +# Autoformat code with black +black --exclude=setup.py . +# Lint and check for errors with flake8 +flake8 --exclude=setup.py . +# Typecheck with mypy (optional) +mypy . +# Run tests +pytest . 
+``` -# Write the articles +### Rebuild Cython extension during development -import uuid -rnd_str = str(uuid.uuid1()) -test_zim_file_path = "/opt/python-libzim/tests/kiwix-test" +```bash +rm libzim/libzim.cpp +rm -Rf build +rm -Rf *.so +python setup.py build_ext +python setup.py install +``` -with ZimCreator(test_zim_file_path + '-' + rnd_str + '.zim') as zc: - zc.add_article(article) - if not zc.mandatory_metadata_ok(): - zc.update_metadata(creator='python-libzim', - description='Created in python', - name='Hola',publisher='Monadical', - title='Test Zim') +### Build package `sdist` and `bdist_wheels` for PyPI +```bash +python setup.py build_ext +python setup.py sdist bdist_wheel + +# upload to PyPI (caution: this is done automatically via Github Actions) +twine upload dist/* +``` + +### Use a specific `libzim` dylib and headers when compiling `python-libzim` + +```bash +export CFLAGS="-I${LIBZIM_DIR}/include" +export LDFLAGS="-L${LIBZIM_DIR}/lib/x86_64-linux-gnu" +export LD_LIBRARY_PATH="${LIBZIM_DIR}/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH" +python setup.py build_ext +python setup.py install ``` +--- + +## Further Reading + +### Related Projects +- https://github.com/openzim/sotoki +- https://framagit.org/mgautierfr/pyzim +- https://github.com/pediapress/pyzim +- https://github.com/jarondl/pyzimmer/blob/master/pyzimmer/zim_writer.py + +### Research +- https://github.com/cython/cython/wiki/AutoPxd +- https://www.youtube.com/watch?v=YReJ3pSnNDo +- https://github.com/openzim/zim-tools/blob/master/src/zimrecreate.cpp +- https://github.com/cython/cython/wiki/enchancements-inherit_CPP_classes +- https://groups.google.com/forum/#!topic/cython-users/vAB9hbLMxRg + +### Debugging +- https://cython.readthedocs.io/en/latest/src/userguide/debugging.html +- https://github.com/cython/cython/wiki/DebuggingTechniques +- https://stackoverflow.com/questions/2663841/python-tracing-a-segmentation-fault +- 
https://cython-devel.python.narkive.com/cW3Cn1th/debugging-a-segfault-in-a-cython-generated-module +- https://groups.google.com/forum/#!topic/cython-users/B_Sxj2NV1PE + +### Packaging +- https://download.openzim.org/release/libzim/ +- https://cibuildwheel.readthedocs.io/en/stable/faq/ +- https://github.com/pypa/manylinux +- https://github.com/RalfG/python-wheels-manylinux-build/blob/master/full_workflow_example.yml +- https://packaging.python.org/guides/packaging-binary-extensions/#publishing-binary-extensions diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 6b028132..00000000 --- a/docker-compose.yml +++ /dev/null @@ -1,13 +0,0 @@ -version: '3' - -services: - libzim: - build: - context: . - dockerfile: ./Dockerfile - image: kiwix:python-libzim - working_dir: /opt/python-libzim - stdin_open: true - tty: true - volumes: - - .:/opt/python-libzim diff --git a/examples/basic.py b/examples/basic_writer.py similarity index 74% rename from examples/basic.py rename to examples/basic_writer.py index 9556d9b4..59ec8bc9 100644 --- a/examples/basic.py +++ b/examples/basic_writer.py @@ -74,45 +74,10 @@ def get_data(self): article = TestArticle("Monadical_SAS", "Monadical", content) article2 = TestArticle("Monadical_2", "Monadical 2", content2) -print(article.content) - - -rnd_str = str(uuid.uuid1()) - -test_zim_file_path = "/opt/python-libzim/tests/kiwix-test" - -zim_creator = Creator( - test_zim_file_path + "-" + rnd_str + ".zim", - main_page="Monadical", - index_language="eng", - min_chunk_size=2048, -) - -# Add articles to zim file -zim_creator.add_article(article) -zim_creator.add_article(article2) - -# Set mandatory metadata -if not zim_creator.mandatory_metadata_ok(): - zim_creator.update_metadata( - creator="python-libzim", - description="Created in python", - name="Hola", - publisher="Monadical", - title="Test Zim", - ) - -print(zim_creator._get_metadata()) - -# Write articles to zim file -zim_creator.finalize() - - -# Example using 
context manager to ensure finalize is called. - rnd_str = str(uuid.uuid1()) +zim_file_path = f"kiwix-test-{rnd_str}.zim" -with Creator(test_zim_file_path + "-" + rnd_str + ".zim") as zc: +with Creator(zim_file_path, main_page="Monadical", index_language="eng", min_chunk_size=2048) as zc: zc.add_article(article) zc.add_article(article2) zc.update_metadata( diff --git a/libzim/writer.py b/libzim/writer.py index af8f2fbc..cd17f218 100644 --- a/libzim/writer.py +++ b/libzim/writer.py @@ -172,7 +172,7 @@ def update_metadata(self, **kwargs): def write_metadata(self): for key, value in self._metadata.items(): - if key == "date" and isinstance(value, datetime.date): + if key == "Date" and isinstance(value, datetime.date): value = value.strftime("%Y-%m-%d") article = MetadataArticle(key, value) self._creatorWrapper.add_article(article)