diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..020ff6f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +__pycache__ +build +libzim_wrapper.*.so +libzim/libzim_wrapper.cpp +libzim/libzim_wrapper.h +libzim/libzim_wrapper_api.h +*.egg-info diff --git a/libzim/__init__.py b/libzim/__init__.py new file mode 100644 index 00000000..b5b0eec2 --- /dev/null +++ b/libzim/__init__.py @@ -0,0 +1,23 @@ +# This file is part of python-libzim +# (see https://github.com/libzim/python-libzim) +# +# Copyright (c) 2020 Juan Diego Caballero +# Copyright (c) 2020 Matthieu Gautier +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +from libzim_wrapper import Blob + +__all__ = ["Blob"] diff --git a/libzim/examples.py b/libzim/examples.py index 132c7dd9..9556d9b4 100644 --- a/libzim/examples.py +++ b/libzim/examples.py @@ -17,12 +17,15 @@ # along with this program. If not, see . -from libzim import ZimArticle, ZimBlob, ZimCreator +# Write the article +import uuid + +from libzim.writer import Article, Blob, Creator -class ZimTestArticle(ZimArticle): +class TestArticle(Article): def __init__(self, url, title, content): - ZimArticle.__init__(self) + Article.__init__(self) self.url = url self.title = title self.content = content @@ -35,13 +38,13 @@ def get_url(self): def get_title(self): return f"{self.title}" - + def get_mime_type(self): return "text/html" - + def get_filename(self): return "" - + def should_compress(self): return True @@ -49,36 +52,41 @@ def should_index(self): return True def get_data(self): - return ZimBlob(self.content) + return Blob(self.content) -# Create a ZimTestArticle article -content = ''' +# Create a TestArticle article + +content = """ Monadical -

ñññ Hello, it works ñññ

''' +

ñññ Hello, it works ñññ

""" -content2 = ''' +content2 = """ Monadical 2 -

ñññ Hello, it works 2 ñññ

''' +

ñññ Hello, it works 2 ñññ

""" -article = ZimTestArticle("Monadical_SAS", "Monadical", content) -article2 = ZimTestArticle("Monadical_2", "Monadical 2", content2) +article = TestArticle("Monadical_SAS", "Monadical", content) +article2 = TestArticle("Monadical_2", "Monadical 2", content2) print(article.content) -# Write the article -import uuid -rnd_str = str(uuid.uuid1()) + +rnd_str = str(uuid.uuid1()) test_zim_file_path = "/opt/python-libzim/tests/kiwix-test" -zim_creator = ZimCreator(test_zim_file_path + '-' + rnd_str + '.zim',main_page = "Monadical",index_language= "eng", min_chunk_size= 2048) +zim_creator = Creator( + test_zim_file_path + "-" + rnd_str + ".zim", + main_page="Monadical", + index_language="eng", + min_chunk_size=2048, +) # Add articles to zim file zim_creator.add_article(article) @@ -86,7 +94,13 @@ def get_data(self): # Set mandatory metadata if not zim_creator.mandatory_metadata_ok(): - zim_creator.update_metadata(creator='python-libzim',description='Created in python',name='Hola',publisher='Monadical',title='Test Zim') + zim_creator.update_metadata( + creator="python-libzim", + description="Created in python", + name="Hola", + publisher="Monadical", + title="Test Zim", + ) print(zim_creator._get_metadata()) @@ -98,11 +112,13 @@ def get_data(self): rnd_str = str(uuid.uuid1()) -with ZimCreator(test_zim_file_path + '-' + rnd_str + '.zim') as zc: +with Creator(test_zim_file_path + "-" + rnd_str + ".zim") as zc: zc.add_article(article) zc.add_article(article2) - zc.update_metadata(creator='python-libzim', - description='Created in python', - name='Hola',publisher='Monadical', - title='Test Zim') - + zc.update_metadata( + creator="python-libzim", + description="Created in python", + name="Hola", + publisher="Monadical", + title="Test Zim", + ) diff --git a/libzim/lib.cxx b/libzim/lib.cxx index 387f3bf1..011691de 100644 --- a/libzim/lib.cxx +++ b/libzim/lib.cxx @@ -23,7 +23,7 @@ #include #include "lib.h" -#include "libzim_api.h" +#include "libzim_wrapper_api.h" #include #include @@ -38,7 +38,7 @@ ZimArticleWrapper::ZimArticleWrapper(PyObject *obj) : m_obj(obj) { - if (import_libzim()) + if (import_libzim_wrapper()) { std::cerr << "Error executing import_libzim!\n"; throw std::runtime_error("Error executing import_libzim"); diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx deleted file mode 100644 index 37f6bdb7..00000000 --- a/libzim/libzim.pyx +++ /dev/null @@ -1,367 +0,0 @@ -# This file is part of python-libzim -# (see https://github.com/libzim/python-libzim) -# -# Copyright (c) 2020 Juan Diego Caballero -# Copyright (c) 2020 Matthieu Gautier -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -cimport libzim -cimport cpython.ref as cpy_ref -from cython.operator import dereference - -from libc.stdint cimport uint32_t, uint64_t -from libcpp.string cimport string -from libcpp cimport bool -from libcpp.memory cimport shared_ptr, make_shared - -import datetime -from contextlib import contextmanager -from collections import defaultdict - -######################### -# ZimBlob # -######################### - -cdef class ZimBlob: - cdef Blob* c_blob - cdef bytes ref_content - - def __cinit__(self, content): - if isinstance(content, str): - self.ref_content = content.encode('UTF-8') - else: - self.ref_content = content - self.c_blob = new Blob( self.ref_content, len(self.ref_content)) - - def __dealloc__(self): - if self.c_blob != NULL: - del self.c_blob - - -######################### -# ZimArticle # -######################### - -cdef class ZimArticle: - cdef ZimBlob blob - - def get_url(self): - raise NotImplementedError - - def get_title(self): - raise NotImplementedError - - def is_redirect(self): - raise NotImplementedError - - def get_mime_type(self): - raise NotImplementedError - - def get_filename(self): - raise NotImplementedError - - def should_compress(self): - raise NotImplementedError - - def should_index(self): - raise NotImplementedError - - def redirect_url(self): - raise NotImplementedError - - def _get_data(self): - if self.blob is None: - self.blob = self.get_data() - return self.blob - - def get_data(self): - raise NotImplementedError - - -#------ Helper for pure virtual methods -------- - -cdef get_article_method_from_object_ptr(void *ptr, string method, int *error) with gil: - cdef ZimArticle art = (ptr) - try: - func = getattr(art, method.decode('UTF-8')) - except AttributeError: - error[0] = 1 - raise - else: - error[0] = 0 - return func - -#------- ZimArticle pure virtual methods -------- - -cdef public api: - string string_cy_call_fct(void *ptr, string method, int *error) with gil: - """Lookup and execute a pure virtual method on ZimArticle returning a string""" - func = get_article_method_from_object_ptr(ptr, method, error) - ret_str = func() - return ret_str.encode('UTF-8') - - Blob blob_cy_call_fct(void *ptr, string method, int *error) with gil: - """Lookup and execute a pure virtual method on ZimArticle returning a Blob""" - cdef ZimBlob blob - - func = get_article_method_from_object_ptr(ptr, method, error) - blob = func() - return dereference(blob.c_blob) - - bool bool_cy_call_fct(void *ptr, string method, int *error) with gil: - """Lookup and execute a pure virtual method on ZimArticle returning a bool""" - func = get_article_method_from_object_ptr(ptr, method, error) - return func() - - uint64_t int_cy_call_fct(void *ptr, string method, int *error) with gil: - """Lookup and execute a pure virtual method on ZimArticle returning an int""" - func = get_article_method_from_object_ptr(ptr, method, error) - return func() - -######################### -# ZimCreator # -######################### - -#TODO Write metadata - -class ZimMetadataArticle(ZimArticle): - - def __init__(self,url, metadata_content): - ZimArticle.__init__(self) - self.url = url - self.metadata_content = metadata_content - - def is_redirect(self): - return False - - def get_url(self): - return f"M/{self.url}" - - def get_title(self): - return "" - - def get_mime_type(self): - return "text/plain" - - def get_filename(self): - return "" - - def should_compress(self): - return True - - def should_index(self): - return False - - def get_data(self): - return ZimBlob(self.metadata_content) - - -MANDATORY_METADATA_KEYS =[ - "Name", - "Title", - "Creator", - "Publisher", - "Date", - "Description", - "Language"] - # Optional - #"LongDescription", - #"Licence", - #"Tags", - #"Flavour", - #"Source", - #"Counter", - #"Scraper"] - -cdef class ZimCreator: - """ - A class to represent a Zim Creator. - - Attributes - ---------- - *c_creator : zim.ZimCreatorWrapper - a pointer to the C++ Creator object - _finalized : bool - flag if the creator was finalized - _filename : str - Zim file path - _main_page : str - Zim file main page - _index_language : str - Zim file Index language - _min_chunk_size : str - Zim file minimum chunk size - _article_counter - Zim file article counter - _metadata - Zim file metadata - """ - - cdef ZimCreatorWrapper *c_creator - cdef bool _finalized - cdef object _filename - cdef object _main_page - cdef object _index_language - cdef object _min_chunk_size - cdef object _article_counter - cdef dict __dict__ - - def __cinit__(self, str filename, str main_page = "", str index_language = "eng", min_chunk_size = 2048): - """Constructs a ZimCreator from parameters. - Parameters - ---------- - filename : str - Zim file path - main_page : str - Zim file main page - index_language : str - Zim file index language (default eng) - min_chunk_size : int - Minimum chunk size (default 2048) - """ - - self.c_creator = ZimCreatorWrapper.create(filename.encode("UTF-8"), main_page.encode("UTF-8"), index_language.encode("UTF-8"), min_chunk_size) - self._finalized = False - self._filename = filename - self._main_page = self.c_creator.getMainUrl().getLongUrl().decode("UTF-8", "strict") - self._index_language = index_language - self._min_chunk_size = min_chunk_size - self._metadata = {k:b"" for k in MANDATORY_METADATA_KEYS} - - self._article_counter = defaultdict(int) - self.update_metadata(date=datetime.date.today(), language= index_language) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.finalize() - - def __dealloc__(self): - del self.c_creator - - @property - def filename(self): - """Get the filename of the ZimCreator object""" - return self._filename - - @property - def main_page(self): - """Get the main page of the ZimCreator object""" - return self.c_creator.getMainUrl().getLongUrl().decode("UTF-8", "strict")[2:] - - @main_page.setter - def main_page(self,new_url): - """Set the main page of the ZimCreator object""" - # Check if url longformat is used - if new_url[1] == '/': - raise ValueError("Url should not include a namespace") - - self.c_creator.setMainUrl(new_url.encode('UTF-8')) - - @property - def index_language(self): - """Get the index language of the ZimCreator object""" - return self._index_language - - @property - def min_chunk_size(self): - """Get the minimum chunk size of the ZimCreator object""" - return self._min_chunk_size - - def get_article_counter_string(self): - return ";".join(["%s=%s" % (k,v) for (k,v) in self._article_counter.items()]) - - def _get_metadata(self): - metadata = self._metadata - - counter_string = self.get_article_counter_string() - if counter_string: - metadata['Counter'] = counter_string - - return metadata - - def mandatory_metadata_ok(self): - """Flag if mandatory metadata is complete and not empty""" - metadata_item_ok = [self._metadata[k] for k in MANDATORY_METADATA_KEYS] - return all(metadata_item_ok) - - def update_metadata(self, **kwargs): - "Updates article metadata""" - # Converts python case to pascal case. example: long_description-> LongDescription - pascalize = lambda keyword: "".join(keyword.title().split("_")) - - if "date" in kwargs and isinstance(kwargs['date'],datetime.date): - kwargs['date'] = kwargs['date'].strftime('%Y-%m-%d') - - new_metadata = {pascalize(key): value for key, value in kwargs.items()} - self._metadata.update(new_metadata) - - def _update_article_counter(self, ZimArticle article not None): - # default dict update - self._article_counter[article.get_mime_type().strip()] += 1 - - def add_article(self, article not None): - """Add a ZimArticle to the Creator object. - - Parameters - ---------- - article : ZimArticle - The article to add to the file - Raises - ------ - RuntimeError - If the ZimArticle provided is not ready for writing - RuntimeError - If the ZimCreator was already finalized - """ - if self._finalized: - raise RuntimeError("ZimCreator already finalized") - - # Make a shared pointer to ZimArticleWrapper from the ZimArticle object (dereference internal c_article) - cdef shared_ptr[ZimArticleWrapper] art = shared_ptr[ZimArticleWrapper]( - new ZimArticleWrapper(article)); - try: - with nogil: - self.c_creator.addArticle(art) - except: - raise - else: - if not article.is_redirect(): - self._update_article_counter(article) - - def write_metadata(self, dict metadata): - for key in metadata: - metadata_article = ZimMetadataArticle(url=key, metadata_content=metadata[key]) - self.add_article(metadata_article) - - def finalize(self): - """finalize and write added articles to the file. - - Raises - ------ - RuntimeError - If the ZimCreator was already finalized - """ - if self._finalized: - raise RuntimeError("ZimCreator already finalized") - - self.write_metadata(self._get_metadata()) - with nogil: - self.c_creator.finalize() - self._finalized = True - - def __repr__(self): - return f"{self.__class__.__name__}(filename={self.filename})" diff --git a/libzim/libzim.pxd b/libzim/libzim_wrapper.pxd similarity index 98% rename from libzim/libzim.pxd rename to libzim/libzim_wrapper.pxd index 0e1bf2bc..adf3a40c 100644 --- a/libzim/libzim.pxd +++ b/libzim/libzim_wrapper.pxd @@ -17,13 +17,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from libcpp.string cimport string +from cpython.ref cimport PyObject + from libc.stdint cimport uint32_t, uint64_t from libcpp cimport bool from libcpp.memory cimport shared_ptr +from libcpp.string cimport string from libcpp.vector cimport vector -from cpython.ref cimport PyObject cdef extern from "zim/blob.h" namespace "zim": cdef cppclass Blob: @@ -38,12 +39,12 @@ cdef extern from "zim/writer/url.h" namespace "zim::writer": string getLongUrl() except + -cdef extern from "zim/writer/article.h" namespace "zim::writer": +cdef extern from "zim/writer/article.h" namespace "zim::writer": cdef cppclass Article: const string getTitle() except + -cdef extern from "lib.h": +cdef extern from "lib.h": cdef cppclass ZimArticleWrapper(Article): ZimArticleWrapper(PyObject *obj) except + const Url getUrl() except + @@ -55,7 +56,7 @@ cdef extern from "lib.h": const bool shouldIndex() except + const Url getRedirectUrl() except + const Blob getData() except + - + cdef cppclass ZimCreatorWrapper: @staticmethod ZimCreatorWrapper *create(string fileName, string mainPage, string fullTextIndexLanguage, int minChunkSize) nogil except + diff --git a/libzim/libzim_wrapper.pyx b/libzim/libzim_wrapper.pyx new file mode 100644 index 00000000..21105bdc --- /dev/null +++ b/libzim/libzim_wrapper.pyx @@ -0,0 +1,163 @@ +# This file is part of python-libzim +# (see https://github.com/libzim/python-libzim) +# +# Copyright (c) 2020 Juan Diego Caballero +# Copyright (c) 2020 Matthieu Gautier +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +cimport libzim.libzim_wrapper as clibzim + +from cython.operator import dereference +from cpython.ref cimport PyObject + +from libc.stdint cimport uint64_t +from libcpp.string cimport string +from libcpp cimport bool +from libcpp.memory cimport shared_ptr, make_shared + +import datetime + + + + +######################### +# Blob # +######################### + +cdef class Blob: + cdef clibzim.Blob* c_blob + cdef bytes ref_content + + def __cinit__(self, content): + if isinstance(content, str): + self.ref_content = content.encode('UTF-8') + else: + self.ref_content = content + self.c_blob = new clibzim.Blob( self.ref_content, len(self.ref_content)) + + def __dealloc__(self): + if self.c_blob != NULL: + del self.c_blob + + +#------ Helper for pure virtual methods -------- + +cdef get_article_method_from_object(object obj, string method, int *error) with gil: + try: + func = getattr(obj, method.decode('UTF-8')) + except AttributeError: + error[0] = 1 + raise + else: + error[0] = 0 + return func + +#------- ZimArticle pure virtual methods -------- + +cdef public api: + string string_cy_call_fct(object obj, string method, int *error) with gil: + """Lookup and execute a pure virtual method on ZimArticle returning a string""" + func = get_article_method_from_object(obj, method, error) + ret_str = func() + return ret_str.encode('UTF-8') + + clibzim.Blob blob_cy_call_fct(object obj, string method, int *error) with gil: + """Lookup and execute a pure virtual method on ZimArticle returning a Blob""" + cdef Blob blob + + func = get_article_method_from_object(obj, method, error) + blob = func() + return dereference(blob.c_blob) + + bool bool_cy_call_fct(object obj, string method, int *error) with gil: + """Lookup and execute a pure virtual method on ZimArticle returning a bool""" + func = get_article_method_from_object(obj, method, error) + return func() + + uint64_t int_cy_call_fct(object obj, string method, int *error) with gil: + """Lookup and execute a pure virtual method on ZimArticle returning an int""" + func = get_article_method_from_object(obj, method, error) + return func() + +cdef class Creator: + """ + A class to represent a Zim Creator. + + Attributes + ---------- + *c_creator : zim.ZimCreator + a pointer to the C++ Creator object + _finalized : bool + flag if the creator was finalized + """ + + cdef clibzim.ZimCreatorWrapper *c_creator + cdef bool _finalized + + def __cinit__(self, str filename, str main_page = "", str index_language = "eng", min_chunk_size = 2048): + """Constructs a ZimCreator from parameters. + Parameters + ---------- + filename : str + Zim file path + main_page : str + Zim file main page + index_language : str + Zim file index language (default eng) + min_chunk_size : int + Minimum chunk size (default 2048) + """ + + self.c_creator = clibzim.ZimCreatorWrapper.create(filename.encode("UTF-8"), main_page.encode("UTF-8"), index_language.encode("UTF-8"), min_chunk_size) + self._finalized = False + + def __dealloc__(self): + del self.c_creator + + def add_article(self, article not None): + """Add a article to the Creator object. + + Parameters + ---------- + article : ZimArticle + The article to add to the file + Raises + ------ + RuntimeError + If the ZimCreator was already finalized + """ + if self._finalized: + raise RuntimeError("ZimCreator already finalized") + + # Make a shared pointer to ZimArticleWrapper from the ZimArticle object + cdef shared_ptr[clibzim.ZimArticleWrapper] art = shared_ptr[clibzim.ZimArticleWrapper]( + new clibzim.ZimArticleWrapper(article)); + with nogil: + self.c_creator.addArticle(art) + + def finalize(self): + """finalize and write added articles to the file. + + Raises + ------ + RuntimeError + If the ZimCreator was already finalized + """ + if self._finalized: + raise RuntimeError("ZimCreator already finalized") + with nogil: + self.c_creator.finalize() + self._finalized = True diff --git a/libzim/writer.py b/libzim/writer.py new file mode 100644 index 00000000..a365e9f1 --- /dev/null +++ b/libzim/writer.py @@ -0,0 +1,190 @@ +# This file is part of python-libzim +# (see https://github.com/libzim/python-libzim) +# +# Copyright (c) 2020 Juan Diego Caballero +# Copyright (c) 2020 Matthieu Gautier +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +import datetime +from collections import defaultdict + +import libzim_wrapper +from libzim_wrapper import Blob + +__all__ = ["Article", "Blob", "Creator"] + + +class Article: + def __init__(self): + self._blob = None + + def get_url(self): + raise NotImplementedError + + def get_title(self): + raise NotImplementedError + + def is_redirect(self): + raise NotImplementedError + + def get_mime_type(self): + raise NotImplementedError + + def get_filename(self): + raise NotImplementedError + + def should_compress(self): + raise NotImplementedError + + def should_index(self): + raise NotImplementedError + + def redirect_url(self): + raise NotImplementedError + + def _get_data(self): + if self._blob is None: + self._blob = self.get_data() + return self._blob + + def get_data(self): + raise NotImplementedError + + +class MetadataArticle(Article): + def __init__(self, url, metadata_content): + Article.__init__(self) + self.url = url + self.metadata_content = metadata_content + + def is_redirect(self): + return False + + def get_url(self): + return f"M/{self.url}" + + def get_title(self): + return "" + + def get_mime_type(self): + return "text/plain" + + def get_filename(self): + return "" + + def should_compress(self): + return True + + def should_index(self): + return False + + def get_data(self): + return Blob(self.metadata_content) + + +MANDATORY_METADATA_KEYS = [ + "Name", + "Title", + "Creator", + "Publisher", + "Date", + "Description", + "Language", +] + + +class Creator: + """ + A class to represent a Zim Creator. + + Attributes + ---------- + *c_creator : zim.Creator + a pointer to the C++ Creator object + _finalized : bool + flag if the creator was finalized + _filename : str + Zim file path + _main_page : str + Zim file main page + _index_language : str + Zim file Index language + _min_chunk_size : str + Zim file minimum chunk size + _article_counter + Zim file article counter + _metadata + Zim file metadata + """ + + def __init__(self, filename, main_page, index_language, min_chunk_size): + print(filename) + self._creatorWrapper = libzim_wrapper.Creator( + filename, main_page, index_language, min_chunk_size + ) + self.filename = filename + self.main_page = main_page + self.language = index_language + self._metadata = {} + self._article_counter = defaultdict(int) + self.update_metadata(date=datetime.date.today(), language=index_language) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + def add_article(self, article): + self._creatorWrapper.add_article(article) + if not article.is_redirect(): + self._update_article_counter(article) + + def _update_article_counter(self, article): + # default dict update + self._article_counter[article.get_mime_type().strip()] += 1 + + def mandatory_metadata_ok(self): + """Flag if mandatory metadata is complete and not empty""" + metadata_item_ok = [k in self._metadata for k in MANDATORY_METADATA_KEYS] + return all(metadata_item_ok) + + def update_metadata(self, **kwargs): + "Updates article metadata" "" + # Converts python case to pascal case. example: long_description-> LongDescription + pascalize = lambda keyword: "".join(keyword.title().split("_")) + new_metadata = {pascalize(k): v for k, v in kwargs.items()} + self._metadata.update(new_metadata) + + def write_metadata(self): + for key, value in self._metadata.items(): + if key == "date" and isinstance(value, datetime.date): + value = value.strftime("%Y-%m-%d") + article = MetadataArticle(key, value) + self._creatorWrapper.add_article(article) + + article = MetadataArticle("Counter", self._get_counter_string()) + self._creatorWrapper.add_article(article) + + def _get_counter_string(self): + return ";".join(["%s=%s" % (k, v) for (k, v) in self._article_counter.items()]) + + def close(self): + self.write_metadata() + self._creatorWrapper.finalize() + + def __repr__(self): + return f"Creator(filename={self.filename})" diff --git a/setup.py b/setup.py index eb4a7df5..8a05d11c 100755 --- a/setup.py +++ b/setup.py @@ -19,12 +19,12 @@ def read(fname): license = "GPLv3+", long_description=read('README.md'), ext_modules = cythonize([ - Extension("libzim", ["libzim/*.pyx","libzim/lib.cxx"], + Extension("libzim_wrapper", ["libzim/*.pyx", "libzim/lib.cxx"], include_dirs=["libzim"], libraries=["zim"], extra_compile_args=["-std=c++11"], language="c++"), - ], - compiler_directives={'language_level' : "3"} - ) + ], + compiler_directives={'language_level' : "3"} + ), ) diff --git a/tests/test_libzim.py b/tests/test_libzim.py index d2967dca..011d8f55 100644 --- a/tests/test_libzim.py +++ b/tests/test_libzim.py @@ -1,7 +1,7 @@ # This file is part of python-libzim # (see https://github.com/libzim/python-libzim) # -i# Copyright (c) 2020 Juan Diego Caballero +# Copyright (c) 2020 Juan Diego Caballero # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,50 +16,57 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import unittest -import os,sys,inspect +import pytest -# Import local libzim module from parent -current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) -parent_dir = os.path.dirname(current_dir) -sys.path.insert(0, parent_dir) - -from libzim import ZimArticle, ZimBlob, ZimCreator +from libzim.writer import Article, Blob, Creator # test files https://wiki.kiwix.org/wiki/Content_in_all_languages # https://wiki.openzim.org/wiki/Metadata - -TEST_METADATA = { - # Mandatory - "Name" : "wikipedia_fr_football", - "Title": "English Wikipedia", - "Creator": "English speaking Wikipedia contributors", - "Publisher": "Wikipedia user Foobar", - "Date": "2009-11-21", - "Description": "All articles (without images) from the english Wikipedia", - "Language": "eng", - # Optional - "Longdescription": "This ZIM file contains all articles (without images) from the english Wikipedia by 2009-11-10. The topics are ...", - "Licence": "CC-BY", - "Tags": "wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:yes;_ftindex:yes", - "Flavour": "nopic", - "Source": "https://en.wikipedia.org/", - "Counter": "image/jpeg=5;image/gif=3;image/png=2", - "Scraper": "sotoki 1.2.3" -} - -class ZimTestArticle(ZimArticle): - content = ''' - - - Monadical - -

ñññ Hello, it works ñññ

''' - - def __init__(self): - ZimArticle.__init__(self) +@pytest.fixture(scope="session") +def metadata(): + return { + # Mandatory + "Name": "wikipedia_fr_football", + "Title": "English Wikipedia", + "Creator": "English speaking Wikipedia contributors", + "Publisher": "Wikipedia user Foobar", + "Date": "2009-11-21", + "Description": "All articles (without images) from the english Wikipedia", + "Language": "eng", + # Optional + "Longdescription": "This ZIM file contains all articles (without images) from the english Wikipedia by 2009-11-10. The topics are ...", + "Licence": "CC-BY", + "Tags": "wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:yes;_ftindex:yes", + "Flavour": "nopic", + "Source": "https://en.wikipedia.org/", + "Counter": "image/jpeg=5;image/gif=3;image/png=2", + "Scraper": "sotoki 1.2.3", + } + + +@pytest.fixture(scope="session") +def article_content(): + content = """ + + + Monadical + +

ñññ Hello, it works ñññ

""" + url = "A/Monadical_SAS" + title = "Monadical SAS" + mime_type = "text/html" + return (content, url, title, mime_type) + + +class SimpleArticle(Article): + def __init__(self, content, url, title, mime_type): + Article.__init__(self) + self.content = content + self.url = url + self.title = title + self.mime_type = mime_type def is_redirect(self): return False @@ -69,17 +76,17 @@ def can_write(self): return True def get_url(self): - return "A/Monadical_SAS" + return self.url def get_title(self): - return "Monadical SAS" - + return self.title + def get_mime_type(self): - return "text/html" - + return self.mime_type + def get_filename(self): return "" - + def should_compress(self): return True @@ -87,50 +94,55 @@ def should_index(self): return True def get_data(self): - return ZimBlob(self.content.encode('UTF-8')) - - -class TestZimCreator(unittest.TestCase): - def setUp(self): - self.test_zim_file_path = "/opt/python-libzim/tests/kiwix-test" - - # Test article - self.test_article = ZimTestArticle() - - def tearDown(self): - pass - - def _assert_article_properties(self, written_article, article): - pass - - def _add_article_to_test_zim_file_read_it_back(self, article, delete_zim_file=True): - pass - - def test_write_article(self): - import uuid - rnd_str = str(uuid.uuid1()) - zim_creator = ZimCreator(self.test_zim_file_path + '-' + rnd_str + '.zim',main_page = "welcome",index_language= "eng", min_chunk_size= 2048) - zim_creator.add_article(self.test_article) - # Set mandatory metadata - zim_creator.update_metadata(creator='python-libzim',description='Created in python',name='Hola',publisher='Monadical',title='Test Zim') - zim_creator.finalize() - - def test_article_metadata(self): - import uuid - rnd_str = str(uuid.uuid1()) - zim_creator = ZimCreator(self.test_zim_file_path + '-' + rnd_str + '.zim',main_page = "welcome",index_language= "eng", min_chunk_size= 2048) - zim_creator.update_metadata(**TEST_METADATA) - self.assertEqual(zim_creator._get_metadata(), TEST_METADATA) - - def test_check_mandatory_metadata(self): - import uuid - rnd_str = str(uuid.uuid1()) - zim_creator = ZimCreator(self.test_zim_file_path + '-' + rnd_str + '.zim',main_page = "welcome",index_language= "eng", min_chunk_size= 2048) - self.assertFalse(zim_creator.mandatory_metadata_ok()) - zim_creator.update_metadata(creator='python-libzim',description='Created in python',name='Hola',publisher='Monadical',title='Test Zim') - self.assertTrue(zim_creator.mandatory_metadata_ok()) - - - -if __name__ == '__main__': - unittest.main() + return Blob(self.content.encode("UTF-8")) + + +@pytest.fixture(scope="session") +def article(article_content): + return SimpleArticle(*article_content) + + +def test_write_article(tmpdir, article): + with Creator( + str(tmpdir / "test.zim"), + main_page="welcome", + index_language="eng", + min_chunk_size=2048, + ) as zim_creator: + zim_creator.add_article(article) + zim_creator.update_metadata( + creator="python-libzim", + description="Created in python", + name="Hola", + publisher="Monadical", + title="Test Zim", + ) + + +def test_article_metadata(tmpdir, metadata): + with Creator( + str(tmpdir / "test.zim"), + main_page="welcome", + index_language="eng", + min_chunk_size=2048, + ) as zim_creator: + zim_creator.update_metadata(**metadata) + assert zim_creator._metadata == metadata + + +def test_check_mandatory_metadata(tmpdir): + with Creator( + str(tmpdir / "test.zim"), + main_page="welcome", + index_language="eng", + min_chunk_size=2048, + ) as zim_creator: + assert not zim_creator.mandatory_metadata_ok() + zim_creator.update_metadata( + creator="python-libzim", + description="Created in python", + name="Hola", + publisher="Monadical", + title="Test Zim", + ) + assert zim_creator.mandatory_metadata_ok()