From 87e05891b332ebd7fc2744f8351540507310b2bc Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 23 Apr 2020 10:42:40 +0200 Subject: [PATCH 1/2] Better error handling of c++ calling python. - Catch all python exception, not only the getattr. - Use a string instead of a int as error "marker" to provide more information to the user. - Pass the full traceback in the string. This way the user can have information of where the exception was raised. `get_article_method_from_object` is removed as it was mainly calling `getattr` and handle error. As we also need to handle error for the method call, the handle in the specific function is not needed. And we don't need a function that just call another one. --- libzim/lib.cxx | 27 ++++++++++--------- libzim/wrapper.pyx | 63 +++++++++++++++++++++++++------------------- libzim/writer.py | 18 ++++++------- tests/test_libzim.py | 41 +++++++++++++++++++++++++++- 4 files changed, 99 insertions(+), 50 deletions(-) diff --git a/libzim/lib.cxx b/libzim/lib.cxx index 58af0a11..9f83a778 100644 --- a/libzim/lib.cxx +++ b/libzim/lib.cxx @@ -25,6 +25,7 @@ #include "wrapper_api.h" +#include #include #include #include @@ -62,11 +63,11 @@ std::string ZimArticleWrapper::callCythonReturnString(std::string methodName) co if (!this->m_obj) throw std::runtime_error("Python object not set"); - int error; + std::string error; std::string ret_val = string_cy_call_fct(this->m_obj, methodName, &error); - if (error) - throw std::runtime_error("The pure virtual function " + methodName + " must be override"); + if (!error.empty()) + throw std::runtime_error(error); return ret_val; } @@ -76,11 +77,11 @@ zim::Blob ZimArticleWrapper::callCythonReturnBlob(std::string methodName) const if (!this->m_obj) throw std::runtime_error("Python object not set"); - int error; + std::string error; zim::Blob ret_val = blob_cy_call_fct(this->m_obj, methodName, &error); - if (error) - throw std::runtime_error("The pure virtual function " + methodName + " must be override"); + if (!error.empty()) + throw std::runtime_error(error); return ret_val; } @@ -90,11 +91,11 @@ bool ZimArticleWrapper::callCythonReturnBool(std::string methodName) const if (!this->m_obj) throw std::runtime_error("Python object not set"); - int error; + std::string error; bool ret_val = bool_cy_call_fct(this->m_obj, methodName, &error); - if (error) - throw std::runtime_error("The pure virtual function " + methodName + " must be override"); + if (!error.empty()) + throw std::runtime_error(error); return ret_val; } @@ -104,11 +105,11 @@ uint64_t ZimArticleWrapper::callCythonReturnInt(std::string methodName) const if (!this->m_obj) throw std::runtime_error("Python object not set"); - int error; + std::string error; - int ret_val = int_cy_call_fct(this->m_obj, methodName, &error); - if (error) - throw std::runtime_error("The pure virtual function " + methodName + " must be override"); + int64_t ret_val = int_cy_call_fct(this->m_obj, methodName, &error); + if (!error.empty()) + throw std::runtime_error(error); return ret_val; } diff --git a/libzim/wrapper.pyx b/libzim/wrapper.pyx index 76431467..06cf0997 100644 --- a/libzim/wrapper.pyx +++ b/libzim/wrapper.pyx @@ -32,6 +32,7 @@ from libcpp.memory cimport shared_ptr, make_shared, unique_ptr import pathlib import datetime +import traceback ######################### @@ -98,44 +99,52 @@ cdef class ReadingBlob: self.view_count -= 1 -#------ Helper for pure virtual methods -------- - -cdef get_article_method_from_object(object obj, string method, int *error) with gil: - try: - func = getattr(obj, method.decode('UTF-8')) - except AttributeError: - error[0] = 1 - raise - else: - error[0] = 0 - return func - #------- ZimArticle pure virtual methods -------- cdef public api: - string string_cy_call_fct(object obj, string method, int *error) with gil: + string string_cy_call_fct(object obj, string method, string *error) with gil: """Lookup and execute a pure virtual method on ZimArticle returning a string""" - func = get_article_method_from_object(obj, method, error) - ret_str = func() - return ret_str.encode('UTF-8') - - wrapper.Blob blob_cy_call_fct(object obj, string method, int *error) with gil: + try: + func = getattr(obj, method.decode('UTF-8')) + ret_str = func() + return ret_str.encode('UTF-8') + except Exception as e: + error[0] = traceback.format_exc().encode('UTF-8') + return b"" + + wrapper.Blob blob_cy_call_fct(object obj, string method, string *error) with gil: """Lookup and execute a pure virtual method on ZimArticle returning a Blob""" cdef WritingBlob blob - func = get_article_method_from_object(obj, method, error) - blob = func() - return dereference(blob.c_blob) + try: + func = getattr(obj, method.decode('UTF-8')) + blob = func() + if blob is None: + raise RuntimeError("Blob is none") + return dereference(blob.c_blob) + except Exception as e: + error[0] = traceback.format_exc().encode('UTF-8') - bool bool_cy_call_fct(object obj, string method, int *error) with gil: + return Blob() + + bool bool_cy_call_fct(object obj, string method, string *error) with gil: """Lookup and execute a pure virtual method on ZimArticle returning a bool""" - func = get_article_method_from_object(obj, method, error) - return func() + try: + func = getattr(obj, method.decode('UTF-8')) + return func() + except Exception as e: + error[0] = traceback.format_exc().encode('UTF-8') + return False - uint64_t int_cy_call_fct(object obj, string method, int *error) with gil: + uint64_t int_cy_call_fct(object obj, string method, string *error) with gil: """Lookup and execute a pure virtual method on ZimArticle returning an int""" - func = get_article_method_from_object(obj, method, error) - return func() + try: + func = getattr(obj, method.decode('UTF-8')) + return func() + except Exception as e: + error[0] = traceback.format_exc().encode('UTF-8') + + return 0 cdef class Creator: """ Zim Creator diff --git a/libzim/writer.py b/libzim/writer.py index 8c363423..7a7d1ce7 100644 --- a/libzim/writer.py +++ b/libzim/writer.py @@ -50,35 +50,35 @@ def __init__(self): def get_url(self) -> str: """ Full URL of article including namespace """ - raise NotImplementedError + raise NotImplementedError("get_url must be implemented.") def get_title(self) -> str: """ Article title. Might be indexed and used in suggestions """ - raise NotImplementedError + raise NotImplementedError("get_title must be implemented.") def is_redirect(self) -> bool: """ Whether this redirects to another article (cf. redirec_url) """ - raise NotImplementedError + raise NotImplementedError("get_redirect must be implemented.") def get_mime_type(self) -> str: """ MIME-type of the article's content. A/ namespace reserved to text/html """ - raise NotImplementedError + raise NotImplementedError("get_mime_type must be implemented.") def get_filename(self) -> str: """ Filename to get content from. Blank string "" if not used """ - raise NotImplementedError + raise NotImplementedError("get_filename must be implemented.") def should_compress(self) -> bool: """ Whether the article's content should be compressed or not """ - raise NotImplementedError + raise NotImplementedError("should_compress must be implemented.") def should_index(self) -> bool: """ Whether the article's content should be indexed or not """ - raise NotImplementedError + raise NotImplementedError("should_index must be implemented.") def redirect_url(self) -> str: """ Full URL including namespace of another article """ - raise NotImplementedError + raise NotImplementedError("redirect_url must be implemented.") def _get_data(self) -> Blob: """ Internal data-retrieval with a cache to the content's pointer @@ -90,7 +90,7 @@ def _get_data(self) -> Blob: def get_data(self) -> Blob: """ Blob containing the complete content of the article """ - raise NotImplementedError + raise NotImplementedError("get_data must be implemented.") def __repr__(self) -> str: return f"{self.__class__.__name__}(url={self.get_url()}, title={self.get_title()})" diff --git a/tests/test_libzim.py b/tests/test_libzim.py index 9552baa8..b4172950 100644 --- a/tests/test_libzim.py +++ b/tests/test_libzim.py @@ -16,9 +16,12 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import pytest +import sys +import subprocess import pathlib +import pytest + from libzim.writer import Article, Blob, Creator from libzim.reader import File @@ -188,3 +191,39 @@ def test_filename_param_types(tmpdir): with Creator(str(path), "welcome") as creator: assert creator.filename == path assert isinstance(creator.filename, pathlib.Path) + + +def test_in_article_exceptions(tmpdir): + """ make sure we raise RuntimeError from article's virtual methods """ + + class BoolErrorArticle(SimpleArticle): + def is_redirect(self): + raise RuntimeError("OUPS Redirect") + + class StringErrorArticle(SimpleArticle): + def get_url(self): + raise IOError + + class BlobErrorArticle(SimpleArticle): + def get_data(self): + raise IOError + + path, main_page = tmpdir / "test.zim", "welcome" + args = {"title": "Hello", "mime_type": "text/html", "content": "", "url": "welcome"} + + with Creator(path, main_page) as zim_creator: + # make sure we can can exception of all types (except int, not used) + with pytest.raises(RuntimeError, match="OUPS Redirect"): + zim_creator.add_article(BoolErrorArticle(**args)) + with pytest.raises(RuntimeError, match="in get_url"): + zim_creator.add_article(StringErrorArticle(**args)) + with pytest.raises(RuntimeError, match="IOError"): + zim_creator.add_article(BlobErrorArticle(**args)) + with pytest.raises(RuntimeError, match="NotImplementedError"): + zim_creator.add_article(Article()) + + # make sure we can catch it from outside creator + with pytest.raises(RuntimeError): + with Creator(path, main_page) as zim_creator: + zim_creator.add_article(BlobErrorArticle(**args)) + From ea307f5b911cf4f38e334e1a862e0010adf7766a Mon Sep 17 00:00:00 2001 From: renaud gaudin Date: Fri, 26 Jun 2020 17:47:30 +0000 Subject: [PATCH 2/2] Test to ensure we can avoid ZIM file creation As the libzim lacks a `cancel()` method to properly stop a ZIM file creation process without calling `finalize()` (which always creates the ZIM file), users need to cheat the creator to not call finalize(), even upon exit (as `Creator`'s destructor do call `finalize()`) Test instanciates a different python VM as the libzim_reader tests (manual garbage collection) leads to a segfault. --- tests/test_libzim.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_libzim.py b/tests/test_libzim.py index b4172950..ab2d1d50 100644 --- a/tests/test_libzim.py +++ b/tests/test_libzim.py @@ -227,3 +227,29 @@ def get_data(self): with Creator(path, main_page) as zim_creator: zim_creator.add_article(BlobErrorArticle(**args)) + +def test_dontcreatezim_onexception(tmpdir): + """ make sure we can prevent ZIM file creation (workaround missing cancel()) + + A new interpreter is instanciated to get a different memory space. + This workaround is not safe and may segfault at GC under some circumstances + + Unless we get a proper cancel() on libzim, that's the only way to not create + a ZIM file on error """ + path, main_page = tmpdir / "test.zim", "welcome" + pycode = f""" +from libzim.writer import Creator +from libzim.writer import Article +class BlobErrorArticle(Article): + def get_data(self): + raise ValueError +zim_creator = Creator("{path}", "{main_page}") +try: + zim_creator.add_article(BlobErrorArticle(**args)) +except Exception: + zim_creator._closed = True +""" + + py = subprocess.run([sys.executable, "-c", pycode]) + assert py.returncode == 0 + assert not path.exists()