diff --git a/libzim/reader.py b/libzim/reader.py index bef2f925..2a1f4d03 100644 --- a/libzim/reader.py +++ b/libzim/reader.py @@ -1,4 +1,16 @@ -# flake8: noqa +""" libzim reader module + + - File to open and read ZIM files + - Article are returned by File on get_article() and get_article_by_id() + - NotFound is raised on incorrect article URL query + + Usage: -from .wrapper import FilePy as File + with File(pathlib.Path("myfile.zim")) as zf: + article = zf.get_article(zf.main_page_url) + print(f"Article {article.title} at {article.url} is {article.content.nbytes}b") + """ + +# flake8: noqa +from .wrapper import FilePy as File, NotFound from .wrapper import ReadArticle as Article diff --git a/libzim/wrapper.pyx b/libzim/wrapper.pyx index ab8e5e0e..5f3a9b16 100644 --- a/libzim/wrapper.pyx +++ b/libzim/wrapper.pyx @@ -20,6 +20,7 @@ cimport libzim.wrapper as wrapper +from typing import Generator from cython.operator import dereference, preincrement from cpython.ref cimport PyObject from cpython.buffer cimport PyBUF_WRITABLE @@ -29,8 +30,13 @@ from libcpp.string cimport string from libcpp cimport bool from libcpp.memory cimport shared_ptr, make_shared, unique_ptr +import pathlib import datetime + +class NotFound(RuntimeError): + pass + ######################### # Blob # ######################### @@ -135,33 +141,31 @@ cdef public api: return func() cdef class Creator: - """ - A class to represent a Zim Creator. + """ Zim Creator - Attributes - ---------- - *c_creator : zim.ZimCreator - a pointer to the C++ Creator object - _finalized : bool - flag if the creator was finalized - """ + Attributes + ---------- + *c_creator : zim.ZimCreatorWrapper + a pointer to the C++ Creator object + _finalized : bool + flag if the creator was finalized """ cdef wrapper.ZimCreatorWrapper *c_creator cdef bool _finalized - def __cinit__(self, str filename, str main_page = "", str index_language = "eng", min_chunk_size = 2048): - """Constructs a ZimCreator from parameters. - Parameters - ---------- - filename : str - Zim file path - main_page : str - Zim file main page - index_language : str - Zim file index language (default eng) - min_chunk_size : int - Minimum chunk size (default 2048) - """ + def __cinit__(self, str filename: str, str main_page: str = "", str index_language: str = "eng", min_chunk_size: int = 2048): + """ Constructs a Zim Creator from parameters. + + Parameters + ---------- + filename : str + Zim file path + main_page : str + Zim file main page (without namespace, must be in A/) + index_language : str + Zim file index language (default eng) + min_chunk_size : int + Minimum chunk size (default 2048) """ self.c_creator = wrapper.ZimCreatorWrapper.create(filename.encode("UTF-8"), main_page.encode("UTF-8"), index_language.encode("UTF-8"), min_chunk_size) self._finalized = False @@ -170,17 +174,16 @@ cdef class Creator: del self.c_creator def add_article(self, article not None): - """Add a article to the Creator object. - - Parameters - ---------- - article : ZimArticle - The article to add to the file - Raises - ------ - RuntimeError - If the ZimCreator was already finalized - """ + """ Add an article to the Creator object. + + Parameters + ---------- + article : ZimArticle + The article to add to the file + Raises + ------ + RuntimeError + If the ZimCreator was already finalized """ if self._finalized: raise RuntimeError("ZimCreator already finalized") @@ -191,13 +194,12 @@ cdef class Creator: self.c_creator.addArticle(art) def finalize(self): - """finalize and write added articles to the file. + """ Finalize creation and write added articles to the file. - Raises - ------ - RuntimeError - If the ZimCreator was already finalized - """ + Raises + ------ + RuntimeError + If the ZimCreator was already finalized """ if self._finalized: raise RuntimeError("ZimCreator already finalized") with nogil: @@ -209,36 +211,12 @@ cdef class Creator: ######################## cdef class ReadArticle: - """ - A class to represent a Zim File Article. - - Attributes - ---------- - *c_article : Article (zim::) - a pointer to the C++ article object - - Properties - ----------- - namespace : str - the article namespace - title : str - the article title - content : str - the article content - longurl : str - the article long url i.e {NAMESPACE}/{redirect_url} - url : str - the article url - mimetype : str - the article mimetype - is_redirect : bool - flag if the article is a redirect - - Methods - ------- - from_read_article(zim.Article art) - Creates a python ZimArticle from a C++ zim.Article article. - """ + """ Article in a Zim file + + Attributes + ---------- + *c_article : Article (zim::) + a pointer to the C++ article object """ cdef wrapper.Article c_article cdef ReadingBlob _blob cdef bool _haveBlob @@ -252,50 +230,47 @@ cdef class ReadArticle: self._haveBlob = False cdef __setup(self, wrapper.Article art): - """Assigns an internal pointer to the wrapped C++ article object. + """ Assigns an internal pointer to the wrapped C++ article object. - Parameters - ---------- - *art : Article - Pointer to a C++ (zim::) article object - """ + Parameters + ---------- + *art : Article + Pointer to a C++ (zim::) article object """ # Set new internal C zim.ZimArticle article self.c_article = art self._blob = None - - # Factory functions - Currently Cython can't use classmethods @staticmethod cdef from_read_article(wrapper.Article art): - """Creates a python ZimFileArticle from a C++ Article (zim::). - - Parameters - ---------- - art : Article - A C++ Article read with File - Return - ------ - - """ + """ Creates a python ReadArticle from a C++ Article (zim::) -> ReadArticle + + Parameters + ---------- + art : Article + A C++ Article read with File + Returns + ------ + ReadArticle + Casted article """ cdef ReadArticle article = ReadArticle() article.__setup(art) return article @property - def namespace(self): - """Get the article's namespace""" + def namespace(self) -> str: + """ Article's namespace -> str """ ns = self.c_article.getNamespace() return chr(ns) @property - def title(self): - """Get the article's title""" + def title(self) -> str: + """ Article's title -> str """ return self.c_article.getTitle().decode('UTF-8') @property - def content(self): - """Get the article's content""" + def content(self) -> memoryview: + """ Article's content -> memoryview """ if not self._haveBlob: self._blob = ReadingBlob() self._blob.__setup(self.c_article.getData( 0)) @@ -303,27 +278,27 @@ cdef class ReadArticle: return memoryview(self._blob) @property - def longurl(self): - """Get the article's long url i.e {NAMESPACE}/{url}""" + def longurl(self) -> str: + """ Article's long url, including namespace -> str""" return self.c_article.getLongUrl().decode("UTF-8", "strict") @property - def url(self): - """Get the article's url""" + def url(self) -> str: + """ Article's url without namespace -> str """ return self.c_article.getUrl().decode("UTF-8", "strict") @property - def mimetype(self): - """Get the article's mimetype""" + def mimetype(self) -> str: + """ Article's mimetype -> str """ return self.c_article.getMimeType().decode('UTF-8') @property - def is_redirect(self): - """Get if the article is a redirect""" + def is_redirect(self) -> bool: + """ Whether article is a redirect -> bool """ return self.c_article.isRedirect() def get_redirect_article(self) -> ReadArticle: - """ Target ReadArticle of this one """ + """ Target ReadArticle of this one -> ReadArticle """ if not self.is_redirect: raise RuntimeError("Article is not a redirect") @@ -335,7 +310,7 @@ cdef class ReadArticle: return article def __repr__(self): - return f"{self.__class__.__name__}(url={self.longurl}, title=)" + return f"{self.__class__.__name__}(url={self.longurl}, title={self.title})" @@ -345,108 +320,114 @@ cdef class ReadArticle: ######################### cdef class FilePy: - """ - A class to represent a Zim File Reader. + """ Zim File Reader - Attributes - ---------- - *c_file : File - a pointer to a C++ File object - _filename : str - the file name of the File Reader object - """ + Attributes + ---------- + *c_file : File + a pointer to a C++ File object + _filename : pathlib.Path + the file name of the File Reader object """ cdef wrapper.File *c_file cdef object _filename - def __cinit__(self, str filename): - """Constructs a File from full zim file path. - Parameters - ---------- - filename : str - Full path to a zim file - """ + def __cinit__(self, object filename: pathlib.Path): + """ Constructs a File from full zim file path + + Parameters + ---------- + filename : pathlib.Path + Full path to a zim file """ - self.c_file = new wrapper.File(filename.encode('UTF-8')) - self._filename = self.c_file.getFilename().decode("UTF-8", "strict") + self.c_file = new wrapper.File(str(filename).encode('UTF-8')) + self._filename = pathlib.Path(self.c_file.getFilename().decode("UTF-8", "strict")) def __dealloc__(self): if self.c_file != NULL: del self.c_file + def __enter__(self): + return self + + def __exit__(self, *args): + pass + @property - def filename(self): - """Get the filename of the File object""" + def filename(self) -> pathlib.Path: + """ Filename of the File object -> pathlib.Path """ return self._filename - def get_article(self, url): - """Get a Article with a copy of the file article by full url i.e including namespace - - Parameters - ---------- - url : str - The full url, including namespace, of the article - Returns - ------- - Article - The Article object - Raises - ------ - RuntimeError - If an article with the provided long url is not found in the file - """ + def get_article(self, url: str) -> ReadArticle: + """ ReadArticle with a copy of the file article by full url (including namespace) -> ReadArticle + + Parameters + ---------- + url : str + The full url, including namespace, of the article + Returns + ------- + ReadArticle + The ReadArticle object + Raises + ------ + NotFound + If an article with the provided long url is not found in the file """ # Read to a zim::Article cdef wrapper.Article art = self.c_file.getArticleByUrl(url.encode('UTF-8')) if not art.good(): - raise RuntimeError("Article not found for url") + raise NotFound("Article not found for url") article = ReadArticle.from_read_article(art) return article - def get_metadata(self, name): - """Get the file metadata. - Returns - ------- - dict - A dictionary with the file metadata - """ - article = self.get_article(f"M/{name}") - return article.content + def get_metadata(self, name: str) -> bytes: + """ A Metadata's content -> bytes + + Parameters + ---------- + name: str + url of the metadata article (without namespace) + Returns + ------- + bytes + Metadata article's content. Can be of any type. """ + return bytes(self.get_article(f"M/{name}").content) - def get_article_by_id(self, id): - """Get a ZimFileArticle with a copy of the file article by article id. + def get_article_by_id(self, article_id: int) -> ReadArticle: + """ ReadArticle with a copy of the file article by article id -> ReadArticle Parameters ---------- - id : int + article_id : int The id of the article Returns ------- - ZimFileArticle - The ZimFileArticle object + ReadArticle + The ReadArticle object Raises ------ RuntimeError - If an article with the provided id is not found in the file - """ + If there is a problem in retrieving article (id out of bound) + NotFound + If an article with the provided id is not found in the file """ # Read to a zim::Article - cdef wrapper.Article art = self.c_file.getArticle( id) + cdef wrapper.Article art = self.c_file.getArticle( article_id) if not art.good(): - raise RuntimeError("Article not found for id") + raise NotFound("Article not found for id") article = ReadArticle.from_read_article(art) return article @property - def main_page_url(self): - """Get the file main page url. - Returns - ------- - str - The url of the main page - TODO Check old formats - """ + def main_page_url(self) -> str: + """ File's main page full url -> str + + Returns + ------- + str + The url of the main page, including namespace """ cdef wrapper.Fileheader header = self.c_file.getFileheader() cdef wrapper.Article article if header.hasMainPage(): @@ -461,61 +442,59 @@ cdef class FilePy: return article.getLongUrl().decode("UTF-8", "strict") @property - def checksum(self): - """Get the file checksum. - Returns - ------- - str - The file checksum - """ + def checksum(self) -> str: + """ File's checksum -> str + + Returns + ------- + str + The file's checksum """ return self.c_file.getChecksum().decode("UTF-8", "strict") @property - def article_count(self): - """Get the file article count. - Returns - ------- - int - The total number of articles from the file - """ + def article_count(self) -> int: + """ File's articles count -> int + + Returns + ------- + int + The total number of articles in the file """ return self.c_file.getCountArticles() @property def namespaces(self) -> str: - """Get the namespaces. + """ Namespaces present in the file -> str Returns ------- str - A string containing all namespaces in the file - - """ + A string containing initials of all namespaces in the file (ex: "-AIMX") """ return self.c_file.getNamespaces().decode("UTF-8", "strict") - def get_namespaces_count(self, str ns): - """Get article count from a namespaces. - Returns - ------- - int - The total number of articles from the namespace - """ + def get_namespaces_count(self, str ns) -> int: + """ Articles count within a namespace -> int + + Returns + ------- + int + The total number of articles in the namespace """ return self.c_file.getNamespaceCount(ord(ns[0])) - def suggest(self, query, start=0, end=10): - """Get an iterator of the full urls of suggested articles in the file from a title query. - Parameters - ---------- - query : str - Title query string - start : int - Iterator start (default 0) - end : end - Iterator end (default 10) - Returns - ------- - iterator - An interator with the urls of suggested articles starting from start position - """ + def suggest(self, query: str, start: int = 0, end: int = 10) -> Generator[str, None, None]: + """ Full urls of suggested articles in the file from a title query -> Generator[str, None, None] + + Parameters + ---------- + query : str + Title query string + start : int + Iterator start (default 0) + end : end + Iterator end (default 10) + Returns + ------- + Generator + Url of suggested article """ cdef unique_ptr[wrapper.Search] search = self.c_file.suggestions(query.encode('UTF-8'),start, end) cdef wrapper.search_iterator it = dereference(search).begin() @@ -523,21 +502,21 @@ cdef class FilePy: yield it.get_url().decode('UTF-8') preincrement(it) - def search(self, query, start=0, end=10): - """Get an iterator of the full urls of articles in the file from a search query. - Parameters - ---------- - query : str - Query string - start : int - Iterator start (default 0) - end : end - Iterator end (default 10) - Returns - ------- - iterator - An iterator with the urls of articles matching the search query starting from start position - """ + def search(self, query: str, start: int = 0, end: int = 10) -> Generator[str, None, None]: + """ Full urls of articles in the file from a search query -> Generator[str, None, None] + + Parameters + ---------- + query : str + Query string + start : int + Iterator start (default 0) + end : end + Iterator end (default 10) + Returns + ------- + Generator + Url of article matching the search query """ cdef unique_ptr[wrapper.Search] search = self.c_file.search(query.encode('UTF-8'),start, end) cdef wrapper.search_iterator it = dereference(search).begin() @@ -546,33 +525,33 @@ cdef class FilePy: yield it.get_url().decode('UTF-8') preincrement(it) - def get_search_results_count(self, query): - """Get search results counts for a query. - Parameters - ---------- - query : str - Query string - Returns - ------- - int - Number of search results - """ + def get_search_results_count(self, query: str) -> int: + """ Number of search results for a query -> int + + Parameters + ---------- + query : str + Query string + Returns + ------- + int + Number of search results """ cdef unique_ptr[wrapper.Search] search = self.c_file.search(query.encode('UTF-8'),0, 1) return dereference(search).get_matches_estimated() - def get_suggestions_results_count(self, query): - """Get suggestions results counts for a query. - Parameters - ---------- - query : str - Query string - Returns - ------- - int - Number of article suggestions - """ + def get_suggestions_results_count(self, query: str) -> int: + """ Number of suggestions for a query -> int + + Parameters + ---------- + query : str + Query string + Returns + ------- + int + Number of article suggestions """ cdef unique_ptr[wrapper.Search] search = self.c_file.suggestions(query.encode('UTF-8'),0 , 1) return dereference(search).get_matches_estimated() - def __repr__(self): - return f"{self.__class__.__name__}(filename={self.filename}" + def __repr__(self) -> str: + return f"{self.__class__.__name__}(filename={self.filename})" diff --git a/libzim/writer.py b/libzim/writer.py index 2a3893dc..8c363423 100644 --- a/libzim/writer.py +++ b/libzim/writer.py @@ -1,3 +1,16 @@ +""" libzim writer module + - Creator to create ZIM files + - Article to store ZIM articles metadata + - Blob to store ZIM article content + Usage: + with Creator(pathlib.Path("myfile.zim"), main_page="welcome.html") as xf: + article = MyArticleSubclass( + url="A/welcome.html", + title="My Title", + content=Blob("My content")) + zf.add_article(article) + zf.update_metadata(tags="new;demo") """ + # This file is part of python-libzim # (see https://github.com/libzim/python-libzim) # @@ -17,9 +30,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . - +import pathlib import datetime -from collections import defaultdict +import collections from .wrapper import Creator as _Creator from .wrapper import WritingBlob as Blob @@ -28,121 +41,132 @@ class Article: + """ Article stub to override + + Pass a subclass of it to Creator.add_article() """ + def __init__(self): self._blob = None - def get_url(self): + def get_url(self) -> str: + """ Full URL of article including namespace """ raise NotImplementedError - def get_title(self): + def get_title(self) -> str: + """ Article title. Might be indexed and used in suggestions """ raise NotImplementedError - def is_redirect(self): + def is_redirect(self) -> bool: + """ Whether this redirects to another article (cf. redirec_url) """ raise NotImplementedError - def get_mime_type(self): + def get_mime_type(self) -> str: + """ MIME-type of the article's content. A/ namespace reserved to text/html """ raise NotImplementedError - def get_filename(self): + def get_filename(self) -> str: + """ Filename to get content from. Blank string "" if not used """ raise NotImplementedError - def should_compress(self): + def should_compress(self) -> bool: + """ Whether the article's content should be compressed or not """ raise NotImplementedError - def should_index(self): + def should_index(self) -> bool: + """ Whether the article's content should be indexed or not """ raise NotImplementedError - def redirect_url(self): + def redirect_url(self) -> str: + """ Full URL including namespace of another article """ raise NotImplementedError - def _get_data(self): + def _get_data(self) -> Blob: + """ Internal data-retrieval with a cache to the content's pointer + + You don't need to override this """ if self._blob is None: self._blob = self.get_data() return self._blob - def get_data(self): + def get_data(self) -> Blob: + """ Blob containing the complete content of the article """ raise NotImplementedError + def __repr__(self) -> str: + return f"{self.__class__.__name__}(url={self.get_url()}, title={self.get_title()})" + class MetadataArticle(Article): - def __init__(self, url, metadata_content): + """ Simple Article sub-class for key-value articles on M/ metadata namespace """ + + def __init__(self, url: str, metadata_content: str): Article.__init__(self) self.url = url self.metadata_content = metadata_content - def is_redirect(self): + def is_redirect(self) -> bool: return False - def get_url(self): + def get_url(self) -> str: return f"M/{self.url}" - def get_title(self): + def get_title(self) -> str: return "" - def get_mime_type(self): + def get_mime_type(self) -> str: return "text/plain" - def get_filename(self): + def get_filename(self) -> str: return "" - def should_compress(self): + def should_compress(self) -> bool: return True - def should_index(self): + def should_index(self) -> bool: return False - def get_data(self): + def get_data(self) -> Blob: return Blob(self.metadata_content) -MANDATORY_METADATA_KEYS = [ - "Name", - "Title", - "Creator", - "Publisher", - "Date", - "Description", - "Language", -] - - -def pascalize(keyword): - """ Converts python case to pascal case. example: long_description-> LongDescription """ - return "".join(keyword.title().split("_")) - - class Creator: - """ - A class to represent a Zim Creator. - - Attributes - ---------- - *c_creator : zim.Creator - a pointer to the C++ Creator object - _finalized : bool - flag if the creator was finalized - _filename : str - Zim file path - _main_page : str - Zim file main page - _index_language : str - Zim file Index language - _min_chunk_size : str - Zim file minimum chunk size - _article_counter - Zim file article counter - _metadata - Zim file metadata - """ - - def __init__(self, filename, main_page, index_language, min_chunk_size): - self._creatorWrapper = _Creator(filename, main_page, index_language, min_chunk_size) - self.filename = filename + """ Zim Creator. + + Attributes + ---------- + *_creatorWrapper : wrapper.ZimCreatorWrapper + a pointer to the C++ Creator object wrapper + filename : pathlib.Path + Zim file path + main_page : str + Zim file main page (without namespace) + language : str + Zim file Index language + _article_counter + Zim file article counter + _metadata + Zim file metadata """ + + def __init__( + self, filename: pathlib.Path, main_page: str, index_language: str = "eng", min_chunk_size: int = 2048, + ): + """ Creates a ZIM Creator + + Parameters + ---------- + filename : Path to create the ZIM file at + main_page: ZIM file main article URL (without namespace, must be in A/) + index_language: content language to inform indexer with (ISO-639-3) + min_chunk_size: minimum size of chunks for compression """ + + self._creatorWrapper = _Creator(str(filename), main_page, index_language, min_chunk_size) + self.filename = pathlib.Path(filename) self.main_page = main_page self.language = index_language self._metadata = {} - self._article_counter = defaultdict(int) + self._article_counter = collections.defaultdict(int) self.update_metadata(date=datetime.date.today(), language=index_language) + self._closed = False def __enter__(self): return self @@ -151,43 +175,59 @@ def __exit__(self, *args): self.close() def __del__(self): - self.close() - - def add_article(self, article): + if not self._closed: + self.close() + + def add_article(self, article: Article): + """ Adds an article to the Creator. + + Parameters + ---------- + article : Zim writer Article + The article to add to the file + Raises + ------ + RuntimeError + If the ZimCreator was already finalized """ self._creatorWrapper.add_article(article) if not article.is_redirect(): - self._update_article_counter(article) + # update article counter + self._article_counter[article.get_mime_type().strip()] += 1 - def _update_article_counter(self, article): - # default dict update - self._article_counter[article.get_mime_type().strip()] += 1 + def update_metadata(self, **kwargs: str): + """ Updates Creator metadata for ZIM, supplied as keyword arguments """ - def mandatory_metadata_ok(self): - """Flag if mandatory metadata is complete and not empty""" - metadata_item_ok = [k in self._metadata for k in MANDATORY_METADATA_KEYS] - return all(metadata_item_ok) + def pascalize(keyword: str): + """ Converts python case to pascal case. - def update_metadata(self, **kwargs): - "Updates article metadata" "" - new_metadata = {pascalize(k): v for k, v in kwargs.items()} - self._metadata.update(new_metadata) + example: long_description -> LongDescription """ + return "".join(keyword.title().split("_")) - def write_metadata(self): + self._metadata.update({pascalize(k): v for k, v in kwargs.items()}) + + def close(self): + """ Finalizes and writes added articles to the file + + Raises + ------ + RuntimeError + If the ZimCreator was already finalized """ + if self._closed: + raise RuntimeError("Creator already closed") + + # Store _medtadata dict as MetadataArticle for key, value in self._metadata.items(): - if key == "Date" and isinstance(value, datetime.date): + if key == "Date" and isinstance(value, (datetime.date, datetime.datetime)): value = value.strftime("%Y-%m-%d") article = MetadataArticle(key, value) self._creatorWrapper.add_article(article) - article = MetadataArticle("Counter", self._get_counter_string()) + counter_str = ";".join([f"{k}={v}" for (k, v) in self._article_counter.items()]) + article = MetadataArticle("Counter", counter_str) self._creatorWrapper.add_article(article) - def _get_counter_string(self): - return ";".join(["%s=%s" % (k, v) for (k, v) in self._article_counter.items()]) - - def close(self): - self.write_metadata() self._creatorWrapper.finalize() + self._closed = True - def __repr__(self): + def __repr__(self) -> str: return f"Creator(filename={self.filename})" diff --git a/tests/test_libzim.py b/tests/test_libzim.py index 37979166..9552baa8 100644 --- a/tests/test_libzim.py +++ b/tests/test_libzim.py @@ -17,6 +17,7 @@ # along with this program. If not, see . import pytest +import pathlib from libzim.writer import Article, Blob, Creator from libzim.reader import File @@ -108,7 +109,7 @@ def article(article_content): def test_write_article(tmpdir, article): with Creator( - str(tmpdir / "test.zim"), main_page="welcome", index_language="eng", min_chunk_size=2048, + tmpdir / "test.zim", main_page="welcome", index_language="eng", min_chunk_size=2048, ) as zim_creator: zim_creator.add_article(article) zim_creator.update_metadata( @@ -122,29 +123,14 @@ def test_write_article(tmpdir, article): def test_article_metadata(tmpdir, metadata): with Creator( - str(tmpdir / "test.zim"), main_page="welcome", index_language="eng", min_chunk_size=2048, + tmpdir / "test.zim", main_page="welcome", index_language="eng", min_chunk_size=2048, ) as zim_creator: zim_creator.update_metadata(**metadata) assert zim_creator._metadata == metadata -def test_check_mandatory_metadata(tmpdir): - with Creator( - str(tmpdir / "test.zim"), main_page="welcome", index_language="eng", min_chunk_size=2048, - ) as zim_creator: - assert not zim_creator.mandatory_metadata_ok() - zim_creator.update_metadata( - creator="python-libzim", - description="Created in python", - name="Hola", - publisher="Monadical", - title="Test Zim", - ) - assert zim_creator.mandatory_metadata_ok() - - def test_creator_params(tmpdir): - path = str(tmpdir / "test.zim") + path = tmpdir / "test.zim" main_page = "welcome" main_page_url = f"A/{main_page}" index_language = "eng" @@ -163,7 +149,7 @@ def test_creator_params(tmpdir): def test_segfault_on_realloc(tmpdir): """ assert that we are able to delete an unclosed Creator #31 """ - creator = Creator(str(tmpdir / "test.zim"), "welcome", "eng", 2048) + creator = Creator(tmpdir / "test.zim", "welcome", "eng", 2048) del creator # used to segfault assert True @@ -172,7 +158,7 @@ def test_noleftbehind_empty(tmpdir): """ assert that ZIM with no articles don't leave files behind #41 """ fname = "test_empty.zim" with Creator( - str(tmpdir / fname), main_page="welcome", index_language="eng", min_chunk_size=2048, + tmpdir / fname, main_page="welcome", index_language="eng", min_chunk_size=2048, ) as zim_creator: print(zim_creator) @@ -180,7 +166,25 @@ def test_noleftbehind_empty(tmpdir): def test_double_close(tmpdir): - creator = Creator(str(tmpdir / "test.zim"), "welcome", "eng", 2048) + creator = Creator(tmpdir / "test.zim", "welcome", "eng", 2048) creator.close() with pytest.raises(RuntimeError): creator.close() + + +def test_default_creator_params(tmpdir): + """ ensure we can init a Creator without specifying all params """ + creator = Creator(tmpdir / "test.zim", "welcome") + assert True # we could init the Creator without specifying other params + assert creator.language == "eng" + assert creator.main_page == "welcome" + + +def test_filename_param_types(tmpdir): + path = tmpdir / "test.zim" + with Creator(path, "welcome") as creator: + assert creator.filename == path + assert isinstance(creator.filename, pathlib.Path) + with Creator(str(path), "welcome") as creator: + assert creator.filename == path + assert isinstance(creator.filename, pathlib.Path) diff --git a/tests/test_libzim_file_reader.py b/tests/test_libzim_file_reader.py index 53cf6940..13edece1 100644 --- a/tests/test_libzim_file_reader.py +++ b/tests/test_libzim_file_reader.py @@ -3,14 +3,14 @@ import pytest -from libzim.reader import File +from libzim.reader import File, NotFound DATA_DIR = Path(__file__).parent ZIMFILES = [ { - "filename": str(DATA_DIR / "wikipedia_es_physics_mini.zim"), + "filename": DATA_DIR / "wikipedia_es_physics_mini.zim", "checksum": "99ea7a5598c6040c4f50b8ac0653b703", "namespaces": "-AIMX", "article_count": 22027, @@ -43,6 +43,7 @@ def article_data(): def test_zim_filename(reader, zimdata): for k, v in zimdata.items(): assert getattr(reader, k) == v + assert isinstance(reader.filename, Path) def test_zim_read(reader, article_data): @@ -110,9 +111,9 @@ def test_search(reader): def test_get_wrong_article(reader): - with pytest.raises(RuntimeError): + with pytest.raises(RuntimeError): # out of range reader.get_article_by_id(reader.article_count + 100) - with pytest.raises(RuntimeError): + with pytest.raises(NotFound): reader.get_article("A/I_do_not_exists")