From 0560e80b030a68f18eb4878c1d1de745e3070b2d Mon Sep 17 00:00:00 2001 From: eggy Date: Thu, 6 Oct 2022 15:45:52 -0400 Subject: [PATCH] fix(readcomiconline): use better id grabber Although Cloudflare has been turned off, the site has this really annoying system that makes it a pain to download anything (see https://github.com/Xonshiz/comic-dl/issues/299) --- mandown/sources/source_readcomiconline.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mandown/sources/source_readcomiconline.py b/mandown/sources/source_readcomiconline.py index 69428cd..6feae88 100644 --- a/mandown/sources/source_readcomiconline.py +++ b/mandown/sources/source_readcomiconline.py @@ -3,6 +3,7 @@ """ # pylint: disable=invalid-name + import re import requests @@ -34,7 +35,8 @@ def fetch_metadata(self) -> BaseMetadata: ) ] genres: list[str] = [str(e.text) for e in soup.select("a[href^='/Genre']")] - description = str(soup.select_one("p[style='text-align: justify;']").text) + description_maybe = soup.select_one("p[style='text-align: justify;']") + description = str(description_maybe.text if description_maybe else "") cover = self.domains[0] + str(soup.find("link")["href"]) return BaseMetadata(title, author, self.url, genres, description, cover) @@ -58,16 +60,19 @@ def fetch_chapter_image_list(self, chapter: BaseChapter) -> list[str]: images: list[str] = [] start = 0 while (index := text.find("lstImages.push(", start)) != -1: - s_index = index + len('lstImages.push("') - e_index = text.find('");', s_index) + s_index = index + len("lstImages.push('") + e_index = text.find("');", s_index) images.append(text[s_index:e_index]) start = e_index return images @classmethod def url_to_id(cls, url: str) -> str: - *_, last_item = filter(None, url.split("/")) - return last_item + segments = url.split("/") + for i, s in enumerate(segments): + if s == "Comic": + return segments[i + 1] + raise ValueError("Invalid comic URL") @staticmethod def check_url(url: str) -> bool: