From f1487a3cfa5cb73d71f5fb52e4f5f6809136bf1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 24 Oct 2021 21:15:21 +0200 Subject: [PATCH] [kemonoparty:discord] improve 'inline' extraction (#1940) - extract media.discordapp.*NET* URLs - rewrite media.discordapp.net to cdn.discordapp.com - use a more restricted set of characters for the URL path --- gallery_dl/extractor/kemonoparty.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 8dc29c4b43..d5aad67248 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -237,7 +237,8 @@ def items(self): self._prepare_ddosguard_cookies() find_inline = re.compile( - r"https?://(?:cdn|media)\.discordapp.com/\S+").findall + r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)" + r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall posts = self.posts() max_posts = self.config("max-posts") @@ -251,7 +252,8 @@ def items(self): attachment["type"] = "attachment" append(attachment) for path in find_inline(post["content"] or ""): - append({"path": path, "name": path, "type": "inline"}) + append({"path": "https://cdn.discordapp.com" + path, + "name": path, "type": "inline"}) post["channel_name"] = self.channel_name post["date"] = text.parse_datetime(