Skip to content

Commit

Permalink
[kemonoparty:discord] improve 'inline' extraction (#1940)
Browse files: browse the repository at this point in the history
- extract media.discordapp.*NET* URLs
- rewrite media.discordapp.net to cdn.discordapp.com
- use a more restricted set of characters for the URL path
  • Loading branch information
mikf committed Oct 24, 2021
1 parent 02a247f commit f1487a3
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions gallery_dl/extractor/kemonoparty.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ def items(self):
self._prepare_ddosguard_cookies()

find_inline = re.compile(
r"https?://(?:cdn|media)\.discordapp.com/\S+").findall
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall

posts = self.posts()
max_posts = self.config("max-posts")
Expand All @@ -251,7 +252,8 @@ def items(self):
attachment["type"] = "attachment"
append(attachment)
for path in find_inline(post["content"] or ""):
append({"path": path, "name": path, "type": "inline"})
append({"path": "https://cdn.discordapp.com" + path,
"name": path, "type": "inline"})

post["channel_name"] = self.channel_name
post["date"] = text.parse_datetime(
Expand Down

0 comments on commit f1487a3

Please sign in to comment.