Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions bot/exts/filters/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
from bot.exts.events.code_jams._channels import CATEGORY_NAME as JAM_CATEGORY_NAME
from bot.exts.moderation.modlog import ModLog
from bot.log import get_logger
from bot.utils.helpers import remove_subdomain_from_url
from bot.utils.messages import format_user

log = get_logger(__name__)


# Regular expressions
CODE_BLOCK_RE = re.compile(
r"(?P<delim>``?)[^`]+?(?P=delim)(?!`+)" # Inline codeblock
Expand Down Expand Up @@ -583,7 +585,7 @@ async def _has_invites(self, text: str) -> Union[dict, bool]:
"""
text = self.clean_input(text)

# Remove backslashes to prevent escape character aroundfuckery like
# Remove backslashes to prevent escape character around fuckery like
# discord\.gg/gdudes-pony-farm
text = text.replace("\\", "")

Expand Down Expand Up @@ -649,7 +651,13 @@ async def _has_rich_embed(msg: Message) -> Union[bool, List[Embed]]:
for embed in msg.embeds:
if embed.type == "rich":
urls = URL_RE.findall(msg.content)
if not embed.url or embed.url not in urls:
final_urls = set(urls)
# This is due to the way Discord renders relative URLs in embeds:
# if we send the following url: https://mobile.twitter.com/something
# Discord renders it as https://twitter.com/something
for url in urls:
final_urls.add(remove_subdomain_from_url(url))
if not embed.url or embed.url not in final_urls:
# If `embed.url` does not exist or if `embed.url` is not part of the content
# of the message, it's unlikely to be an auto-generated embed by Discord.
return msg.embeds
Expand Down
12 changes: 12 additions & 0 deletions bot/utils/helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from abc import ABCMeta
from typing import Optional
from urllib.parse import urlparse

from discord.ext.commands import CogMeta
from tldextract import extract


class CogABCMeta(CogMeta, ABCMeta): # noqa: B024 (Ignore abstract class with no abstract methods.)
Expand Down Expand Up @@ -30,3 +32,13 @@ def has_lines(string: str, count: int) -> bool:
def pad_base64(data: str) -> str:
    """Append `=` padding to base64 `data` so that its length becomes a multiple of 4."""
    # `-len(data) % 4` is the distance from the current length up to the next multiple of 4.
    missing = -len(data) % 4
    padding = "=" * missing
    return data + padding


def remove_subdomain_from_url(url: str) -> str:
    """
    Return `url` with the subdomains stripped from its host, leaving every other component intact.

    For example, `https://mobile.twitter.com/something` becomes `https://twitter.com/something`.
    Any userinfo (`user:pass@`) and port (`:8080`) present in the network location are kept.

    If `url` has no network location, or no registered domain can be extracted from it
    (presumably the case for IP addresses and bare hostnames such as `localhost`),
    `url` is returned unchanged.
    """
    parsed_url = urlparse(url)
    registered_domain = extract(url).registered_domain
    if not parsed_url.netloc or not registered_domain:
        # There is no host to substitute; overwriting the netloc here would yield a malformed URL.
        return url

    # Replace only the host portion of the netloc so that userinfo and port survive.
    userinfo, at_sign, host_and_port = parsed_url.netloc.rpartition("@")
    _, colon, port = host_and_port.partition(":")
    netloc = f"{userinfo}{at_sign}{registered_domain}{colon}{port}"
    return parsed_url._replace(netloc=netloc).geturl()