python-discord · mbaruh · Oct 18, 2022 · Sep 28, 2022 · Sep 28, 2022 · Sep 28, 2022
diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py
@@ -23,10 +23,12 @@
 from bot.exts.events.code_jams._channels import CATEGORY_NAME as JAM_CATEGORY_NAME
 from bot.exts.moderation.modlog import ModLog
 from bot.log import get_logger
+from bot.utils.helpers import remove_subdomain_from_url
 from bot.utils.messages import format_user
 
 log = get_logger(__name__)
 
+
 # Regular expressions
 CODE_BLOCK_RE = re.compile(
     r"(?P<delim>``?)[^`]+?(?P=delim)(?!`+)"  # Inline codeblock
@@ -583,7 +585,7 @@ async def _has_invites(self, text: str) -> Union[dict, bool]:
         """
         text = self.clean_input(text)
 
-        # Remove backslashes to prevent escape character aroundfuckery like
+        # Remove backslashes to prevent escape character around fuckery like
         # discord\.gg/gdudes-pony-farm
         text = text.replace("\\", "")
 
@@ -649,7 +651,13 @@ async def _has_rich_embed(msg: Message) -> Union[bool, List[Embed]]:
             for embed in msg.embeds:
                 if embed.type == "rich":
                     urls = URL_RE.findall(msg.content)
-                    if not embed.url or embed.url not in urls:
+                    final_urls = set(urls)
+                    # This is due to the way Discord renders URLs in embeds.
+                    # If we send the following URL: https://mobile.twitter.com/something
+                    # Discord renders it as https://twitter.com/something
+                    for url in urls:
+                        final_urls.add(remove_subdomain_from_url(url))
+                    if not embed.url or embed.url not in final_urls:
                         # If `embed.url` does not exist or if `embed.url` is not part of the content
                         # of the message, it's unlikely to be an auto-generated embed by Discord.
                         return msg.embeds

diff --git a/bot/utils/helpers.py b/bot/utils/helpers.py
@@ -1,7 +1,9 @@
 from abc import ABCMeta
 from typing import Optional
+from urllib.parse import urlparse
 
 from discord.ext.commands import CogMeta
+from tldextract import extract
 
 
 class CogABCMeta(CogMeta, ABCMeta):  # noqa: B024 (Ignore abstract class with no abstract methods.)
@@ -30,3 +32,13 @@ def has_lines(string: str, count: int) -> bool:
 def pad_base64(data: str) -> str:
     """Return base64 `data` with padding characters to ensure its length is a multiple of 4."""
     return data + "=" * (-len(data) % 4)
+
+
+def remove_subdomain_from_url(url: str) -> str:
+    """Return `url` with its netloc replaced by the registered domain; the other URL parts are kept."""
+    parsed_url = urlparse(url)
+    # `registered_domain` is empty when the host has no public suffix (IP address, `localhost`);
+    # keep the original netloc then, instead of building a host-less URL such as `http:///path`.
+    netloc = extract(url).registered_domain or parsed_url.netloc
+    parsed_url = parsed_url._replace(netloc=netloc)
+    return parsed_url.geturl()