Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 29 additions & 10 deletions bot/exts/filtering/_filter_lists/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@
if typing.TYPE_CHECKING:
from bot.exts.filtering.filtering import Filtering

# Base URL of the paste service; interpolated into both embed descriptions below.
PASTE_URL = "https://paste.pythondiscord.com"
# Embed text used when a `.py` attachment is among the disallowed extensions.
PY_EMBED_DESCRIPTION = (
"It looks like you tried to attach a Python file - "
f"please use a code-pasting service such as {PASTE_URL}"
)

# Extensions handled as "text-like" attachments.
TXT_LIKE_FILES = {".txt", ".csv", ".json"}
# Embed text used when a text-like attachment is disallowed.
# The first segment is deliberately a plain string (not an f-string) so that
# `{blocked_extension}` survives as a placeholder for a later `.format()` call;
# only the second segment is an f-string, resolving `PASTE_URL` at import time.
TXT_EMBED_DESCRIPTION = (
"You either uploaded a `{blocked_extension}` file or entered a message that was too long. "
f"Please use our [paste bin]({PASTE_URL}) instead."
)

DISALLOWED_EMBED_DESCRIPTION = (
"It looks like you tried to attach file type(s) that we do not allow ({joined_blacklist}). "
"We currently allow the following file types: **{joined_whitelist}**.\n\n"
Expand Down Expand Up @@ -75,23 +87,30 @@ async def actions_for(
not_allowed = {ext: filename for ext, filename in all_ext if ext not in allowed_ext}

if ctx.event == Event.SNEKBOX:
not_allowed = dict(not_allowed.items())
not_allowed = {ext: filename for ext, filename in not_allowed.items() if ext not in TXT_LIKE_FILES}

if not not_allowed: # Yes, it's a double negative. Meaning all attachments are allowed :)
return None, [], {ListType.ALLOW: triggered}

# At this point, something is disallowed.
if ctx.event != Event.SNEKBOX: # Don't post the embed if it's a snekbox response.
meta_channel = bot.instance.get_channel(Channels.meta)
if not self._whitelisted_description:
self._whitelisted_description = ", ".join(
filter_.content for filter_ in self[ListType.ALLOW].filters.values()
if ".py" in not_allowed:
# Provide a pastebin link for .py files.
ctx.dm_embed = PY_EMBED_DESCRIPTION
elif txt_extensions := {ext for ext in TXT_LIKE_FILES if ext in not_allowed}:
# Work around Discord auto-conversion of messages longer than 2000 chars to .txt
ctx.dm_embed = TXT_EMBED_DESCRIPTION.format(blocked_extension=txt_extensions.pop())
else:
meta_channel = bot.instance.get_channel(Channels.meta)
if not self._whitelisted_description:
self._whitelisted_description = ", ".join(
filter_.content for filter_ in self[ListType.ALLOW].filters.values()
)
ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format(
joined_whitelist=self._whitelisted_description,
joined_blacklist=", ".join(not_allowed),
meta_channel_mention=meta_channel.mention,
)
ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format(
joined_whitelist=self._whitelisted_description,
joined_blacklist=", ".join(not_allowed),
meta_channel_mention=meta_channel.mention,
)

ctx.matches += not_allowed.values()
ctx.blocked_exts |= set(not_allowed)
Expand Down
5 changes: 4 additions & 1 deletion bot/exts/filtering/_filter_lists/filter_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,10 @@ def __hash__(self):
return hash(id(self))


class FilterList[T: Filter](dict[ListType, AtomicList], FieldRequiring):
T = typing.TypeVar("T", bound=Filter)


class FilterList(dict[ListType, AtomicList], typing.Generic[T], FieldRequiring):
"""Dispatches events to lists of _filters, and aggregates the responses into a single list of actions to take."""

# Each subclass must define a name matching the filter_list name we're expecting to receive from the database.
Expand Down
14 changes: 9 additions & 5 deletions bot/exts/filtering/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,21 @@
from abc import abstractmethod
from copy import copy
from functools import reduce
from typing import Any, NamedTuple, Self
from typing import Any, NamedTuple, Self, TypeVar

from bot.exts.filtering._filter_context import FilterContext
from bot.exts.filtering._settings_types import settings_types
from bot.exts.filtering._settings_types.settings_entry import ActionEntry, SettingsEntry, ValidationEntry
from bot.exts.filtering._utils import FieldRequiring
from bot.log import get_logger

TSettings = TypeVar("TSettings", bound="Settings")

log = get_logger(__name__)

_already_warned = set[str]()
_already_warned: set[str] = set()

T = TypeVar("T", bound=SettingsEntry)


def create_settings(
Expand Down Expand Up @@ -51,7 +55,7 @@ def create_settings(
)


class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]):
class Settings(FieldRequiring, dict[str, T]):
"""
A collection of settings.

Expand All @@ -65,7 +69,7 @@ class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]):

entry_type: type[T]

_already_warned = set[str]()
_already_warned: set[str] = set()

@abstractmethod # ABCs have to have at least one abstract method to actually count as such.
def __init__(self, settings_data: dict, *, defaults: Settings | None = None, keep_empty: bool = False):
Expand Down Expand Up @@ -100,7 +104,7 @@ def overrides(self) -> dict[str, Any]:
"""Return a dictionary of overrides across all entries."""
return reduce(operator.or_, (entry.overrides for entry in self.values() if entry), {})

def copy(self: Self) -> Self:
def copy(self: TSettings) -> TSettings:
"""Create a shallow copy of the object."""
return copy(self)

Expand Down
19 changes: 1 addition & 18 deletions bot/exts/filtering/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,6 @@
WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday


async def _extract_text_file_content(att: "discord.Attachment") -> str:
    """
    Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment.

    The encoding is taken from the `charset` parameter of the attachment's content type.
    If the content type is missing, has no charset, or names a codec Python does not know,
    UTF-8 is used instead. Bytes that cannot be decoded are replaced rather than raising,
    so a malformed attachment cannot abort the caller.

    Returns the extracted text prefixed with the attachment's filename and a colon.
    """
    # `content_type` may be None, and the charset may be quoted or followed by further
    # `;`-separated parameters, so match only the bare codec name.
    charset_match = re.search(r'charset="?([^\s;"]+)', att.content_type or "")
    file_encoding = charset_match.group(1) if charset_match else "utf-8"

    raw_content = await att.read()
    try:
        text = raw_content.decode(encoding=file_encoding, errors="replace")
    except LookupError:
        # The declared charset is not a codec known to Python.
        text = raw_content.decode(encoding="utf-8", errors="replace")

    file_lines: list[str] = text.splitlines()
    # Cap by line count first, then by character count.
    first_n_lines = "\n".join(file_lines[:30])[:2_000]
    return f"{att.filename}: {first_n_lines}"


class Filtering(Cog):
"""Filtering and alerting for content posted on the server."""

Expand All @@ -88,7 +80,7 @@ class Filtering(Cog):
def __init__(self, bot: Bot):
self.bot = bot
self.filter_lists: dict[str, FilterList] = {}
self._subscriptions = defaultdict[Event, list[FilterList]](list)
self._subscriptions: defaultdict[Event, list[FilterList]] = defaultdict(list)
self.delete_scheduler = scheduling.Scheduler(self.__class__.__name__)
self.webhook: discord.Webhook | None = None

Expand Down Expand Up @@ -231,15 +223,6 @@ async def on_message(self, msg: Message) -> None:
self.message_cache.append(msg)

ctx = FilterContext.from_message(Event.MESSAGE, msg, None, self.message_cache)

text_contents = [
await _extract_text_file_content(a)
for a in msg.attachments if "charset" in a.content_type
]
if text_contents:
attachment_content = "\n\n".join(text_contents)
ctx = ctx.replace(content=f"{ctx.content}\n\n{attachment_content}")

result_actions, list_messages, triggers = await self._resolve_action(ctx)
self.message_cache.update(msg, metadata=triggers)
if result_actions:
Expand Down
144 changes: 0 additions & 144 deletions bot/exts/utils/attachment_pastebin_uploader.py

This file was deleted.

3 changes: 1 addition & 2 deletions bot/exts/utils/snekbox/_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from bot.bot import Bot
from bot.constants import BaseURLs, Channels, Emojis, MODERATION_ROLES, Roles, URLs
from bot.decorators import redirect_output
from bot.exts.filtering._filter_lists.extension import TXT_LIKE_FILES
from bot.exts.help_channels._channel import is_help_forum_post
from bot.exts.utils.snekbox._eval import EvalJob, EvalResult
from bot.exts.utils.snekbox._io import FileAttachment
Expand All @@ -31,8 +32,6 @@
ANSI_REGEX = re.compile(r"\N{ESC}\[[0-9;:]*m")
ESCAPE_REGEX = re.compile("[`\u202E\u200B]{3,}")

TXT_LIKE_FILES = {".txt", ".csv", ".json", ".py"}

# The timeit command should only output the very last line, so all other output should be suppressed.
# This will be used as the setup code along with any setup code provided.
TIMEIT_SETUP_WRAPPER = """
Expand Down