Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement new attachment handlers #3

Draft
wants to merge 13 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
!/cogs
!/core
!/plugins
!/src
!*.py
!LICENSE
!pdm.lock
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ repos:
hooks:
- id: ruff
- repo: https://github.com/pdm-project/pdm
rev: 2.10.0 # a PDM release exposing the hook
rev: 2.11.2 # a PDM release exposing the hook
hooks:
- id: pdm-export
# command arguments, e.g.:
args: [ '-o', 'requirements.txt', '--without-hashes' ]
files: ^pdm.lock$
- repo: https://github.com/pdm-project/pdm
rev: 2.10.0 # a PDM release exposing the hook
rev: 2.11.2 # a PDM release exposing the hook
hooks:
- id: pdm-lock-check
22 changes: 22 additions & 0 deletions bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from emoji import UNICODE_EMOJI
from pkg_resources import parse_version

from core.attachments.AttachmentHandler import IAttachmentHandler
from core.attachments.MongoAttachmentClient import MongoAttachmentHandler
from core.attachments.S3AttachmentClient import S3AttachmentHandler
from core.blocklist import Blocklist, BlockReason

try:
Expand Down Expand Up @@ -92,6 +95,25 @@ def __init__(self):

self.blocklist = Blocklist(bot=self)

# Pick the attachment storage backend named by the "attachment_datastore" option.
datastore = self.config["attachment_datastore"]
if datastore == "internal":
    logger.info("Using internal attachment handler.")
    self.attachment_handler: IAttachmentHandler = MongoAttachmentHandler(self.api.db)
elif datastore == "s3":
    logger.info("Using S3 attachment handler.")
    endpoint = self.config["s3_endpoint"]
    # An empty string is as unusable as a missing value, so reject both.
    if not endpoint:
        logger.critical("S3 endpoint must be set when using the S3 attachment datastore.")
        raise InvalidConfigError("s3_endpoint must be set.")
    # Unset/empty optional settings are normalised to None before being handed over.
    self.attachment_handler = S3AttachmentHandler(
        endpoint=endpoint,
        access_key=self.config["s3_access_key"] or None,
        secret_key=self.config["s3_secret_key"] or None,
        region=self.config["s3_region"] or None,
        bucket=self.config["s3_bucket"] or None,
    )
else:
    # Name the option that is actually read above so the error is actionable.
    raise InvalidConfigError("Invalid attachment_datastore option set.")

self.startup()

def get_guild_icon(
Expand Down
26 changes: 26 additions & 0 deletions core/attachments/AttachmentHandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from abc import ABC, abstractmethod

from discord.message import Message


class IAttachmentHandler(ABC):
    """Abstract interface that every attachment storage backend implements."""

    @abstractmethod
    async def upload_attachments(self, message: Message) -> list[dict]:
        """
        Upload all attachments from a message to the backing store.

        Parameters
        ----------
        message
            The message whose attachments are uploaded.

        Returns
        -------
        list[dict]
            The entries that should be appended to the thread document's
            attachments field.
        """

    @abstractmethod
    async def download_attachment(self, attachment_id: int) -> dict:
        """
        Download the attachment with the given ID.

        The contents of the returned dict are not specified by this interface yet.
        """

    @abstractmethod
    async def delete_attachment(self, attachment_id: int) -> dict:
        """
        Delete the attachment with the given ID.

        The contents of the returned dict are not specified by this interface yet.
        """
108 changes: 108 additions & 0 deletions core/attachments/MongoAttachmentClient.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import asyncio
import datetime
from typing import Any

from discord.message import Attachment, Message
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from pymongo.results import InsertOneResult

from core.attachments.AttachmentHandler import IAttachmentHandler
from core.models import getLogger


def _mongo_attachment_dict(attachment: Attachment, data: bytes) -> dict:
"""
Convert a discord attachment to a dict that can be stored in the database
Parameters
----------
attachment
data

Returns
-------
dict
"""
return {
# same as discord id
"_id": attachment.id,
"filename": attachment.filename,
"content_type": attachment.content_type,
"width": attachment.width,
"height": attachment.height,
"description": attachment.description,
"size": attachment.size,
"data": data,
"uploaded_at": datetime.datetime.now(datetime.timezone.utc),
}


class MongoAttachmentHandler(IAttachmentHandler):
    """Attachment handler that stores attachment bytes in the ``attachments`` collection."""

    logger = getLogger(__name__)

    # 8 MB to bytes
    # NOTE(review): nothing in this class checks attachments against this limit yet.
    image_max_size = 1024 * 1024 * 8

    def __init__(self, database: AsyncIOMotorDatabase) -> None:
        """
        Parameters
        ----------
        database
            The database whose ``attachments`` collection receives the documents.
        """
        self.client = database
        self.attachment_collection: AsyncIOMotorCollection = database["attachments"]
        # self.log_collection: AsyncIOMotorCollection = database["logs"]

    async def _store_attachments_bulk(self, attachments: list[Attachment]) -> Any:
        """
        Read every attachment concurrently and insert them with a single ``insert_many``.

        Returns
        -------
        The IDs of the inserted documents.
        """
        payloads = await asyncio.gather(*(attachment.read() for attachment in attachments))

        # gather() preserves argument order, so zip pairs each attachment with its own bytes.
        results = await self.attachment_collection.insert_many(
            [
                _mongo_attachment_dict(attachment, payload)
                for attachment, payload in zip(attachments, payloads)
            ]
        )

        return results.inserted_ids

    async def _store_attachment(self, attachment: Attachment) -> Any:
        """Read a single attachment, insert it, and return the inserted document's ID."""
        result: InsertOneResult = await self.attachment_collection.insert_one(
            _mongo_attachment_dict(attachment, await attachment.read())
        )
        return result.inserted_id

    async def upload_attachments(
        self,
        message: Message,
    ) -> list[dict]:
        """
        Store all attachments of a message in the database.

        Parameters
        ----------
        message
            The message whose attachments are stored.

        Returns
        -------
        list[dict]
            One metadata entry per attachment; empty when the message has no attachments.
        """
        message_attachments = message.attachments

        # Messages without attachments are the common case; indexing [0] below would
        # raise IndexError for them, so return early.
        if not message_attachments:
            return []

        if len(message_attachments) > 1:
            await self._store_attachments_bulk(message_attachments)
        else:
            await self._store_attachment(message_attachments[0])

        return [
            {
                "id": attachment.id,
                "filename": attachment.filename,
                "type": "internal",
                # URL points to the original discord URL
                "url": attachment.url,
                "content_type": attachment.content_type,
                "width": attachment.width,
                "height": attachment.height,
            }
            for attachment in message_attachments
        ]

    async def download_attachment(self, attachment_id: int) -> dict:
        """
        Placeholder: not implemented yet, currently returns ``None``.

        Declared ``async`` to match ``IAttachmentHandler`` so callers can await it.
        """

    async def delete_attachment(self, attachment_id: int) -> dict:
        """
        Placeholder: not implemented yet, currently returns ``None``.

        Declared ``async`` to match ``IAttachmentHandler`` so callers can await it.
        """

    def set_max_size(self, size: int) -> None:
        """
        Set the maximum size of an image that can be stored in the database.

        Parameters
        ----------
        size
            The new limit, stored on this instance as ``image_max_size``.
        """
        self.image_max_size = size
90 changes: 90 additions & 0 deletions core/attachments/S3AttachmentClient.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import asyncio
import datetime
import io

from discord import Message
from minio import Minio
from minio.commonconfig import Tags

from core.attachments.AttachmentHandler import IAttachmentHandler


class S3AttachmentHandler(IAttachmentHandler):
    """Attachment handler that stores attachment bytes as objects in an S3 bucket."""

    def __init__(
        self,
        endpoint: str,
        access_key: str | None = None,
        secret_key: str | None = None,
        region: str | None = None,
        bucket: str | None = "modmail-attachments",
        secure: bool = False,
    ) -> None:
        """
        Initialize the S3AttachmentHandler and make sure the target bucket exists.

        Parameters
        ----------
        endpoint : str
            The endpoint for the S3 bucket.
        access_key : str | None
            The access key for the S3 bucket.
        secret_key : str | None
            The secret key for the S3 bucket.
        region : str | None
            The region passed to the client.
        bucket : str | None
            The bucket that objects are written to. ``None`` or an empty string
            falls back to ``"modmail-attachments"``.
        secure : bool
            Passed to the client as its ``secure`` flag. Defaults to ``False``,
            which is the value that was previously hard-coded.
        """
        # Callers may pass None explicitly for an unset option; checking or creating
        # a bucket named None would fail, so fall back to the default name.
        self.bucket = bucket or "modmail-attachments"
        self.region = region

        self.client = Minio(
            endpoint, access_key=access_key, secret_key=secret_key, secure=secure, region=region
        )
        if not self.client.bucket_exists(self.bucket):
            self.client.make_bucket(self.bucket)

    async def upload_attachments(self, message: Message) -> list[dict]:
        """
        Upload attachments from a given message to the S3 bucket.

        Parameters
        ----------
        message : Message
            The message containing the attachments to upload.

        Returns
        -------
        list[dict]
            A list of dictionaries containing information about the uploaded attachments.
        """
        attachments = []
        tags = Tags.new_object_tags()
        # TODO(review): also include a tag for the bot ID, to help people who share
        # one bucket between multiple bots that they host.
        tags["message_id"] = str(message.id)
        tags["channel_id"] = str(message.channel.id)
        for attachment in message.attachments:
            payload = await attachment.read()
            # put_object is a synchronous call; run it in a worker thread so the
            # event loop keeps running while the upload is in flight.
            result = await asyncio.to_thread(
                self.client.put_object,
                self.bucket,
                str(attachment.id),
                io.BytesIO(payload),
                # Use the number of bytes actually read rather than reported metadata.
                length=len(payload),
                content_type=attachment.content_type,
                tags=tags,
            )
            attachments.append(
                {
                    "id": attachment.id,
                    "filename": attachment.filename,
                    "type": "s3",
                    "s3_object": result.object_name,
                    "s3_bucket": result.bucket_name,
                    "content_type": attachment.content_type,
                    "width": attachment.width,
                    "height": attachment.height,
                    "description": attachment.description,
                    "size": attachment.size,
                    "uploaded_at": datetime.datetime.now(datetime.timezone.utc),
                }
            )

        return attachments

    async def download_attachment(self, attachment_id: int) -> dict:
        """Placeholder: not implemented yet, currently returns ``None``."""

    async def delete_attachment(self, attachment_id: int) -> dict:
        """Placeholder: not implemented yet, currently returns ``None``."""
19 changes: 4 additions & 15 deletions core/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from aiohttp import ClientResponse, ClientResponseError
from discord import DMChannel, Member, Message, TextChannel
from discord.ext import commands
from motor.motor_asyncio import AsyncIOMotorClient
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase
from pymongo.errors import ConfigurationError
from pymongo.uri_parser import parse_uri

Expand Down Expand Up @@ -299,7 +299,7 @@ class ApiClient:

def __init__(self, bot, db):
self.bot = bot
self.db = db
self.db: AsyncIOMotorDatabase = db
self.session = bot.session

async def request(
Expand Down Expand Up @@ -452,7 +452,7 @@ def __init__(self, bot):

try:
database = parse_uri(mongo_uri).get("database") or "modmail_bot"
db = AsyncIOMotorClient(mongo_uri)[database]
db: AsyncIOMotorDatabase = AsyncIOMotorClient(mongo_uri)[database]
except ConfigurationError as e:
logger.critical(
"Your MongoDB CONNECTION_URI might be copied wrong, try re-copying from the source again. "
Expand Down Expand Up @@ -668,18 +668,7 @@ async def append_log(
},
"content": message.content,
"type": type_,
"attachments": [
{
"id": a.id,
"filename": a.filename,
# In previous versions this was true for both videos and images
"is_image": a.content_type.startswith("image/"),
"size": a.size,
"url": a.url,
"content_type": a.content_type,
}
for a in message.attachments
],
"attachments": await self.bot.attachment_handler.upload_attachments(message),
}

return await self.logs.find_one_and_update(
Expand Down
6 changes: 6 additions & 0 deletions core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,12 @@ class ConfigManager:
"log_level": "INFO",
# data collection
"data_collection": True,
"attachment_datastore": "internal",
"s3_endpoint": None,
"s3_access_key": None,
"s3_secret_key": None,
"s3_region": None,
"s3_bucket": "modmail-attachments",
}

colors = {"mod_color", "recipient_color", "main_color", "error_color"}
Expand Down
Loading
Loading