Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
# pylint: disable=too-many-lines

import json
from urllib.parse import quote

from bs4 import BeautifulSoup
from django.conf import settings
from django.db import transaction
from django.db.models import Count, Exists, OuterRef, Q
from django.utils.translation import gettext_lazy as _
Expand Down Expand Up @@ -661,7 +664,55 @@ def get_textBody(self, instance): # pylint: disable=invalid-name
@extend_schema_field(serializers.ListField(child=serializers.DictField()))
def get_htmlBody(self, instance):  # pylint: disable=invalid-name
    """Return the list of HTML body parts (JMAP style).

    When ``settings.PROXY_EXTERNAL_IMAGES`` is enabled and the requesting
    user is authenticated and reaches the message's thread through one of
    their mailboxes, image sources in the parts are rewritten by
    ``_proxy_images_in_html``. In every other case the parsed parts are
    returned untouched.
    """
    html_body_parts = instance.get_parsed_field("htmlBody") or []

    # Cheap checks first: skip the access query when there is nothing to
    # rewrite or when the proxy feature is switched off.
    if not html_body_parts or not settings.PROXY_EXTERNAL_IMAGES:
        return html_body_parts

    request = self.context.get("request")
    user = getattr(request, "user", None)
    if user is None or not user.is_authenticated:
        return html_body_parts

    # This is a thread access row, not a mailbox; its ``mailbox`` relation is
    # fetched in the same query because it is passed on right below.
    thread_access = (
        instance.thread.accesses.filter(mailbox__accesses__user=user)
        .select_related("mailbox")
        .first()
    )
    if thread_access is None:
        return html_body_parts

    return self._proxy_images_in_html(
        html_body_parts, instance, thread_access.mailbox
    )

def _proxy_images_in_html(self, html_body_parts, instance, mailbox):
    """Rewrite external image URLs and CID references to use proxy.

    Args:
        html_body_parts: List of HTML body part dicts; the markup is read
            from each part's "content" key.
        instance: Object exposing ``get_parsed_field`` and ``id``; its parsed
            "attachments" are used to resolve ``cid:`` sources.
        mailbox: Object whose ``id`` is embedded in the image-proxy URL.

    Returns:
        A new list of parts. Parts without content are passed through as-is;
        the others are shallow copies carrying the rewritten HTML, so the
        dicts received as input are never modified.
    """
    attachments = instance.get_parsed_field("attachments") or []
    # Content-ID -> attachment position, used to build the blob download URL.
    cid_map = {
        att.get("cid"): idx for idx, att in enumerate(attachments) if att.get("cid")
    }
    api_root = f"/api/{settings.API_VERSION}"

    proxified_parts = []
    for part in html_body_parts:
        html_content = part.get("content", "")
        if not html_content:
            proxified_parts.append(part)
            continue

        soup = BeautifulSoup(html_content, "html.parser")

        for img in soup.find_all("img"):
            # Browsers ignore whitespace around the attribute value.
            src = (img.get("src") or "").strip()
            if not src:
                continue

            # URL schemes are case-insensitive: "HTTPS://..." must not be
            # able to slip past the proxy. "https://" is 8 characters long.
            scheme_probe = src[:8].lower()

            if scheme_probe.startswith("cid:"):
                cid = src[4:]
                if cid in cid_map:
                    img["src"] = (
                        f"{api_root}/blob/msg_{instance.id}_{cid_map[cid]}/download/"
                    )

            elif scheme_probe.startswith(("http://", "https://")):
                img["src"] = (
                    f"{api_root}/mailboxes/{mailbox.id}/image-proxy/?url={quote(src)}"
                )

        # Copy instead of assigning into ``part``: the parsed parts belong to
        # the caller and must not end up holding proxied markup.
        proxified_parts.append({**part, "content": str(soup)})

    return proxified_parts

@extend_schema_field(serializers.CharField(allow_null=True))
def get_draftBody(self, instance): # pylint: disable=invalid-name
Expand Down
4 changes: 4 additions & 0 deletions src/backend/core/api/viewsets/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ def download(self, request, pk=None):
f'attachment; filename="{attachment["name"]}"'
)
response["Content-Length"] = attachment["size"]
# Enable browser caching for 30 days (inline images benefit from this)
response["Cache-Control"] = "private, max-age=2592000"

else:
# Get the blob
Expand All @@ -218,6 +220,8 @@ def download(self, request, pk=None):
# Add appropriate headers for download
response["Content-Disposition"] = f'attachment; filename="{filename}"'
response["Content-Length"] = blob.size
# Enable browser caching for 30 days (inline images benefit from this)
response["Cache-Control"] = "private, max-age=2592000"

return response

Expand Down
198 changes: 198 additions & 0 deletions src/backend/core/api/viewsets/image_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
"""API ViewSet for proxying external images."""

import logging
from urllib.parse import unquote

import magic
import requests
from django.conf import settings
from django.http import HttpResponse
from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema
from rest_framework import status
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.viewsets import ViewSet

from core import models
from core.api import permissions
from core.utils import validate_url_safety

logger = logging.getLogger(__name__)


class ImageProxyViewSet(ViewSet):
"""
ViewSet for proxying external images to protect user privacy.

Images are fetched on-demand from external sources and served through
the application. This prevents tracking pixels from leaking user IP
addresses and browsing behavior to external servers.
"""

# Only authentication is enforced here; access to the specific mailbox is
# checked inside list().
permission_classes = [permissions.IsAuthenticated]

# OpenAPI description of the endpoint: a mailbox_id path parameter and a
# required url query parameter.
# NOTE(review): list() also answers 404 when the mailbox does not exist; that
# status is not declared in `responses` below.
@extend_schema(
description="""Proxy an external image through the server.

This endpoint fetches images from external sources and serves them
through the application to protect user privacy. Requires the
PROXY_EXTERNAL_IMAGES environment variable to be set to true.
""",
parameters=[
OpenApiParameter(
name="mailbox_id",
type=str,
location=OpenApiParameter.PATH,
description="ID of the mailbox",
required=True,
),
OpenApiParameter(
name="url",
type=str,
location=OpenApiParameter.QUERY,
description="The external image URL to proxy",
required=True,
),
],
responses={
200: OpenApiResponse(description="Image content"),
400: OpenApiResponse(description="Invalid request"),
403: OpenApiResponse(description="Forbidden"),
413: OpenApiResponse(description="Image too large"),
502: OpenApiResponse(description="Failed to fetch external image"),
},
Comment on lines +57 to +63
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Add 404 response to OpenAPI schema.

The implementation returns 404 when the mailbox is not found (lines 70-72), but this response code is not documented in the OpenAPI schema.

Apply this diff:

         responses={
+            404: OpenApiResponse(description="Mailbox not found"),
         },
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
responses={
200: OpenApiResponse(description="Image content"),
400: OpenApiResponse(description="Invalid request"),
403: OpenApiResponse(description="Forbidden"),
413: OpenApiResponse(description="Image too large"),
502: OpenApiResponse(description="Failed to fetch external image"),
},
responses={
200: OpenApiResponse(description="Image content"),
400: OpenApiResponse(description="Invalid request"),
403: OpenApiResponse(description="Forbidden"),
404: OpenApiResponse(description="Mailbox not found"),
413: OpenApiResponse(description="Image too large"),
502: OpenApiResponse(description="Failed to fetch external image"),
},
🤖 Prompt for AI Agents
In src/backend/core/api/viewsets/image_proxy.py around lines 57 to 63, the
OpenAPI responses mapping is missing the 404 entry even though the handler
returns 404 when a mailbox is not found (lines 70-72); add a 404:
OpenApiResponse(description="Not found" or "Mailbox not found") entry to the
responses dict so the OpenAPI schema documents the 404 case.

)
def list(self, request, mailbox_id=None):
"""Proxy an external image through the server."""
# Resolve the mailbox named in the URL path; an unknown id yields a 404.
try:
mailbox = models.Mailbox.objects.get(pk=mailbox_id)
except models.Mailbox.DoesNotExist:
return Response(
{"error": "Mailbox not found"}, status=status.HTTP_404_NOT_FOUND
)

# Only users holding an access on this mailbox may use its proxy.
if not mailbox.accesses.filter(user=request.user).exists():
return Response(
{"error": "Forbidden"}, status=status.HTTP_403_FORBIDDEN
)

# Feature flag: the endpoint answers 403 unless PROXY_EXTERNAL_IMAGES is set.
if not settings.PROXY_EXTERNAL_IMAGES:
return Response(
{"error": "Image proxy not enabled"},
status=status.HTTP_403_FORBIDDEN,
)

url = request.query_params.get("url")
if not url:
return Response(
{"error": "Missing url parameter"}, status=status.HTTP_400_BAD_REQUEST
)

# NOTE(review): query_params values are presumably already percent-decoded by
# the framework, which would make this a second decoding pass that can alter
# URLs containing literal %XX sequences (e.g. %2F or %26) — confirm.
url = unquote(url)

# SSRF protection: validate URL before making any request
is_safe, error_message = validate_url_safety(url)
if not is_safe:
logger.warning("Blocked unsafe URL: %s - %s", url, error_message)
# Return placeholder image instead of JSON error for better UX
svg_placeholder = """<svg xmlns="http://www.w3.org/2000/svg" width="400" height="100" viewBox="0 0 400 100">
<rect width="100%" height="100%" fill="#f8f9fa"/>
<text x="50%" y="50%" text-anchor="middle" dominant-baseline="middle"
font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#6c757d">
🚫 Image blocked for security reasons
</text>
</svg>"""
# The placeholder is still sent with a 403 status.
return HttpResponse(
svg_placeholder,
content_type="image/svg+xml",
status=403,
)
Comment on lines +93 to +109
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

Extract duplicate SVG placeholder to a module-level constant.

The SVG placeholder for blocked images appears here and again at lines 172-178. This violates DRY principles and makes updates error-prone.

Apply this diff:

 logger = logging.getLogger(__name__)
+
+# Placeholder SVG shown when images are blocked for security or validation reasons
+BLOCKED_IMAGE_SVG = """<svg xmlns="http://www.w3.org/2000/svg" width="400" height="100" viewBox="0 0 400 100">
+  <rect width="100%" height="100%" fill="#f8f9fa"/>
+  <text x="50%" y="50%" text-anchor="middle" dominant-baseline="middle"
+        font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#6c757d">
+    🚫 Image blocked for security reasons
+  </text>
+</svg>"""


 class ImageProxyViewSet(ViewSet):

Then use it at line 98 and line 172:

-            svg_placeholder = """<svg xmlns="http://www.w3.org/2000/svg" width="400" height="100" viewBox="0 0 400 100">
-  <rect width="100%" height="100%" fill="#f8f9fa"/>
-  <text x="50%" y="50%" text-anchor="middle" dominant-baseline="middle"
-        font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#6c757d">
-    🚫 Image blocked for security reasons
-  </text>
-</svg>"""
             return HttpResponse(
-                svg_placeholder,
+                BLOCKED_IMAGE_SVG,
                 content_type="image/svg+xml",
                 status=403,
             )

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In src/backend/core/api/viewsets/image_proxy.py around lines 93-109 and again at
lines ~172-178, the SVG placeholder is duplicated; extract that SVG string into
a single module-level constant (e.g., BLOCKED_IMAGE_SVG) defined near the top of
the file, then replace the inline SVG literals at line 98 and line 172 with
references to that constant when constructing the HttpResponse (preserve
content_type="image/svg+xml" and status=403). Ensure the constant is a plain
triple-quoted string and update any imports or linter notes if necessary.


# Size cap in bytes, derived from the PROXY_MAX_IMAGE_SIZE_MB setting.
max_size = settings.PROXY_MAX_IMAGE_SIZE_MB * 1024 * 1024

try:
# stream=True defers the body download so the size cap can be enforced
# chunk by chunk further down.
# NOTE(review): the response is never explicitly closed on the early-return
# paths below — consider a context manager.
response = requests.get(
url,
timeout=10,
stream=True,
headers={"User-Agent": "Messages-ImageProxy/1.0"},
allow_redirects=False, # Prevent redirect-based SSRF bypass
)
# Upstream 4xx/5xx statuses raise here and are reported as 502 by the
# RequestException handler at the bottom.
response.raise_for_status()

# First gate: the upstream Content-Type header, used as a cheap pre-filter
# before any body bytes are read.
content_type = response.headers.get("content-type", "")
if not content_type.startswith("image/"):
return Response(
{"error": "Not an image"}, status=status.HTTP_400_BAD_REQUEST
)

# Safely parse Content-Length header
try:
content_length = int(response.headers.get("content-length", 0))
except (TypeError, ValueError):
content_length = 0

# Use Content-Length as a hint, but don't trust it completely
if content_length and content_length > max_size:
return Response(
{"error": "Image too large"},
status=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
)

# Stream content in chunks to prevent memory exhaustion
chunks = []
total_size = 0
chunk_size = 8192 # 8KB chunks

for chunk in response.iter_content(chunk_size=chunk_size):
if not chunk:
continue

total_size += len(chunk)

# Enforce size limit while streaming
if total_size > max_size:
logger.warning(
"Image from %s exceeds size limit: %d bytes", url, total_size
)
return Response(
{"error": "Image too large"},
status=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
)

chunks.append(chunk)

image_content = b"".join(chunks)

# Validate that content is actually an image (defense in depth)
mime_type = magic.from_buffer(image_content, mime=True)
if not mime_type.startswith("image/"):
logger.warning("Content from %s is not a valid image: %s", url, mime_type)
# Return placeholder image for invalid content
# Same placeholder markup as the unsafe-URL branch earlier in this method,
# but returned with status 400 instead of 403.
svg_placeholder = """<svg xmlns="http://www.w3.org/2000/svg" width="400" height="100" viewBox="0 0 400 100">
<rect width="100%" height="100%" fill="#f8f9fa"/>
<text x="50%" y="50%" text-anchor="middle" dominant-baseline="middle"
font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#6c757d">
🚫 Image blocked for security reasons
</text>
</svg>"""
return HttpResponse(
svg_placeholder,
content_type="image/svg+xml",
status=400,
)

# NOTE(review): the body is served with the upstream-declared content_type
# (not the sniffed mime_type), with a "public" Cache-Control even though the
# endpoint requires authentication, and the original URL is echoed back in
# X-Proxied-From — confirm all three are intended.
return HttpResponse(
image_content,
content_type=content_type,
headers={
"Cache-Control": "public, max-age=2592000",
"X-Proxied-From": url,
},
)

# Any network-level or HTTP-status failure from requests is reported as 502.
except requests.RequestException as e:
logger.warning("Failed to fetch external image from %s: %s", url, e)
return Response(
{"error": "Failed to fetch image"}, status=status.HTTP_502_BAD_GATEWAY
)
13 changes: 13 additions & 0 deletions src/backend/core/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from core.api.viewsets.contacts import ContactViewSet
from core.api.viewsets.draft import DraftMessageView
from core.api.viewsets.flag import ChangeFlagView
from core.api.viewsets.image_proxy import ImageProxyViewSet
from core.api.viewsets.import_message import ImportViewSet, MessagesArchiveUploadViewSet
from core.api.viewsets.inbound.mta import InboundMTAViewSet
from core.api.viewsets.inbound.widget import InboundWidgetViewSet
Expand Down Expand Up @@ -66,6 +67,12 @@
r"accesses", MailboxAccessViewSet, basename="mailboxaccess"
)

# Router for /mailboxes/{mailbox_id}/image-proxy/
# Registers ImageProxyViewSet under the "image-proxy" prefix only; the
# mailboxes/<uuid:mailbox_id>/ part of the path is added where this router's
# urls are included.
mailbox_image_proxy_nested_router = DefaultRouter()
mailbox_image_proxy_nested_router.register(
r"image-proxy", ImageProxyViewSet, basename="image-proxy"
)

# Router for /maildomains/{maildomain_pk}/**/
maildomain_nested_router = DefaultRouter()
# Register /maildomains/{maildomain_pk}/mailboxes/
Expand Down Expand Up @@ -129,6 +136,12 @@
mailbox_access_nested_router.urls
), # Includes /mailboxes/{id}/accesses/
),
path(
"mailboxes/<uuid:mailbox_id>/",
include(
mailbox_image_proxy_nested_router.urls
), # Includes /mailboxes/{id}/image-proxy/
),
path(
"mailboxes/<uuid:mailbox_id>/",
include(
Expand Down
Loading
Loading