Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 52 additions & 24 deletions apps/api/plane/bgtasks/work_item_link_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from urllib.parse import urlparse, urljoin
import base64
import ipaddress
from typing import Dict, Any
from typing import Dict, Any, Tuple
from typing import Optional
from plane.db.models import IssueLink
from plane.utils.exception_logger import log_exception
Expand Down Expand Up @@ -66,6 +66,52 @@ def validate_url_ip(url: str) -> None:
MAX_REDIRECTS = 5


def safe_get(
    url: str,
    headers: Optional[Dict[str, str]] = None,
    timeout: int = 1,
) -> Tuple[requests.Response, str]:
    """
    Perform a GET request that validates every redirect hop against private IPs.
    Prevents SSRF by ensuring no redirect lands on a private/internal address.

    NOTE(review): validate_url_ip() resolves the hostname separately from the
    socket connect that requests performs, so a DNS-rebinding attacker could
    serve a public IP during validation and a private IP on the actual
    connect (TOCTOU window). Fully closing that gap requires pinning the
    connection to the pre-validated IP or enforcing egress controls outside
    the application layer — TODO: track as a follow-up.

    Args:
        url: The URL to fetch
        headers: Optional request headers
        timeout: Request timeout in seconds (applied per hop)

    Returns:
        A tuple of (final Response object, final URL after redirects)

    Raises:
        ValueError: If any URL in the redirect chain points to a private IP
        requests.RequestException: On network errors
        RuntimeError: If max redirects exceeded
    """
    # Vet the initial target before any network request is made.
    validate_url_ip(url)

    current_url = url
    # Redirects are followed manually (allow_redirects=False) so every hop
    # can be validated before it is fetched.
    response = requests.get(
        current_url, headers=headers, timeout=timeout, allow_redirects=False
    )

    redirect_count = 0
    while response.is_redirect:
        if redirect_count >= MAX_REDIRECTS:
            raise RuntimeError(f"Too many redirects for URL: {url}")
        redirect_url = response.headers.get("Location")
        if not redirect_url:
            # Malformed redirect with no Location header: return it as-is.
            break
        # Resolve relative Location values against the current URL.
        current_url = urljoin(current_url, redirect_url)
        # Validate the redirect target BEFORE requesting it.
        validate_url_ip(current_url)
        redirect_count += 1
        # Release the intermediate response's connection back to the pool
        # before issuing the next hop.
        response.close()
        response = requests.get(
            current_url, headers=headers, timeout=timeout, allow_redirects=False
        )

    return response, current_url


def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]:
"""
Crawls a URL to extract the title and favicon.
Expand All @@ -86,35 +132,19 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]:
title = None
final_url = url

validate_url_ip(final_url)

try:
# Manually follow redirects to validate each URL before requesting
redirect_count = 0
response = requests.get(final_url, headers=headers, timeout=1, allow_redirects=False)

while response.is_redirect and redirect_count < MAX_REDIRECTS:
redirect_url = response.headers.get("Location")
if not redirect_url:
break
# Resolve relative redirects against current URL
final_url = urljoin(final_url, redirect_url)
# Validate the redirect target BEFORE making the request
validate_url_ip(final_url)
redirect_count += 1
response = requests.get(final_url, headers=headers, timeout=1, allow_redirects=False)

if redirect_count >= MAX_REDIRECTS:
logger.warning(f"Too many redirects for URL: {url}")
response, final_url = safe_get(url, headers=headers)

soup = BeautifulSoup(response.content, "html.parser")
title_tag = soup.find("title")
title = title_tag.get_text().strip() if title_tag else None

except requests.RequestException as e:
logger.warning(f"Failed to fetch HTML for title: {str(e)}")
except (ValueError, RuntimeError) as e:
logger.warning(f"URL validation failed: {str(e)}")

# Fetch and encode favicon using final URL (after redirects)
# Fetch and encode favicon using final URL (after redirects) for correct relative href resolution
favicon_base64 = fetch_and_encode_favicon(headers, soup, final_url)

# Prepare result
Expand Down Expand Up @@ -204,9 +234,7 @@ def fetch_and_encode_favicon(
"favicon_base64": f"data:image/svg+xml;base64,{DEFAULT_FAVICON}",
}

validate_url_ip(favicon_url)

response = requests.get(favicon_url, headers=headers, timeout=1)
response, _ = safe_get(favicon_url, headers=headers)

# Get content type
content_type = response.headers.get("content-type", "image/x-icon")
Expand Down
126 changes: 126 additions & 0 deletions apps/api/plane/tests/unit/bg_tasks/test_work_item_link_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright (c) 2023-present Plane Software, Inc. and contributors
# SPDX-License-Identifier: AGPL-3.0-only
# See the LICENSE file for details.

import pytest
from unittest.mock import patch, MagicMock
from plane.bgtasks.work_item_link_task import safe_get, validate_url_ip


def _make_response(status_code=200, headers=None, is_redirect=False, content=b""):
"""Create a mock requests.Response."""
resp = MagicMock()
resp.status_code = status_code
resp.is_redirect = is_redirect
resp.headers = headers or {}
resp.content = content
return resp


@pytest.mark.unit
class TestValidateUrlIp:
    """Test validate_url_ip blocks private/internal IPs."""

    _DNS = "plane.bgtasks.work_item_link_task.socket.getaddrinfo"

    @staticmethod
    def _addrinfo(ip):
        # Minimal getaddrinfo() result: the sockaddr tuple is the 5th element.
        return [(None, None, None, None, (ip, 0))]

    def test_rejects_private_ip(self):
        with patch(self._DNS) as dns:
            dns.return_value = self._addrinfo("192.168.1.1")
            with pytest.raises(ValueError, match="private/internal"):
                validate_url_ip("http://example.com")

    def test_rejects_loopback(self):
        with patch(self._DNS) as dns:
            dns.return_value = self._addrinfo("127.0.0.1")
            with pytest.raises(ValueError, match="private/internal"):
                validate_url_ip("http://example.com")

    def test_rejects_non_http_scheme(self):
        # No DNS mock: the scheme is rejected without resolving the host.
        with pytest.raises(ValueError, match="Only HTTP and HTTPS"):
            validate_url_ip("file:///etc/passwd")

    def test_allows_public_ip(self):
        with patch(self._DNS) as dns:
            dns.return_value = self._addrinfo("93.184.216.34")
            validate_url_ip("https://example.com")  # Should not raise


@pytest.mark.unit
class TestSafeGet:
    """Test safe_get follows redirects safely and blocks SSRF."""

    _GET = "plane.bgtasks.work_item_link_task.requests.get"
    _VALIDATE = "plane.bgtasks.work_item_link_task.validate_url_ip"

    @staticmethod
    def _redirect(location, status_code=302):
        """A mock redirect response whose Location points at *location*."""
        return _make_response(
            status_code=status_code, is_redirect=True, headers={"Location": location}
        )

    def test_returns_response_for_non_redirect(self):
        ok = _make_response(status_code=200, content=b"OK")
        with patch(self._VALIDATE) as validate, patch(self._GET) as get:
            get.return_value = ok
            response, final_url = safe_get("https://example.com")
        assert response is ok
        assert final_url == "https://example.com"
        validate.assert_called_once_with("https://example.com")

    def test_follows_redirect_and_validates_each_hop(self):
        hop = self._redirect("https://other.com/page", status_code=301)
        ok = _make_response(status_code=200, content=b"OK")
        with patch(self._VALIDATE) as validate, patch(self._GET) as get:
            get.side_effect = [hop, ok]
            response, final_url = safe_get("https://example.com")
        assert response is ok
        assert final_url == "https://other.com/page"
        # Both the initial URL and the redirect target must be vetted.
        assert validate.call_count == 2
        validate.assert_any_call("https://example.com")
        validate.assert_any_call("https://other.com/page")

    def test_blocks_redirect_to_private_ip(self):
        hop = self._redirect("http://192.168.1.1:8080")
        with patch(self._VALIDATE) as validate, patch(self._GET) as get:
            get.return_value = hop
            # Initial URL passes validation; the redirect target does not.
            validate.side_effect = [
                None,
                ValueError("Access to private/internal networks is not allowed"),
            ]
            with pytest.raises(ValueError, match="private/internal"):
                safe_get("https://evil.com/redirect")

    def test_raises_on_too_many_redirects(self):
        loop_hop = self._redirect("https://example.com/loop")
        with patch(self._VALIDATE), patch(self._GET) as get:
            get.return_value = loop_hop
            with pytest.raises(RuntimeError, match="Too many redirects"):
                safe_get("https://example.com/start")

    def test_succeeds_at_exact_max_redirects(self):
        """After exactly MAX_REDIRECTS hops, if the final response is 200, it should succeed."""
        hop = self._redirect("https://example.com/next")
        ok = _make_response(status_code=200, content=b"OK")
        with patch(self._VALIDATE), patch(self._GET) as get:
            # 5 redirects then a 200
            get.side_effect = [hop] * 5 + [ok]
            response, final_url = safe_get("https://example.com/start")
        assert response is ok
        assert not response.is_redirect
Loading