Skip to content

Commit

Permalink
Merge pull request #1203 from tahoe-lafs/3872-resource-limits-http-st…
Browse files Browse the repository at this point in the history
…orage

A variety of CPU and memory resource limits for the HTTP storage client and server

Fixes ticket:3872
  • Loading branch information
itamarst committed Jul 15, 2022
2 parents a78f504 + a24aefa commit 69f90e6
Show file tree
Hide file tree
Showing 4 changed files with 470 additions and 149 deletions.
Empty file added newsfragments/3872.minor
Empty file.
54 changes: 47 additions & 7 deletions src/allmydata/storage/http_client.py
Expand Up @@ -4,8 +4,9 @@

from __future__ import annotations

from typing import Union, Optional, Sequence, Mapping
from typing import Union, Optional, Sequence, Mapping, BinaryIO
from base64 import b64encode
from io import BytesIO

from attrs import define, asdict, frozen, field

Expand All @@ -17,7 +18,7 @@
from twisted.web.http_headers import Headers
from twisted.web import http
from twisted.web.iweb import IPolicyForHTTPS
from twisted.internet.defer import inlineCallbacks, returnValue, fail, Deferred
from twisted.internet.defer import inlineCallbacks, returnValue, fail, Deferred, succeed
from twisted.internet.interfaces import IOpenSSLClientConnectionCreator
from twisted.internet.ssl import CertificateOptions
from twisted.web.client import Agent, HTTPConnectionPool
Expand Down Expand Up @@ -114,19 +115,58 @@ def __init__(self, code, *additional_args):
}


@define
class _LengthLimitedCollector:
"""
Collect data using ``treq.collect()``, with limited length.
"""

remaining_length: int
f: BytesIO = field(factory=BytesIO)

def __call__(self, data: bytes):
self.remaining_length -= len(data)
if self.remaining_length < 0:
raise ValueError("Response length was too long")
self.f.write(data)


def limited_content(response, max_length: int = 30 * 1024 * 1024) -> Deferred[BinaryIO]:
"""
Like ``treq.content()``, but limit data read from the response to a set
length. If the response is longer than the max allowed length, the result
fails with a ``ValueError``.
A potentially useful future improvement would be using a temporary file to
store the content; since filesystem buffering means that would use memory
for small responses and disk for large responses.
"""
collector = _LengthLimitedCollector(max_length)
# Make really sure everything gets called in Deferred context, treq might
# call collector directly...
d = succeed(None)
d.addCallback(lambda _: treq.collect(response, collector))

def done(_):
collector.f.seek(0)
return collector.f

d.addCallback(done)
return d


def _decode_cbor(response, schema: Schema):
"""Given HTTP response, return decoded CBOR body."""

def got_content(data):
def got_content(f: BinaryIO):
data = f.read()
schema.validate_cbor(data)
return loads(data)

if response.code > 199 and response.code < 300:
content_type = get_content_type(response.headers)
if content_type == CBOR_MIME_TYPE:
# TODO limit memory usage
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872
return treq.content(response).addCallback(got_content)
return limited_content(response).addCallback(got_content)
else:
raise ClientException(-1, "Server didn't send CBOR")
else:
Expand Down Expand Up @@ -295,7 +335,7 @@ def request(
write_enabler_secret=None,
headers=None,
message_to_serialize=None,
**kwargs
**kwargs,
):
"""
Like ``treq.request()``, but with optional secrets that get translated
Expand Down

0 comments on commit 69f90e6

Please sign in to comment.