Skip to content

Commit

Permalink
Set a library-specific user agent when automatically retrieving $refs.
Browse files Browse the repository at this point in the history
This behavior is (already) deprecated, but oddly there are "real world"
webservers which seem to accept the `requests` default user agent
(i.e. they respond correctly) but reject the vaguer `urllib` default,
`Python-urllib/3.x` (i.e. they respond with a 403 or similar).

Since it seems polite to set this anyhow, we may as well fix behavior
for anyone who happens to be encountering such a webserver.
  • Loading branch information
Julian committed Aug 2, 2023
1 parent 4fdc365 commit 4e999e0
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 5 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
@@ -1,3 +1,8 @@
v4.18.6
=======

* Set a ``jsonschema``-specific user agent when automatically retrieving remote references (which is deprecated).

v4.18.5
=======

Expand Down
52 changes: 49 additions & 3 deletions jsonschema/tests/test_deprecations.py
@@ -1,7 +1,11 @@
from unittest import TestCase
import importlib
from contextlib import contextmanager
from io import BytesIO
from unittest import TestCase, mock
import importlib.metadata
import json
import subprocess
import sys
import urllib.request

import referencing.exceptions

Expand All @@ -16,8 +20,9 @@ def test_version(self):

message = "Accessing jsonschema.__version__ is deprecated"
with self.assertWarnsRegex(DeprecationWarning, message) as w:
from jsonschema import __version__ # noqa: F401
from jsonschema import __version__

self.assertEqual(__version__, importlib.metadata.version("jsonschema"))
self.assertEqual(w.filename, __file__)

def test_validators_ErrorTree(self):
Expand Down Expand Up @@ -357,3 +362,44 @@ def test_cli(self):
capture_output=True,
)
self.assertIn(b"The jsonschema CLI is deprecated ", process.stderr)

def test_automatic_remote_retrieval(self):
"""
Automatic retrieval of remote references is deprecated as of v4.18.0.

Validates instances against a schema whose ``$ref`` points at a remote
URI while ``urllib.request.urlopen`` is replaced by a fake, and checks
both that a ``DeprecationWarning`` is emitted and that the outgoing
request carries the library-specific ``User-Agent`` header.
"""
# Remote reference: document "http://bar", JSON pointer "/$defs/baz".
ref = "http://bar#/$defs/baz"
# The document the fake server returns; "baz" only accepts integers.
schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$defs": {"baz": {"type": "integer"}},
}

# A ``None`` entry in ``sys.modules`` makes ``import requests`` raise
# ImportError; presumably this forces the urllib-based retrieval path --
# confirm against jsonschema.validators. An already-imported ``requests``
# module is put back by the registered cleanup.
if "requests" in sys.modules: # pragma: no cover
self.addCleanup(
sys.modules.__setitem__, "requests", sys.modules["requests"],
)
sys.modules["requests"] = None

# Stand-in for ``urllib.request.urlopen``: verifies the outgoing request
# and serves ``schema`` as the JSON-encoded response body.
@contextmanager
def fake_urlopen(request):
self.assertIsInstance(request, urllib.request.Request)
# The fragment of ``ref`` must not be part of the fetched URL.
self.assertEqual(request.full_url, "http://bar")

# Ha ha urllib.request.Request "normalizes" header names and
# Request.get_header does not also normalize them...
# The single-element unpacking below also asserts that exactly one
# header is present on the request.
(header, value), = request.header_items()
self.assertEqual(header.lower(), "user-agent")
self.assertEqual(
value, "python-jsonschema (deprecated $ref resolution)",
)
yield BytesIO(json.dumps(schema).encode("utf8"))

validator = validators.Draft202012Validator({"$ref": ref})

# Leading part of the expected deprecation message (used as a regex).
message = "Automatically retrieving remote references "
patch = mock.patch.object(urllib.request, "urlopen", new=fake_urlopen)

# With the fake in place, ``{}`` must be rejected and ``37`` accepted by
# the remotely-referenced integer schema, and the warning must be raised.
with patch, self.assertWarnsRegex(DeprecationWarning, message):
self.assertEqual(
(validator.is_valid({}), validator.is_valid(37)),
(False, True),
)
6 changes: 4 additions & 2 deletions jsonschema/validators.py
Expand Up @@ -103,8 +103,10 @@ def _validates(cls):


def _warn_for_remote_retrieve(uri: str):
from urllib.request import urlopen
with urlopen(uri) as response:
from urllib.request import Request, urlopen
headers = {"User-Agent": "python-jsonschema (deprecated $ref resolution)"}
request = Request(uri, headers=headers)
with urlopen(request) as response:
warnings.warn(
"Automatically retrieving remote references can be a security "
"vulnerability and is discouraged by the JSON Schema "
Expand Down

0 comments on commit 4e999e0

Please sign in to comment.