Skip to content

Commit

Permalink
Tilde encoding now encodes space as plus, closes #1701
Browse files Browse the repository at this point in the history
Refs #1657
  • Loading branch information
simonw committed Apr 6, 2022
1 parent df88d03 commit 90d1be9
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 7 deletions.
12 changes: 10 additions & 2 deletions datasette/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1113,12 +1113,20 @@ def add_cors_headers(headers):
# '.' and '~'
)

# Integer code point of the space character; TildeEncoder compares byte
# values against this to emit "+" instead of a "~20" escape.
_space = ord(" ")


class TildeEncoder(dict):
    """Self-populating cache mapping a byte value to its tilde-encoded text.

    Looking up a byte value that has not been seen yet triggers
    ``__missing__``, which computes the encoded form, stores it in the
    dict and returns it, so each byte value is only encoded once.
    """

    def __missing__(self, b):
        """Encode byte value ``b`` on a cache miss, cache it and return it.

        ``b`` is expected to be an integer (it is passed to ``chr`` and
        formatted as two hex digits).

        - values in ``_TILDE_ENCODING_SAFE`` are returned as the character itself
        - the space character is returned as ``"+"``
        - anything else is returned as ``"~XX"`` (uppercase hexadecimal)
        """
        if b in _TILDE_ENCODING_SAFE:
            res = chr(b)
        elif b == _space:
            # Space is special-cased so encoded values read "a+b", not "a~20b".
            res = "+"
        else:
            res = "~{:02X}".format(b)
        # Store the result so later lookups bypass __missing__ entirely.
        self[b] = res
        return res

Expand All @@ -1138,7 +1146,7 @@ def tilde_decode(s: str) -> str:
# Avoid accidentally decoding a %2f style sequence
temp = secrets.token_hex(16)
s = s.replace("%", temp)
decoded = urllib.parse.unquote(s.replace("~", "%"))
decoded = urllib.parse.unquote_plus(s.replace("~", "%"))
return decoded.replace(temp, "%")


Expand Down
6 changes: 4 additions & 2 deletions docs/internals.rst
Original file line number Diff line number Diff line change
Expand Up @@ -980,15 +980,17 @@ Datasette uses a custom encoding scheme in some places, called **tilde encoding*

Tilde encoding uses the same algorithm as `URL percent-encoding <https://developer.mozilla.org/en-US/docs/Glossary/percent-encoding>`__, but with the ``~`` tilde character used in place of ``%``.

Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz 0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example:
Any character other than ``ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-`` will be replaced by the numeric equivalent preceded by a tilde. For example:

- ``/`` becomes ``~2F``
- ``.`` becomes ``~2E``
- ``%`` becomes ``~25``
- ``~`` becomes ``~7E``
- Space character becomes ``~20``
- Space becomes ``+``
- ``polls/2022.primary`` becomes ``polls~2F2022~2Eprimary``

Note that the space character is a special case: it will be replaced with a ``+`` symbol.

.. _internals_utils_tilde_encode:

.. autofunction:: datasette.utils.tilde_encode
Expand Down
6 changes: 3 additions & 3 deletions tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_homepage(app_client_two_attached_databases):
)
# Should be two attached databases
assert [
{"href": "/extra~20database", "text": "extra database"},
{"href": "/extra+database", "text": "extra database"},
{"href": "/fixtures", "text": "fixtures"},
] == [{"href": a["href"], "text": a.text.strip()} for a in soup.select("h2 a")]
# Database should show count text and attached tables
Expand All @@ -43,8 +43,8 @@ def test_homepage(app_client_two_attached_databases):
{"href": a["href"], "text": a.text.strip()} for a in links_p.findAll("a")
]
assert [
{"href": r"/extra~20database/searchable", "text": "searchable"},
{"href": r"/extra~20database/searchable_view", "text": "searchable_view"},
{"href": r"/extra+database/searchable", "text": "searchable"},
{"href": r"/extra+database/searchable_view", "text": "searchable_view"},
] == table_links


Expand Down
1 change: 1 addition & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,7 @@ async def test_derive_named_parameters(sql, expected):
("-/db-/table.csv", "-~2Fdb-~2Ftable~2Ecsv"),
(r"%~-/", "~25~7E-~2F"),
("~25~7E~2D~2F", "~7E25~7E7E~7E2D~7E2F"),
("with space", "with+space"),
),
)
def test_tilde_encoding(original, expected):
Expand Down

0 comments on commit 90d1be9

Please sign in to comment.