Skip to content

Commit

Permalink
app/uri: add url encode and decode methods
Browse files Browse the repository at this point in the history
Lua FFI bindings of curl_easy_escape() [1] and curl_easy_unescape() [2].

1. https://curl.se/libcurl/c/curl_easy_escape.html
2. https://curl.se/libcurl/c/curl_easy_unescape.html

@TarantoolBot document
Title: Document new methods to encode and decode URLs

New methods "uri:encode()" and "uri:decode()" have been introduced.
The first one escapes symbols in a string and the second one
unescapes symbols in a string. Escaping and unescaping are
implemented using the CURL functions curl_easy_escape() and
curl_easy_unescape() and conform to RFC 3986. The maximum length of
a string is limited by CURL_MAX_INPUT_LENGTH (8 MB).

```
tarantool> require('uri').encode('тарантул')
---
- '%D1%82%D0%B0%D1%80%D0%B0%D0%BD%D1%82%D1%83%D0%BB'
...

tarantool>
```

Fixes tarantool#3682
  • Loading branch information
ligurio committed Jun 9, 2022
1 parent a6818ac commit 6a20950
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 1 deletion.
13 changes: 13 additions & 0 deletions changelogs/unreleased/gh-3682-uri-encode-decode.md
@@ -0,0 +1,13 @@
## feature/lua/uri

* Added functions for encoding and decoding URL strings according to
RFC 3986 (gh-3682).

```lua
tarantool> require('uri').encode('тарантул')
---
- '%D1%82%D0%B0%D1%80%D0%B0%D0%BD%D1%82%D1%83%D0%BB'
...

tarantool>
```
67 changes: 67 additions & 0 deletions src/lua/uri.lua
Expand Up @@ -4,6 +4,8 @@ local ffi = require('ffi')
local buffer = require('buffer')
local uri = require('uri')

-- One-element int array allocated once at module load and passed as the
-- `outlength` out-parameter of curl_easy_unescape() by url_decode(), which
-- then reads the decoded length back from element [0].
local url_decode_outlength = ffi.new("int[1]")

ffi.cdef[[
struct uri_param {
const char *name;
Expand Down Expand Up @@ -45,6 +47,13 @@ uri_set_destroy(struct uri_set *uri_set);

int
uri_format(char *str, size_t len, struct uri *uri, bool write_password);

typedef void CURL;
CURL *curl_easy_init(void);
void curl_easy_cleanup(CURL *handle);
char *curl_easy_escape(CURL *handle, const char *string, int length);
char *curl_easy_unescape(CURL *handle, const char *string, int length, int *outlength);
void curl_free(void *p);
]]

local builtin = ffi.C;
Expand Down Expand Up @@ -169,8 +178,66 @@ local function format(uri, write_password)
return str
end

--- Percent-encodes the given string.
-- Thin wrapper around the Curl function curl_easy_escape(),
-- see description in https://curl.haxx.se/libcurl/c/curl_easy_escape.html
-- @function url_encode
-- @string buf - string to be encoded
-- @returns encoded string on success, or nil, err on a Curl failure
-- @raise usage error when buf is not a string
local function url_encode(buf)
    if type(buf) ~= 'string' then
        error("Usage: uri.encode(string)")
    end

    local handle = ffi.C.curl_easy_init()
    -- A NULL cdata pointer is truthy in LuaJIT, so `not handle` would never
    -- catch a failed init; it has to be compared against nil explicitly.
    if handle == nil then
        return nil, 'curl_easy_init error'
    end

    -- The byte length is passed explicitly instead of relying on the
    -- terminating NUL of the Lua string.
    local escaped_str = ffi.C.curl_easy_escape(handle, buf, #buf)
    -- The handle is not needed once the escaped buffer has been produced.
    ffi.C.curl_easy_cleanup(handle)
    if escaped_str == nil then
        return nil, 'curl_easy_escape error'
    end

    -- Copy the result into a Lua string before releasing the Curl buffer.
    local res = ffi.string(escaped_str)
    ffi.C.curl_free(escaped_str)

    return res
end

--- Decodes the given percent-encoded string.
-- Thin wrapper around the Curl function curl_easy_unescape(),
-- see description in https://curl.haxx.se/libcurl/c/curl_easy_unescape.html
-- @function url_decode
-- @string buf - encoded URL
-- @returns decoded string on success, or nil, err on a Curl failure
-- @raise usage error when buf is not a string
local function url_decode(buf)
    if type(buf) ~= 'string' then
        -- The message previously had type(buf) glued to its end with no
        -- separator ("...(string)number"); it now matches uri.encode().
        error("Usage: uri.decode(string)")
    end

    local handle = builtin.curl_easy_init()
    -- A NULL cdata pointer is truthy in LuaJIT, so `not handle` would never
    -- catch a failed init; it has to be compared against nil explicitly.
    if handle == nil then
        return nil, 'curl_easy_init error'
    end

    local unescaped_str = builtin.curl_easy_unescape(handle, buf, #buf,
                                                     url_decode_outlength)
    -- The handle is not needed once the decoded buffer has been produced.
    builtin.curl_easy_cleanup(handle)
    if unescaped_str == nil then
        return nil, 'curl_easy_unescape error'
    end

    -- Use the reported length rather than NUL termination, so that a decoded
    -- "%00" survives as a zero byte in the resulting Lua string.
    local res = ffi.string(unescaped_str, url_decode_outlength[0])
    builtin.curl_free(unescaped_str)

    return res
end

-- Public interface of the module: the existing parse/format helpers plus
-- the URL encode/decode wrappers.
local exports = {}
exports.parse_many = parse_many
exports.parse = parse
exports.format = format
exports.encode = url_encode
exports.decode = url_decode
return exports;
85 changes: 84 additions & 1 deletion test/app-tap/uri.test.lua
Expand Up @@ -743,12 +743,95 @@ local function test_parse_invalid_uri_set_from_lua_table(test)
test:is(tostring(error), expected_errmsg, "error message")
end

-- Test vectors for uri.encode(): each entry is { raw, encoded }, where
-- `encoded` is the string uri.encode(raw) is expected to return.
local url_encode_test = {
{ "a", "a" },
{ "/", "%2F" },
{ "a=b", "a%3Db" },
{ "1/./0", "1%2F.%2F0" },
{ "-._~!#%&", "-._~%21%23%25%26" },
{ "тарантул", "%D1%82%D0%B0%D1%80%D0%B0%D0%BD%D1%82%D1%83%D0%BB" },
}

-- Test vectors for uri.decode(): each entry is { raw, encoded }, where
-- `raw` is the string uri.decode(encoded) is expected to return.
-- Entries such as "%6 1", "%6%a", "%-2" and "%FG" are malformed escape
-- sequences that are expected to come back unchanged.
-- Every case is listed once; an earlier verbatim repetition of the
-- "%61" .. "%FG" group was dropped.
local url_decode_test = {
    { "a", "a" },
    { "a=b", "a%3Db" },
    { "a=", "a%3D" },
    { "1/./0", "1%2F.%2F0" },
    { "-._~!#%&", "-._~%21%23%25%26" },
    { "a", "%61" },
    { "aa", "%61a" },
    { "ab", "%61b" },
    { "%6 1", "%6 1" },
    { "%6%a", "%6%a" },
    { "j", "%6a" },
    { "%-2", "%-2" },
    { "%FG", "%FG" },
    { "/", "%2F" },
}

-- Checks uri.encode() against every { raw, encoded } pair in
-- url_encode_test; plans one assertion per pair.
local function test_url_encode(test)
    -- `#` replaces the deprecated table.getn().
    test:plan(#url_encode_test)
    -- ipairs() walks the cases in their listed order.
    for _, testcase in ipairs(url_encode_test) do
        local raw = testcase[1]
        local encoded = testcase[2]
        local desc = ('encoding of "%s"'):format(raw)
        -- test:is() takes the actual value first and the expected one
        -- second, so failure diagnostics label them correctly.
        test:is(uri.encode(raw), encoded, desc)
    end
end

-- Checks uri.decode() against every { raw, encoded } pair in
-- url_decode_test; plans one assertion per pair.
local function test_url_decode(test)
    -- `#` replaces the deprecated table.getn().
    test:plan(#url_decode_test)
    -- ipairs() walks the cases in their listed order.
    for _, testcase in ipairs(url_decode_test) do
        local raw = testcase[1]
        local encoded = testcase[2]
        local desc = ('decoding of "%s"'):format(encoded)
        -- test:is() takes the actual value first and the expected one
        -- second, so failure diagnostics label them correctly.
        test:is(uri.decode(encoded), raw, desc)
    end
end

-- Checks uri.encode() and uri.decode() on fixed inputs and verifies that
-- composing them in either order returns the original string, including
-- for a string consisting of a single zero byte.
-- Each distinct assertion appears once (the earlier version repeated six of
-- them verbatim), and test:is() receives the actual value first.
local function test_url_encode_decode_roundtrip(test)
    test:plan(8)

    -- One-way conversions.
    test:is(uri.encode('hello'), 'hello', 'correct encoding of eng')
    test:is(uri.encode('привет'), '%D0%BF%D1%80%D0%B8%D0%B2%D0%B5%D1%82',
            'correct encoding of rus')
    test:is(uri.decode('hello'), 'hello', 'correct decoding of eng')
    test:is(uri.decode('%D0%BF%D1%80%D0%B8%D0%B2%D0%B5%D1%82'), 'привет',
            'correct decoding of rus')

    -- Round trips.
    test:is(uri.encode(uri.decode('hello')), 'hello',
            'decoding and encoding of "hello"')
    test:is(uri.decode(uri.encode('привет')), 'привет',
            'encoding and decoding of "привет"')
    test:is(uri.encode(uri.decode('%00')), '%00',
            'decoding and encoding of %00')
    test:is(uri.decode(uri.encode('\0')), '\0',
            'encoding and decoding of \\0')
end

-- Top-level "uri" suite. The plan equals the number of subtests registered
-- below (nine); the stale plan of 6 from before the encode/decode subtests
-- were added has been removed.
tap.test("uri", function(test)
    test:plan(9)
    test:test("parse", test_parse)
    test:test("parse URI query params", test_parse_uri_query_params)
    test:test("parse URIs with query params", test_parse_uri_set_with_query_params)
    test:test("parse URIs from lua table", test_parse_uri_set_from_lua_table)
    test:test("parse invalid URIs from lua table", test_parse_invalid_uri_set_from_lua_table)
    test:test("format", test_format)
    test:test("encode uri", test_url_encode)
    test:test("decode uri", test_url_decode)
    test:test("encode and decode uri (roundtrip)", test_url_encode_decode_roundtrip)
end)

0 comments on commit 6a20950

Please sign in to comment.