Permalink
Browse files

Paginate input to escape functions

Consume NIF timeslices in a similar fashion to the exml_event code
  • Loading branch information...
1 parent 3e9d223 commit 2db4a3fb849963118547627b9c108d14b649a1dc @paulgray committed Feb 24, 2014
Showing with 61 additions and 36 deletions.
  1. +14 −4 c_src/{exml_event.h → exml.h}
  2. +17 −9 c_src/exml_escape.c
  3. +0 −17 c_src/exml_escape.h
  4. +1 −1 c_src/exml_event.c
  5. +1 −1 c_src/exml_utils.c
  6. +21 −4 src/exml.erl
  7. +7 −0 test/exml_escape_tests.erl
@@ -1,11 +1,21 @@
-#ifndef EXML_EVENT_H
-#define EXML_EVENT_H
+#ifndef EXML_H
+#define EXML_H
#include <erl_nif.h>
-#include <expat.h>
-#include <assert.h>
+#include <string.h>
#include <stdio.h>
+#include <assert.h>
#include <string.h>
+#include <expat.h>
+
+#define EXML_CDATA_BUF_SIZE 1024
+#define EXML_ATTR_BUF_SIZE 64
+
+// functions 'exported' by exml_escape.c module
+ERL_NIF_TERM exml_escape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+ERL_NIF_TERM exml_unescape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+ERL_NIF_TERM exml_escape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+ERL_NIF_TERM exml_unescape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
// structure used as a private data by expat parser
typedef struct
View
@@ -1,4 +1,4 @@
-#include "exml_escape.h"
+#include "exml.h"
struct buf {
int limit;
@@ -59,7 +59,7 @@ static int match_tag(ErlNifBinary str, int index, char* tag, int len)
return(1);
}
-static ERL_NIF_TERM escape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
+ERL_NIF_TERM exml_escape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
ErlNifBinary input, output;
struct buf *rbuf;
@@ -102,10 +102,12 @@ static ERL_NIF_TERM escape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar
enif_alloc_binary(rbuf->len, &output);
memcpy(output.data, rbuf->b, rbuf->len);
destroy_buf(env, rbuf);
+ consume_timeslice(env, input);
+
return enif_make_binary(env, &output);
}
-static ERL_NIF_TERM unescape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
+ERL_NIF_TERM exml_unescape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
ErlNifBinary input, output;
struct buf *rbuf;
@@ -150,10 +152,12 @@ static ERL_NIF_TERM unescape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM
enif_alloc_binary(rbuf->len, &output);
memcpy(output.data, rbuf->b, rbuf->len);
destroy_buf(env, rbuf);
+ consume_timeslice(env, input);
+
return enif_make_binary(env, &output);
}
-static ERL_NIF_TERM escape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
+ERL_NIF_TERM exml_escape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
ErlNifBinary input, output;
struct buf *rbuf;
@@ -204,10 +208,12 @@ static ERL_NIF_TERM escape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg
enif_alloc_binary(rbuf->len, &output);
memcpy(output.data, rbuf->b, rbuf->len);
destroy_buf(env, rbuf);
+ consume_timeslice(env, input);
+
return enif_make_binary(env, &output);
}
-static ERL_NIF_TERM unescape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
+ERL_NIF_TERM exml_unescape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
ErlNifBinary input, output;
struct buf *rbuf;
@@ -260,6 +266,8 @@ static ERL_NIF_TERM unescape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM a
enif_alloc_binary(rbuf->len, &output);
memcpy(output.data, rbuf->b, rbuf->len);
destroy_buf(env, rbuf);
+ consume_timeslice(env, input);
+
return enif_make_binary(env, &output);
}
@@ -286,10 +294,10 @@ static void unload(ErlNifEnv* env, void* priv)
static ErlNifFunc nif_funcs[] =
{
- {"escape_attr_nif", 1, escape_attr},
- {"unescape_attr_nif", 1, unescape_attr},
- {"escape_cdata_nif", 1, escape_cdata},
- {"unescape_cdata_nif", 1, unescape_cdata}
+ {"escape_attr_nif", 1, exml_escape_attr},
+ {"unescape_attr_nif", 1, exml_unescape_attr},
+ {"escape_cdata_nif", 1, exml_escape_cdata},
+ {"unescape_cdata_nif", 1, exml_unescape_cdata}
};
ERL_NIF_INIT(exml, nif_funcs, &load, &reload, &upgrade, &unload);
View
@@ -1,17 +0,0 @@
-#ifndef EXML_ESCAPE_H
-#define EXML_ESCAPE_H
-
-#include <erl_nif.h>
-#include <string.h>
-#include <stdio.h>
-#include <assert.h>
-
-#define EXML_CDATA_BUF_SIZE 1024
-#define EXML_ATTR_BUF_SIZE 64
-
-static ERL_NIF_TERM escape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
-static ERL_NIF_TERM unescape_attr(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
-static ERL_NIF_TERM escape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
-static ERL_NIF_TERM unescape_cdata(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
-
-#endif
View
@@ -1,4 +1,4 @@
-#include "exml_event.h"
+#include "exml.h"
// pre-allocated Erlang atoms used commonly within the driver
static ERL_NIF_TERM XML_ELEMENT_START;
View
@@ -1,4 +1,4 @@
-#include "exml_event.h"
+#include "exml.h"
/* This should correspond to the similar define in exml_event.erl */
/* Current value is: erlang:system_info(context_reductions) * 10 */
View
@@ -17,6 +17,10 @@
escape_cdata/1, unescape_cdata/1, unescape_cdata_as/2]).
-on_load(load/0).
+%% Maximum bytes passed to the NIF handler at once
+%% Current value is erlang:system_info(context_reductions) * 10
+-define(MAX_BYTES_TO_NIF, 20000).
+
-spec load() -> any().
load() ->
PrivDir = case code:priv_dir(?MODULE) of
@@ -117,11 +121,15 @@ parse(XML) ->
-spec escape_cdata(iodata()) -> #xmlcdata{}.
escape_cdata(Content) ->
- #xmlcdata{content = escape_cdata_nif(Content)}.
+    Flat = iolist_to_binary(Content), % flatten once so the input can be cut into chunks
+    Escaped = feed_nif(fun escape_cdata_nif/1, Flat,
+                       byte_size(Flat), []), % escaped in bounded pieces, rejoined in order
+    #xmlcdata{content = Escaped}.
-spec unescape_cdata(#xmlcdata{}) -> binary().
unescape_cdata(#xmlcdata{content = Content}) ->
- unescape_cdata_nif(Content).
+    Flat = iolist_to_binary(Content), % accepts a binary or an iolist as record content
+    feed_nif(fun unescape_cdata_nif/1, Flat, byte_size(Flat), []).
-spec unescape_cdata_as(binary|list|iodata, #xmlcdata{}) -> binary().
unescape_cdata_as(What, CData) ->
@@ -144,11 +152,20 @@ unescape_cdata_as_erl(What, #xmlcdata{content=GtEsc}) ->
-spec escape_attr(binary()) -> binary().
escape_attr(Text) ->
- escape_attr_nif(Text).
+ feed_nif(fun escape_attr_nif/1, Text, byte_size(Text), []). % escaped via feed_nif/4 in chunks of at most ?MAX_BYTES_TO_NIF bytes
-spec unescape_attr(binary()) -> binary().
unescape_attr(Text) ->
- unescape_attr_nif(Text).
+ feed_nif(fun unescape_attr_nif/1, Text, byte_size(Text), []). % unescaped via feed_nif/4 in chunks of at most ?MAX_BYTES_TO_NIF bytes
+
+%% @doc Run the NIF `Fun' over `Text' in pieces of at most
+%% ?MAX_BYTES_TO_NIF bytes and return the concatenated results.
+%%
+%% `Size' must be byte_size(Text). `Acc' holds the results of the chunks
+%% processed so far, newest first; callers start with [].
+%%
+%% Each chunk is passed to `Fun' in isolation, so a chunk must not end in
+%% the middle of an entity such as "&amp;": the unescape NIFs would see
+%% "&am" and "p;" in separate calls and presumably leave both undecoded
+%% (NOTE(review): the NIF bodies are not visible here - confirm). The cut
+%% point is therefore chosen by chunk_size/1 rather than being fixed.
+-spec feed_nif(function(), binary(), integer(), list()) -> binary().
+feed_nif(Fun, Text, Size, Acc) when Size > ?MAX_BYTES_TO_NIF ->
+    ChunkSize = chunk_size(Text),
+    <<Chunk:ChunkSize/binary, Rest/binary>> = Text,
+    Resp = Fun(Chunk),
+    feed_nif(Fun, Rest, Size - ChunkSize, [Resp | Acc]);
+feed_nif(Fun, Text, _Size, Acc) ->
+    %% Last (or only) piece: process it and stitch everything together.
+    Resp = Fun(Text),
+    list_to_binary(lists:reverse([Resp | Acc])).
+
+%% Number of leading bytes of `Text' to hand to the NIF in one call.
+%% `Text' must be longer than ?MAX_BYTES_TO_NIF bytes.
+%%
+%% Normally this is ?MAX_BYTES_TO_NIF. If a "&" occurs in the last
+%% `Lookback' bytes of that window, the chunk ends just before the first
+%% such "&", so a possible entity starts the next chunk intact. Cutting
+%% right before a "&" can never split an entity, because no entity contains
+%% a second "&". The lookback of 16 bytes exceeds the longest predefined
+%% entity ("&quot;", 6 bytes) and the longest numeric character reference
+%% ("&#x10FFFF;", 10 bytes). The result is always at least
+%% ?MAX_BYTES_TO_NIF - 16, so every call makes progress.
+-spec chunk_size(binary()) -> pos_integer().
+chunk_size(Text) ->
+    Lookback = 16,
+    Start = ?MAX_BYTES_TO_NIF - Lookback,
+    %% With the scope option the returned position is still relative to the
+    %% start of Text, so it is directly the chunk length.
+    case binary:match(Text, <<"&">>, [{scope, {Start, Lookback}}]) of
+        {Pos, _Len} -> Pos;
+        nomatch -> ?MAX_BYTES_TO_NIF
+    end.
-spec escape_attr_nif(binary()) -> binary().
escape_attr_nif(_Data) ->
@@ -58,6 +58,13 @@ unescape_cdata_test() ->
assert_unescape_cdata(<<"&amp">>, <<"&amp">>),
assert_unescape_cdata(<<"&amm;">>, <<"&amm;">>).
+large_escape_test() ->
+    Raw = binary:copy(<<"&">>, 20001), % 20001 bytes of "&"
+    Escaped = binary:copy(<<"&amp;">>, 20001), % NOTE(review): if exml.erl cuts at 20000 bytes (a multiple of 5), no cut lands inside an entity here
+    assert_escape_attr(Escaped, Raw),
+    assert_unescape_attr(Raw, Escaped),
+    assert_escape_cdata(Escaped, Raw),
+    assert_unescape_cdata(Raw, Escaped).
assert_escape_attr(EscapedText, Text) ->
?assertEqual(EscapedText, exml:escape_attr(Text)),

0 comments on commit 2db4a3f

Please sign in to comment.