Skip to content

Commit

Permalink
Fixed backslash escaped characters in QNames of SPARQL queries. (Bug16599)
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanMikhailov authored and VOS maintainer committed Mar 28, 2019
1 parent 4d37f41 commit 6f46c8b
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 54 deletions.
8 changes: 6 additions & 2 deletions libsrc/Wi/json.l
Expand Up @@ -23,6 +23,7 @@

%option 8bit
%option never-interactive
%option noyywrap
%option nounput
%{
#include <ctype.h>
Expand All @@ -39,7 +40,10 @@ int jsonyy_string_input (char *buf, int max);
#define jsonyyerror(str) jsonyyerror_impl(str)

struct sparp_s; /* forward */
extern caddr_t spar_strliteral (struct sparp_s *sparp, const char *strg, int strg_is_long, int is_json);
#define SPAR_STRLITERAL_SPARQL_STRING 0
#define SPAR_STRLITERAL_JSON_STRING 1
#define SPAR_STRLITERAL_SPARQL_QNAME 2
extern caddr_t spar_unescape_strliteral (struct sparp_s *sparp, const char *strg, int count_of_quotes, int mode);
extern int json_line;
%}

Expand Down Expand Up @@ -72,7 +76,7 @@ HEX ([0-9A-Fa-f])

<STRLIT>[^\\\"\n\r\t]*"\"" {
BEGIN(INITIAL);
jsonyylval.box = spar_strliteral (NULL /* no sparp for JSON_LITERAL */, jsonyytext, 0, 1);
jsonyylval.box = spar_unescape_strliteral (NULL /* no sparp for JSON_LITERAL */, jsonyytext, 1, SPAR_STRLITERAL_JSON_STRING);
return STRING;
}

Expand Down
36 changes: 23 additions & 13 deletions libsrc/Wi/scn3.l
Expand Up @@ -391,16 +391,21 @@ extern int yylex (YYSTYPE *yylval, yyscan_t yyscanner);
S_NL (\r\n|\n|\r)
SPAR_SQ_PLAIN ([^\\''\r\n])
SPAR_DQ_PLAIN ([^\\""\r\n])
SPAR_ECHAR ([\\]([atbvnrf\\""'']|("u"{HEX}{HEX}{HEX}{HEX})|("U"{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX})))
HEX ([0-9A-Fa-f])

SPAR_NCCHAR1p ([A-Za-z])
SPAR_NCCHAR1 ([A-Za-z_])
SPAR_VARNAME ([A-Za-z0-9_]+)
SPAR_NCCHAR ([A-Za-z0-9_-])
SPAR_NCNAME_PREFIX ({SPAR_NCCHAR1p}([A-Za-z0-9_.-]*{SPAR_NCCHAR})?)
SPAR_NCNAME ({SPAR_NCCHAR1}([A-Za-z0-9_.-]*{SPAR_NCCHAR})?)

SPAR_UCHAR ([\\](("u"{HEX}{HEX}{HEX}{HEX})|("U"{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX})))
SPAR_ECHAR (([\\][atbvnrf\\""''])|{SPAR_UCHAR})
HEX ([0-9A-Fa-f])

SPAR_VARNAME ([A-Za-z0-9_][A-Za-z0-9_\x7f-\xfe]*)

PN_LOCAL_ESC ([\\][_~.!$&''()*+,;=/?#@%-])
PN_LOCAL_ESC_X ({PN_LOCAL_ESC}|([%]{HEX}{HEX}))
PN_CHARS_BASE ([A-Za-z\x7f-\xfe]|{SPAR_UCHAR})
PN_CHARS_U_09 ([A-Za-z0-9_\x7f-\xfe]|{SPAR_UCHAR})
PN_CHARS_U_09_C_PLX ([A-Za-z0-9_\x7f-\xfe:]|{SPAR_UCHAR}|{PN_LOCAL_ESC_X})
PN_CHARS ([A-Za-z0-9_\x7f-\xfe-]|{SPAR_UCHAR})
PN_CHARS_C_PLX ([A-Za-z0-9_\x7f-\xfe:-]|{SPAR_UCHAR}|{PN_LOCAL_ESC_X})
PN_PREFIX ({PN_CHARS_BASE}(([.]*{PN_CHARS})*))
PN_LOCAL ({PN_CHARS_U_09_C_PLX}(([.]*{PN_CHARS_C_PLX})*))

%%

Expand Down Expand Up @@ -873,7 +878,7 @@ EXEC[ \t]+SQL { sqlp_bin_op_serial = 0; BEGIN SQL; }
t_set_push (&global_scs->scs_scn3c.namespaces, NULL);
RETURN_WS(WS_PRAGMA_PREFIX_1); }

<PRAGMA_PREFIX>({SPAR_NCNAME_PREFIX}?)":" {
<PRAGMA_PREFIX>({PN_PREFIX}?)":" {
BEGIN(PRAGMA_PREFIX_2);
global_scs->scs_scn3c.namespaces->data = t_box_dv_uname_nchars (yytext, strlen (yytext) - 1);
RETURN_WS(WS_PRAGMA_PREFIX_2); }
Expand All @@ -882,7 +887,7 @@ EXEC[ \t]+SQL { sqlp_bin_op_serial = 0; BEGIN SQL; }
const char *langl = strchr (yytext, '<');
BEGIN(SQL);
global_scs->scs_scn3c.namespaces->next->data = t_box_dv_uname_nchars (langl + 1, (yytext + yyleng - 2) - langl);
RETURN_WS(WS_PRAGMA_PREFIX_3); }
RETURN_WS(WS_PRAGMA_PREFIX_3); }

<PRAGMA_PREFIX>. { scn3yyerror ("Ill formed namespace prefix in #pragma prefix"); }
<PRAGMA_PREFIX><<EOF>> { scn3yyerror ("Unexpected end of text in #pragma prefix"); }
Expand Down Expand Up @@ -1016,6 +1021,7 @@ EXEC[ \t]+SQL { sqlp_bin_op_serial = 0; BEGIN SQL; }
<SPARQL_SKIP>"}" TOKCLOSE_SKIP ('}','}');
<SPARQL_SKIP>"[" TOKOPEN_SKIP ('[',']');
<SPARQL_SKIP>"]" TOKCLOSE_SKIP (']',']');

<SPARQL_SKIP>"<"([^<>"{}|^`\001-\040\\])*">" { TOK_SKIP; }
<SPARQL_SKIP>([""][^""\\\n]*[""])|([''][^''\\\n]*['']) { TOK_SKIP; }

Expand Down Expand Up @@ -1048,7 +1054,11 @@ EXEC[ \t]+SQL { sqlp_bin_op_serial = 0; BEGIN SQL; }
<SPARQL_SQ_SKIP><<EOF>> { scn3yyerror ("Unterminated SPARQL short single-quoted string"); }
<SPARQL_DQ_SKIP><<EOF>> { scn3yyerror ("Unterminated SPARQL short double-quoted string"); }

<SPARQL_SKIP>[^#''""\\\n\r(){}\[\];<>]+ TOK_SKIP;
<SPARQL_SKIP>({PN_PREFIX}?)":"{PN_LOCAL} TOK_SKIP;
<SPARQL_SKIP>({PN_PREFIX}?)":" TOK_SKIP;
<SPARQL_SKIP>"_:"{PN_LOCAL} TOK_SKIP;
<SPARQL_SKIP>[^#''""\\\n\r(){}\[\];:<>]+ TOK_SKIP;

<SPARQL_SKIP><<EOF>> {
#ifndef SCN3SPLIT
scn3_include_fragment_t *outer = global_scs->scs_scn3c.include_stack + global_scs->scs_scn3c.include_depth;
Expand Down
5 changes: 4 additions & 1 deletion libsrc/Wi/sparql.h
Expand Up @@ -931,7 +931,10 @@ extern caddr_t sparp_graph_sec_id_to_iri_nosignal (sparp_t *sparp, iri_id_t iid)
extern caddr_t sparp_iri_to_id_nosignal (sparp_t *sparp, ccaddr_t qname); /*!< returns t_boxed IRI_ID or plain NULL pointer */
extern ccaddr_t sparp_id_to_iri (sparp_t *sparp, iri_id_t iid); /*!< returns t_boxed string or plain NULL pointer */

extern caddr_t spar_strliteral (sparp_t *sparp, const char *sparyytext, int strg_is_long, int is_json);
/* Values for the 'mode' argument of spar_unescape_strliteral(). */
#define SPAR_STRLITERAL_SPARQL_STRING 0 /*!< Text is a SPARQL string literal: errors are reported via sparyyerror_impl(), the result is a short string box */
#define SPAR_STRLITERAL_JSON_STRING 1 /*!< Text is a JSON string: errors are reported via jsonyyerror_impl(); 4-hex-digit backslash-u values in 0xD800..0xDFFF take a separate branch (presumably surrogate pairs -- TODO confirm) */
#define SPAR_STRLITERAL_SPARQL_QNAME 2 /*!< Text is a SPARQL QName: a separate table of backslash escapes (punctuation characters plus u/U) is used and the result is a UNAME box */
/*! Returns an unescaped copy of the lexer text \c sparyytext.
\c count_of_quotes characters are skipped at the beginning of the text and the same number at its end
(the lexers visible here pass 1 for short quoted strings, 3 for triple-quoted strings and 0 for QNames).
\c mode is one of SPAR_STRLITERAL_... values; it selects the set of recognized backslash escapes,
the type of the resulting box and the function used to report errors.
A character after a backslash that is not in the selected escape table is reported as an error ("Unsupported escape sequence...").
The resulting box is flagged as BF_UTF8.
\c sparp is only handed to sparyyerror_impl() when an error is reported in non-JSON modes; the JSON lexer passes NULL. */
extern caddr_t spar_unescape_strliteral (sparp_t *sparp, const char *sparyytext, int count_of_quotes, int mode);
extern caddr_t spar_mkid (sparp_t * sparp, const char *prefix);
extern void spar_change_sign (caddr_t *lit_ptr);

Expand Down
25 changes: 14 additions & 11 deletions libsrc/Wi/sparql_core.c
Expand Up @@ -827,32 +827,32 @@ sparp_id_to_iri (sparp_t *sparp, iri_id_t iid)
return NULL; /* to keep compiler happy */
}

caddr_t spar_strliteral (sparp_t *sparp, const char *strg, int strg_is_long, int is_json)
caddr_t spar_unescape_strliteral (sparp_t *sparp, const char *strg, int count_of_quotes, int mode)
{
caddr_t tmp_buf;
caddr_t res;
const char *err_msg;
const char *src_tail, *src_end;
char *tgt_tail;
src_tail = strg + (strg_is_long ? 3 : 1);
src_end = strg + strlen (strg) - (strg_is_long ? 3 : 1);
src_tail = strg + count_of_quotes;
src_end = strg + strlen (strg) - count_of_quotes;
tgt_tail = tmp_buf = dk_alloc_box ((src_end - src_tail) + 1, DV_SHORT_STRING);
while (src_tail < src_end)
{
switch (src_tail[0])
{
case '\\':
{
const char *bs_src = "abfnrtv/\\\'\"uU";
const char *bs_trans = "\a\b\f\n\r\t\v/\\\'\"\0\0";
const char *bs_lengths = "\2\2\2\2\2\2\2\2\2\2\2\6\012";
const char *bs_src = ((SPAR_STRLITERAL_SPARQL_QNAME == mode) ? "_~.-!$&()*+,:=/?#@%\'uU" : "abfnrtv/\\\'\"uU" );
const char *bs_trans = ((SPAR_STRLITERAL_SPARQL_QNAME == mode) ? "_~.-!$&()*+,:=/?#@%\'\0\0" : "\a\b\f\n\r\t\v/\\\'\"\0\0" );
const char *bs_lengths = ((SPAR_STRLITERAL_SPARQL_QNAME == mode) ? "\2\2\2\2\2\2\2\2\2\2\2\2\2\2\2\2\2\2\2\2\6\012" : "\2\2\2\2\2\2\2\2\2\2\2\6\012" );
const char *hit = strchr (bs_src, src_tail[1]);
char bs_len, bs_tran;
const char *nextchr;
if (NULL == hit)
{
err_msg = "Unsupported escape sequence after '\'";
goto err;
err_msg = "Unsupported escape sequence after '\'";
goto err;
}
bs_len = bs_lengths [hit - bs_src];
bs_tran = bs_trans [hit - bs_src];
Expand Down Expand Up @@ -904,7 +904,7 @@ caddr_t spar_strliteral (sparp_t *sparp, const char *strg, int strg_is_long, int
goto err;
}
}
else if (is_json && (6 == bs_len) && (acc >= 0xD800) && (acc <= 0xDFFF))
else if ((SPAR_STRLITERAL_JSON_STRING == mode) && (6 == bs_len) && (acc >= 0xD800) && (acc <= 0xDFFF))
{
if (acc >= 0xDC00)
{
Expand All @@ -931,14 +931,17 @@ caddr_t spar_strliteral (sparp_t *sparp, const char *strg, int strg_is_long, int
default: (tgt_tail++)[0] = (src_tail++)[0];
}
}
res = t_box_dv_short_nchars (tmp_buf, tgt_tail - tmp_buf);
if (SPAR_STRLITERAL_SPARQL_QNAME == mode)
res = t_box_dv_uname_nchars (tmp_buf, tgt_tail - tmp_buf);
else
res = t_box_dv_short_nchars (tmp_buf, tgt_tail - tmp_buf);
box_flags (res) = BF_UTF8;
dk_free_box (tmp_buf);
return res;

err:
dk_free_box (tmp_buf);
if (is_json)
if (SPAR_STRLITERAL_JSON_STRING == mode)
jsonyyerror_impl (err_msg);
else
sparyyerror_impl (sparp, NULL, err_msg);
Expand Down
62 changes: 35 additions & 27 deletions libsrc/Wi/sparql_l.l
Expand Up @@ -188,8 +188,11 @@ void sparyyerror_if_long_qname (caddr_t box, const char *lex_type_descr, struct
sparyyerror_if_long_qname (yylval->box, lex_type_descr, yyg); \
return (name);

#define TOKBOX_Q(n,name,lex_type_descr) { \
yylval->box = t_box_dv_uname_string (yytext+(n)); \
#define TOKBOX_Q_ESC(name,lex_type_descr) { \
if (strchr (yytext, '\\')) \
yylval->box = spar_unescape_strliteral (yyextra, yytext, 0, SPAR_STRLITERAL_SPARQL_QNAME); \
else \
yylval->box = t_box_dv_uname_string (yytext); \
TOKBOX_Q_FINAL(name,lex_type_descr) }

#define TOKBOX_Q2(n1,n2,name,lex_type_descr) { \
Expand All @@ -216,7 +219,7 @@ sparyyalloc (yy_size_t size, yyscan_t yyscanner)
}

void *
sparyyrealloc (void * ptr, yy_size_t sz , yyscan_t yyscanner)
sparyyrealloc (void * ptr, yy_size_t sz, yyscan_t yyscanner)
{
int old_sz = ((NULL == ptr) ? 0 : box_length (ptr));
if (old_sz < sz)
Expand Down Expand Up @@ -303,26 +306,31 @@ X ([Xx])
Y ([Yy])
Z ([Zz])

INTEGER_LITERAL ([0-9]+)
DECIMAL_LITERAL (([0-9]+"."[0-9]*)|("."[0-9]+))
DOUBLE_LITERAL (({INTEGER_LITERAL}|{DECIMAL_LITERAL})[eE][+-]?[0-9]+)

SPAR_SQ_PLAIN ([^\\''\r\n])
SPAR_DQ_PLAIN ([^\\""\r\n])
SPAR_ECHAR ([\\]([atbvnrf\\""'']|("u"{HEX}{HEX}{HEX}{HEX})|("U"{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX})))
S_NL ((\r\n)|(\n\r)|\n|\r)
HEX ([0-9A-Fa-f])

PN_CHARS_BASE ([A-Za-z\x7f-\xfe])
PN_CHARS_U_09 ([A-Za-z0-9_\x7f-\xfe])
PN_CHARS ([A-Za-z0-9_\x7f-\xfe-])
PN_PREFIX ({PN_CHARS_BASE}(([.]*{PN_CHARS})*))
PN_LOCAL ({PN_CHARS_U_09}(([.]*{PN_CHARS})*))
SPAR_VARNAME ([A-Za-z0-9_][A-Za-z0-9_\x7f-\xfe]*)
INTEGER_LITERAL ([0-9]+)
DECIMAL_LITERAL (([0-9]+"."[0-9]*)|("."[0-9]+))
DOUBLE_LITERAL (({INTEGER_LITERAL}|{DECIMAL_LITERAL})[eE][+-]?[0-9]+)

SPAR_SQ_PLAIN ([^\\''\r\n])
SPAR_DQ_PLAIN ([^\\""\r\n])
SPAR_UCHAR ([\\](("u"{HEX}{HEX}{HEX}{HEX})|("U"{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX})))
SPAR_ECHAR (([\\][atbvnrf\\""''])|{SPAR_UCHAR})
S_NL ((\r\n)|(\n\r)|\n|\r)
HEX ([0-9A-Fa-f])

PN_LOCAL_ESC ([\\][_~.!$&''()*+,;=/?#@%-])
PN_LOCAL_ESC_X ({PN_LOCAL_ESC}|([%]{HEX}{HEX}))
PN_CHARS_BASE ([A-Za-z\x7f-\xfe]|{SPAR_UCHAR})
PN_CHARS_U_09 ([A-Za-z0-9_\x7f-\xfe]|{SPAR_UCHAR})
PN_CHARS_U_09_C_PLX ([A-Za-z0-9_\x7f-\xfe:]|{SPAR_UCHAR}|{PN_LOCAL_ESC_X})
PN_CHARS ([A-Za-z0-9_\x7f-\xfe-]|{SPAR_UCHAR})
PN_CHARS_C_PLX ([A-Za-z0-9_\x7f-\xfe:-]|{SPAR_UCHAR}|{PN_LOCAL_ESC_X})
PN_PREFIX ({PN_CHARS_BASE}(([.]*{PN_CHARS})*))
PN_LOCAL ({PN_CHARS_U_09_C_PLX}(([.]*{PN_CHARS_C_PLX})*))
SPAR_VARNAME ([A-Za-z0-9_][A-Za-z0-9_\x7f-\xfe]*)
SPAR_PLAIN_SQLNAME ([A-Za-z_][A-Za-z0-9_]*)
SPAR_DQ_SQLNAME ([""][^""\\\r\n]*[""])
SPAR_SQLNAME (([A-Za-z_][A-Za-z0-9_]*)|([""][^""\\\r\n]*[""]))
SPAR_PARAMNAME (([A-Z]+"::")?(({SPAR_SQLNAME}("."{SPAR_SQLNAME})?)|(":"{SPAR_SQLNAME})|(":"[0-9]+)))
SPAR_PARAMNAME (([A-Z]+"::")?(({SPAR_SQLNAME}("."{SPAR_SQLNAME})?)|(":"{SPAR_SQLNAME})|(":"[0-9]+)))

%%

Expand Down Expand Up @@ -372,9 +380,9 @@ SPAR_PARAMNAME (([A-Z]+"::")?(({SPAR_SQLNAME}("."{SPAR_SQLNAME})?)|(":"{SPAR_SQL
return Q_IRI_REF;
}
<SPARQL>({PN_PREFIX}?)":"{PN_LOCAL} { TOKBOX_Q(0,QNAME,"qualified URI"); }
<SPARQL>({PN_PREFIX}?)":" { TOKBOX_Q(0,QNAME_NS,"namespace"); }
<SPARQL>"_:"{PN_LOCAL} { TOKBOX_Q(0,BLANK_NODE_LABEL,"blank node label"); }
<SPARQL>({PN_PREFIX}?)":"{PN_LOCAL} { TOKBOX_Q_ESC(QNAME,"qualified URI"); }
<SPARQL>({PN_PREFIX}?)":" { TOKBOX_Q_ESC(QNAME_NS,"namespace"); }
<SPARQL>"_:"{PN_LOCAL} { TOKBOX_Q_ESC(BLANK_NODE_LABEL,"blank node label"); }
<SPARQL>[?$]{SPAR_VARNAME} {
yylval->box = t_box_dv_uname_nchars (yytext + 1, yyleng - 1);
Expand Down Expand Up @@ -434,8 +442,8 @@ SPAR_PARAMNAME (([A-Z]+"::")?(({SPAR_SQLNAME}("."{SPAR_SQLNAME})?)|(":"{SPAR_SQL
<SPARQL>[''][''][''] { yymore(); SET_INNER_BEGIN(SPARQL_SSSQ); BEGIN_INNER; }
<SPARQL>[""][""][""] { yymore(); SET_INNER_BEGIN(SPARQL_DDDQ); BEGIN_INNER; }
<SPARQL_SSSQ>[''][''][''] { yylval->box = spar_strliteral (yyextra, yytext, 1, 0); BEGIN_OUTER; return SPARQL_STRING; }
<SPARQL_DDDQ>[""][""][""] { yylval->box = spar_strliteral (yyextra, yytext, 1, 0); BEGIN_OUTER; return SPARQL_STRING; }
<SPARQL_SSSQ>[''][''][''] { yylval->box = spar_unescape_strliteral (yyextra, yytext, 3, SPAR_STRLITERAL_SPARQL_STRING); BEGIN_OUTER; return SPARQL_STRING; }
<SPARQL_DDDQ>[""][""][""] { yylval->box = spar_unescape_strliteral (yyextra, yytext, 3, SPAR_STRLITERAL_SPARQL_STRING); BEGIN_OUTER; return SPARQL_STRING; }
<SPARQL_SSSQ>(([''](['']?))?{S_NL}) { yyextra->sparp_lexlineno++; yymore(); }
<SPARQL_DDDQ>(([""]([""]?))?{S_NL}) { yyextra->sparp_lexlineno++; yymore(); }
<SPARQL_SSSQ>((([''](['']?))?({SPAR_SQ_PLAIN}|{SPAR_ECHAR}))+) { yymore(); }
Expand All @@ -450,8 +458,8 @@ SPAR_PARAMNAME (([A-Z]+"::")?(({SPAR_SQLNAME}("."{SPAR_SQLNAME})?)|(":"{SPAR_SQL
<SPARQL>[''] { yymore(); SET_INNER_BEGIN(SPARQL_SQ); BEGIN_INNER; }
<SPARQL>[""] { yymore(); SET_INNER_BEGIN(SPARQL_DQ); BEGIN_INNER; }
<SPARQL_SQ>[''] { yylval->box = spar_strliteral (yyextra, yytext, 0, 0); BEGIN_OUTER; return SPARQL_STRING; }
<SPARQL_DQ>[""] { yylval->box = spar_strliteral (yyextra, yytext, 0, 0); BEGIN_OUTER; return SPARQL_STRING; }
<SPARQL_SQ>[''] { yylval->box = spar_unescape_strliteral (yyextra, yytext, 1, SPAR_STRLITERAL_SPARQL_STRING); BEGIN_OUTER; return SPARQL_STRING; }
<SPARQL_DQ>[""] { yylval->box = spar_unescape_strliteral (yyextra, yytext, 1, SPAR_STRLITERAL_SPARQL_STRING); BEGIN_OUTER; return SPARQL_STRING; }
<SPARQL_SQ>{S_NL} { sparyyerror ("End-of-line in a short single-quoted string"); yymore(); }
<SPARQL_DQ>{S_NL} { sparyyerror ("End-of-line in a short double-quoted string"); yymore(); }
<SPARQL_SQ>(({SPAR_SQ_PLAIN}|{SPAR_ECHAR})+) { yymore(); }
Expand Down

0 comments on commit 6f46c8b

Please sign in to comment.