Skip to content
This repository has been archived by the owner on Aug 26, 2023. It is now read-only.

Commit

Permalink
Fix out-of-bounds reads for named entities
Browse files Browse the repository at this point in the history
  • Loading branch information
stevecheckoway committed Sep 21, 2018
1 parent 4cd483c commit 8d393e0
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 14 deletions.
24 changes: 14 additions & 10 deletions gumbo-parser/src/char_ref.c
Expand Up @@ -13179,27 +13179,30 @@ size_t match_named_char_ref (
int cs;
int act;
const char *p = str;
const char *const eof = 0;
const char *pe = str + size;
const char *const eof = pe;
const char *ts;
const char *te;
output[0] = output[1] = kGumboNoChar;

#line 13188 "src/char_ref.c"
#line 13189 "src/char_ref.c"
{
cs = start;
ts = 0;
te = 0;
act = 0;
}

#line 2148 "src/char_ref.rl"
#line 2149 "src/char_ref.rl"

#line 13198 "src/char_ref.c"
#line 13199 "src/char_ref.c"
{
int _slen;
int _trans;
const char *_keys;
const short *_inds;
if ( p == pe )
goto _test_eof;
if ( cs == 0 )
goto _out;
_resume:
Expand All @@ -13208,7 +13211,7 @@ size_t match_named_char_ref (
#line 1 "NONE"
{ts = p;}
break;
#line 13212 "src/char_ref.c"
#line 13215 "src/char_ref.c"
}

_keys = _trans_keys + (cs<<1);
Expand Down Expand Up @@ -22186,7 +22189,7 @@ size_t match_named_char_ref (
#line 1948 "src/char_ref.rl"
{{p = ((te))-1;}{output[0]=0xd7; {p++; goto _out; }}}
break;
#line 22190 "src/char_ref.c"
#line 22193 "src/char_ref.c"
}

_again:
Expand All @@ -22195,13 +22198,14 @@ size_t match_named_char_ref (
#line 1 "NONE"
{ts = 0;}
break;
#line 22199 "src/char_ref.c"
#line 22202 "src/char_ref.c"
}

if ( cs == 0 )
goto _out;
p += 1;
goto _resume;
if ( ++p != pe )
goto _resume;
_test_eof: {}
if ( p == eof )
{
if ( _eof_trans[cs] > 0 ) {
Expand All @@ -22213,7 +22217,7 @@ size_t match_named_char_ref (
_out: {}
}

#line 2149 "src/char_ref.rl"
#line 2150 "src/char_ref.rl"
size = p - str;
return cs >= 7623? size:0;
}
5 changes: 3 additions & 2 deletions gumbo-parser/src/char_ref.rl
Expand Up @@ -2140,12 +2140,13 @@ size_t match_named_char_ref (
int cs;
int act;
const char *p = str;
const char *const eof = 0;
const char *pe = str + size;
const char *const eof = pe;
const char *ts;
const char *te;
output[0] = output[1] = kGumboNoChar;
%% write init;
%% write exec noend;
%% write exec;
size = p - str;
return cs >= %%{ write first_final; }%%? size:0;
}
8 changes: 8 additions & 0 deletions gumbo-parser/test/tokenizer.cc
Expand Up @@ -915,6 +915,14 @@ TEST_F(GumboTokenizerTest, EscapedScriptStates2) {
NextEndTag(GUMBO_TAG_SCRIPT);
}

// Regression test for out-of-bounds reads while matching named character
// references. The literal is "&notindot;", but SetInput is called with 6 as
// its second argument -- presumably the input length, so only "&notin" would
// be visible to the tokenizer (TODO confirm against the fixture helper).
TEST_F(GumboTokenizerTest, ReadOutOfBounds) {
SetInput("&notindot;", 6);
// Expect U+00AC first, then the literal characters 'i' and 'n': the
// reference is resolved from "&not" only, not from the longer name that
// continues past the 6th byte.
// NOTE(review): the `true` argument presumably marks an expected parse
// error (reference without a terminating ';') -- verify in NextChar.
NextChar(0x00AC, true);
NextChar('i');
NextChar('n');
// Nothing after the 6th byte should be tokenized.
AtEnd();
}

TEST_F(GumboTokenizerTest, ControlCharRefs) {
SetInput("&#x80;&#x82;&#x83;&#x84;&#x85;&#x86;&#x87;&#x88;&#x89;"
"&#x8A;&#x8B;&#x8C;&#x8E;&#x91;&#x92;&#x93;&#x94;&#x95;"
Expand Down
9 changes: 7 additions & 2 deletions scripts/entities.rb
Expand Up @@ -14,6 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# The output of this script should be compiled with Ragel 6.10, which is the
# current stable version. Ragel 7 introduces changes, but precisely what they
# are isn't documented yet.

require 'psych'
require 'open-uri'

Expand Down Expand Up @@ -77,12 +81,13 @@
int cs;
int act;
const char *p = str;
const char *const eof = 0;
const char *pe = str + size;
const char *const eof = pe;
const char *ts;
const char *te;
output[0] = output[1] = kGumboNoChar;
%% write init;
%% write exec noend;
%% write exec;
size = p - str;
return cs >= %%{ write first_final; }%%? size:0;
}
Expand Down

0 comments on commit 8d393e0

Please sign in to comment.