Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a fuzz target for _elementtree.XMLParser._parse_whole #111477

Merged
merged 9 commits into from
Nov 3, 2023
134 changes: 134 additions & 0 deletions Modules/_xxtestfuzz/dictionaries/fuzz_elementtree_parsewhole.dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
tok_1="<"
tok_2=">"
tok_3="/"
tok_4="<![CDATA["
tok_5="]]>"
tok_6="<?xml"
tok_6a="?>"
tok_7="version"
tok_8="encoding"
tok_9="UTF-8"
tok_9a="UTF-16"
tok_9b="ASCII"
tok_9c="LATIN-1"
tok_9d="UTF-32"
tok_9e="UTF-7"
tok_10="\""
tok_11="&"
tok_11a="&#"
tok_11b=";"
tok_12="'"
tok_13="<!--"
tok_13a="-->"
tok_14="</"
tok_15="="
tok_16="&gt;"
tok_17="&lt;"
tok_18="&amp;"
tok_19="&apos;"
tok_20="&quot;"
tok_21="&#20013;"
tok_22="&#x4e2d;"
tok_23="&#0;"
tok_24="<!ENTITY"
tok_25="SYSTEM"
tok_26="PUBLIC"
tok_27="NDATA"
tok_28="["
tok_29="]"
tok_30="\\"
tok_30a="\\x00"
tok_31="0"
tok_32="1"
tok_33="2"
tok_34="3"
tok_35="4"
tok_36="5"
tok_37="6"
tok_38="7"
tok_39="8"
tok_40="9"
tok_41="iso8859_1"
tok_42="latin_1"
tok_43="us.ascii"
tok_43a="us_ascii"
tok_43b="ascii"
tok_44="xml:"
tok_45="surrogate"
tok_46="replace"
tok_47="strict"

attr_encoding=" encoding=\"1\""
attr_generic=" a=\"1\""
attr_href=" href=\"1\""
attr_standalone=" standalone=\"no\""
attr_version=" version=\"1\""
attr_xml_base=" xml:base=\"1\""
attr_xml_id=" xml:id=\"1\""
attr_xml_lang=" xml:lang=\"1\""
attr_xml_space=" xml:space=\"1\""
attr_xmlns=" xmlns=\"1\""

entity_builtin="&lt;"
entity_decimal="&#1;"
entity_external="&a;"
entity_hex="&#x1;"

string_any="ANY"
string_brackets="[]"
string_cdata="CDATA"
string_col_fallback=":fallback"
string_col_generic=":a"
string_col_include=":include"
string_dashes="--"
string_empty="EMPTY"
string_empty_dblquotes="\"\""
string_empty_quotes="''"
string_entities="ENTITIES"
string_entity="ENTITY"
string_fixed="#FIXED"
string_id="ID"
string_idref="IDREF"
string_idrefs="IDREFS"
string_implied="#IMPLIED"
string_nmtoken="NMTOKEN"
string_nmtokens="NMTOKENS"
string_notation="NOTATION"
string_parentheses="()"
string_pcdata="#PCDATA"
string_percent="%a"
string_public="PUBLIC"
string_required="#REQUIRED"
string_schema=":schema"
string_system="SYSTEM"
string_ucs4="UCS-4"
string_utf16="UTF-16"
string_utf8="UTF-8"
string_xmlns="xmlns:"

tag_attlist="<!ATTLIST"
tag_cdata="<![CDATA["
tag_close="</a>"
tag_doctype="<!DOCTYPE"
tag_element="<!ELEMENT"
tag_entity="<!ENTITY"
tag_ignore="<![IGNORE["
tag_include="<![INCLUDE["
tag_notation="<!NOTATION"
tag_open="<a>"
tag_open_close="<a />"
tag_open_exclamation="<!"
tag_open_q="<?"
tag_sq2_close="]]>"
tag_xml_q="<?xml?>"

encoding_utf="UTF-"
encoding_iso1="ISO-8859"
encoding_iso3="ISO-10646-UCS"
encoding_iso5="ISO-LATIN-1"
encoding_jis="SHIFT_JIS"
encoding_utf7="UTF-7"
encoding_utf16be="UTF-16BE"
encoding_utf16le="UTF-16LE"
encoding_ascii="US-ASCII"
encoding_latin1="latin1"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
<c14n2:IgnoreComments>true</c14n2:IgnoreComments>
</dsig:CanonicalizationMethod>

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" Algorithm="http://www.w3.org/2010/xml-c14n2">
</dsig:CanonicalizationMethod>

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
<c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
</dsig:CanonicalizationMethod>

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
<c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
<c14n2:QNameAware>
<c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
</c14n2:QNameAware>
</dsig:CanonicalizationMethod>

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
<c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
<c14n2:QNameAware>
<c14n2:Element Name="bar" NS="http://a"/>
<c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
</c14n2:QNameAware>
</dsig:CanonicalizationMethod>

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
<c14n2:QNameAware>
<c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
</c14n2:QNameAware>
</dsig:CanonicalizationMethod>

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
<c14n2:QNameAware>
<c14n2:Element Name="bar" NS="http://a"/>
</c14n2:QNameAware>
</dsig:CanonicalizationMethod>

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
<c14n2:QNameAware>
<c14n2:Element Name="bar" NS="http://a"/>
<c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
</c14n2:QNameAware>
</dsig:CanonicalizationMethod>

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
<c14n2:TrimTextNodes>true</c14n2:TrimTextNodes>
</dsig:CanonicalizationMethod>

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<a b='01234567890123456古人咏雪抽幽思骋妍辞竞险韵偶得一编奇绝辄擅美当时流声后代是以北门之风南山之雅梁园之简黄台之赋至今为作家称述尚矣及至洛阳之卧剡溪之兴灞桥之思亦皆传为故事钱塘沈履德先生隐居西湖两峰间孤高贞洁与雪同调方大雪满天皴肤粟背之际先生乃鹿中豹舄端居闭门或扶童曳杖踏遍六桥三竺时取古人诗讽咏之合唐宋元诸名家集句成诗得二百四十章联络通穿如出一人如呵一气气立于言表格备于篇中略无掇拾补凑之形非胸次包罗壮阔笔底驱走鲍谢欧苏诸公不能为此世称王荆公为集句擅长观其在钟山对雪仅题数篇未见有此噫嘻奇矣哉亦富矣哉予慕先生有袁安之节愧不能为慧可之立乃取新集命工传写使海内同好者知先生为博古传述之士而一新世人之耳目他日必有慕潜德阐幽光而剞劂以传者余实为之执殳矣
弘治戊午仲冬望日慈溪杨子器衵于海虞官舍序毕诗部' />
14 changes: 14 additions & 0 deletions Modules/_xxtestfuzz/fuzz_elementtree_parsewhole_corpus/inC14N1.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0"?>

<?xml-stylesheet href="doc.xsl"
type="text/xsl" ?>

<!DOCTYPE doc SYSTEM "doc.dtd">

<doc>Hello, world!<!-- Comment 1 --></doc>

<?pi-without-data ?>

<!-- Comment 2 -->

<!-- Comment 3 -->
11 changes: 11 additions & 0 deletions Modules/_xxtestfuzz/fuzz_elementtree_parsewhole_corpus/inC14N2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<doc>
<clean> </clean>
<dirty> A B </dirty>
<mixed>
A
<clean> </clean>
B
<dirty> A B </dirty>
C
</mixed>
</doc>
18 changes: 18 additions & 0 deletions Modules/_xxtestfuzz/fuzz_elementtree_parsewhole_corpus/inC14N3.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
<doc>
<e1 />
<e2 ></e2>
<e3 name = "elem3" id="elem3" />
<e4 name="elem4" id="elem4" ></e4>
<e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
xmlns:b="http://www.ietf.org"
xmlns:a="http://www.w3.org"
xmlns="http://example.org"/>
<e6 xmlns="" xmlns:a="http://www.w3.org">
<e7 xmlns="http://www.ietf.org">
<e8 xmlns="" xmlns:a="http://www.w3.org">
<e9 xmlns="" xmlns:a="http://www.ietf.org"/>
</e8>
</e7>
</e6>
</doc>
13 changes: 13 additions & 0 deletions Modules/_xxtestfuzz/fuzz_elementtree_parsewhole_corpus/inC14N4.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<!DOCTYPE doc [
<!ATTLIST normId id ID #IMPLIED>
<!ATTLIST normNames attr NMTOKENS #IMPLIED>
]>
<doc>
<text>First line&#x0d;&#10;Second line</text>
<value>&#x32;</value>
<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>
<norm attr=' &apos; &#x20;&#13;&#xa;&#9; &apos; '/>
<normNames attr=' A &#x20;&#13;&#xa;&#9; B '/>
<normId id=' &apos;&#x20;&#13;&#xa;&#9; &apos; '/>
</doc>
12 changes: 12 additions & 0 deletions Modules/_xxtestfuzz/fuzz_elementtree_parsewhole_corpus/inC14N5.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE doc [
<!ATTLIST doc attrExtEnt CDATA #IMPLIED>
<!ENTITY ent1 "Hello">
<!ENTITY ent2 SYSTEM "world.txt">
<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
<!NOTATION gif SYSTEM "viewgif.exe">
]>
<doc attrExtEnt="entExt">
&ent1;, &ent2;!
</doc>

<!-- Let world.txt contain "world" (excluding the quotes) -->
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<doc>&#169;</doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:child="http://c" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<a:bar>xsd:string</a:bar>
<dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
</a:foo>
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<foo xmlns:a="http://a" xmlns:b="http://b">
<b:bar b:att1="val" att2="val"/>
</foo>
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:c="http://c">
<b:bar/>
<b:bar/>
<b:bar/>
<a:bar b:att1="val"/>
</a:foo>
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<foo xmlns:a="http://z3" xmlns:b="http://z2" a:att1="val1" b:att2="val2">
<bar xmlns="http://z0" xmlns:a="http://z2" a:att1="val1" b:att2="val2" xmlns:b="http://z3" />
</foo>
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<a:foo xmlns:a="http://z3" xmlns:b="http://z2" b:att1="val1" c:att3="val3" b:att2="val2" xmlns:c="http://z1" xmlns:d="http://z0">
<c:bar/>
<c:bar d:att3="val3"/>
</a:foo>
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<foo xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2" xmlns="http://z0">
<c:bar xmlns:a="http://z0" xmlns:c="http://z0" c:att3="val3"/>
<d:bar xmlns:d="http://z0"/>
</foo>
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<foo xmlns="http://z0" xml:id="23">
<bar xsi:type="xsd:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">data</bar>
</foo>
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml-stylesheet href="doc.xsl"
type="text/xsl" ?>
<doc>Hello, world!<!-- Comment 1 --></doc>
<?pi-without-data?>
<!-- Comment 2 -->
<!-- Comment 3 -->
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml-stylesheet href="doc.xsl"
type="text/xsl" ?>
<doc>Hello, world!</doc>
<?pi-without-data?>
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<doc>
<clean> </clean>
<dirty> A B </dirty>
<mixed>
A
<clean> </clean>
B
<dirty> A B </dirty>
C
</mixed>
</doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<doc><clean></clean><dirty>A B</dirty><mixed>A<clean></clean>B<dirty>A B</dirty>C</mixed></doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<doc>
<e1></e1>
<e2></e2>
<e3 id="elem3" name="elem3"></e3>
<e4 id="elem4" name="elem4"></e4>
<e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5>
<e6>
<e7 xmlns="http://www.ietf.org">
<e8 xmlns="">
<e9 attr="default"></e9>
</e8>
</e7>
</e6>
</doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<n0:doc xmlns:n0="">
<n0:e1></n0:e1>
<n0:e2></n0:e2>
<n0:e3 id="elem3" name="elem3"></n0:e3>
<n0:e4 id="elem4" name="elem4"></n0:e4>
<n1:e5 xmlns:n1="http://example.org" xmlns:n2="http://www.ietf.org" xmlns:n3="http://www.w3.org" attr="I'm" attr2="all" n2:attr="sorted" n3:attr="out"></n1:e5>
<n0:e6>
<n2:e7 xmlns:n2="http://www.ietf.org">
<n0:e8>
<n0:e9 attr="default"></n0:e9>
</n0:e8>
</n2:e7>
</n0:e6>
</n0:doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<doc><e1></e1><e2></e2><e3 id="elem3" name="elem3"></e3><e4 id="elem4" name="elem4"></e4><e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5><e6><e7 xmlns="http://www.ietf.org"><e8 xmlns=""><e9 attr="default"></e9></e8></e7></e6></doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<doc>
<text>First line&#xD;
Second line</text>
<value>2</value>
<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>
<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>
<norm attr=" ' &#xD;&#xA;&#x9; ' "></norm>
<normNames attr="A &#xD;&#xA;&#x9; B"></normNames>
<normId id="' &#xD;&#xA;&#x9; '"></normId>
</doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<doc><text>First line&#xD;
Second line</text><value>2</value><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute><compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute><norm attr=" ' &#xD;&#xA;&#x9; ' "></norm><normNames attr="A &#xD;&#xA;&#x9; B"></normNames><normId id="' &#xD;&#xA;&#x9; '"></normId></doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<doc attrExtEnt="entExt">
Hello, world!
</doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<doc attrExtEnt="entExt">Hello, world!</doc>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<doc>©</doc>