Skip to content

Commit

Permalink
RDFXML parser crashes on any character sequences decoded by urldecode()
Browse files Browse the repository at this point in the history
These changes were contributed by @zozlak on my fork:
https://github.com/sweetyrdf/easyrdf/pull/16

There was a code change in the meantime compared to
easyrdf/easyrdf::master: it was merged from easyrdf#294 and introduced the `fopen` call
when loading an XML file.

Co-authored-by: Mateusz Żółtak <zozlak@zozlak.org>
Co-authored-by: Konrad Abicht <hi@inspirito.de>
  • Loading branch information
k00ni and zozlak committed Jul 6, 2020
1 parent 1148be7 commit 55f5d76
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 7 deletions.
20 changes: 13 additions & 7 deletions lib/Parser/RdfXml.php
Original file line number Diff line number Diff line change
Expand Up @@ -799,13 +799,19 @@ public function parse($graph, $data, $format, $baseUri)
$this->initXMLParser();

/* parse */
if (!xml_parse($this->xmlParser, $data, false)) {
$message = xml_error_string(xml_get_error_code($this->xmlParser));
throw new Exception(
'XML error: "' . $message . '"',
xml_get_current_line_number($this->xmlParser),
xml_get_current_column_number($this->xmlParser)
);

$resource = fopen('data://text/plain,' . urlencode($data), 'r');

while ($data = fread($resource, 1024 * 1024)) {
if (!xml_parse($this->xmlParser, $data, feof($resource))) {
$message = xml_error_string(xml_get_error_code($this->xmlParser));

throw new Exception(
sprintf('XML error: "%s"', $message),
xml_get_current_line_number($this->xmlParser),
xml_get_current_column_number($this->xmlParser)
);
}
}

xml_parser_free($this->xmlParser);
Expand Down
16 changes: 16 additions & 0 deletions test/EasyRdf/Parser/RdfXmlTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,20 @@ public function testIssue157()
$this->assertEquals('http://www.example.org/base#foo', $iri);
}
}

/**
 * Regression test for https://github.com/sweetyrdf/easyrdf/issues/15
 *
 * Parses the issue15 RDF/XML fixture and verifies that the dct:created
 * literal of the described resource is returned unchanged, including the
 * "+" sign in its timezone offset.
 *
 * NOTE(review): presumably the "+" is the character that got mangled by
 * URL-decoding before the fix - confirm against the linked issue.
 *
 * @see https://github.com/sweetyrdf/easyrdf/issues/15
 */
public function testParseIssue15()
{
    $graph = new Graph();

    // Load the fixture first, then feed it to the parser as RDF/XML without a base URI.
    $rdfXml = readFixture('rdfxml/issue15.rdf');
    $this->parser->parse($graph, $rdfXml, 'rdfxml', null);

    // Look up the creation timestamp literal on the resource described by the fixture.
    $created = $graph
        ->resource('https://vocabs.acdh.oeaw.ac.at/archeoaisets/clarin-vlo')
        ->getLiteral('http://purl.org/dc/terms/created');

    $this->assertEquals('2019-10-07T11:15:48.188959+00:00', (string) $created);
}
}
6 changes: 6 additions & 0 deletions test/fixtures/issue15.rdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:dct="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="https://vocabs.acdh.oeaw.ac.at/archeoaisets/clarin-vlo">
<dct:created rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2019-10-07T11:15:48.188959+00:00</dct:created>
</rdf:Description>
</rdf:RDF>

0 comments on commit 55f5d76

Please sign in to comment.