From 6b0232ba3c3ed1e26ea8047c7220c35d6ae8044e Mon Sep 17 00:00:00 2001 From: Martijn van der Ven Date: Thu, 22 Mar 2018 09:12:03 +0100 Subject: [PATCH 1/6] Add broken test for unique and sorted rel values under rel-urls --- tests/Mf2/RelTest.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/Mf2/RelTest.php b/tests/Mf2/RelTest.php index 55c08f0..717d27e 100644 --- a/tests/Mf2/RelTest.php +++ b/tests/Mf2/RelTest.php @@ -176,4 +176,16 @@ public function testRelURLs() { $this->assertArrayHasKey('rels', $output['rel-urls']['http://example.com/articles.atom']); } + /** + * @see https://github.com/microformats/microformats2-parsing/issues/29 + * @see https://github.com/microformats/microformats2-parsing/issues/30 + */ + public function testRelURLsRelsUniqueAndSorted() { + $input = ' +'; + $parser = new Parser($input); + $output = $parser->parse(); + $this->assertEquals($output['rel-urls']['#']['rels'], ['archived', 'bookmark', 'me']); + } + } From ebffee46d2e8c59d8b2ed95e2e5364f39051b590 Mon Sep 17 00:00:00 2001 From: Martijn van der Ven Date: Thu, 22 Mar 2018 10:03:21 +0100 Subject: [PATCH 2/6] Add broken test for merging link information from several elements --- tests/Mf2/RelTest.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/Mf2/RelTest.php b/tests/Mf2/RelTest.php index 717d27e..30803cc 100644 --- a/tests/Mf2/RelTest.php +++ b/tests/Mf2/RelTest.php @@ -188,4 +188,16 @@ public function testRelURLsRelsUniqueAndSorted() { $this->assertEquals($output['rel-urls']['#']['rels'], ['archived', 'bookmark', 'me']); } + public function testRelURLsInfoMergesCorrectly() { + $input = 'This nodeValue +Not this nodeValue'; + $parser = new Parser($input); + $output = $parser->parse(); + $this->assertEquals($output['rel-urls']['#']['hreflang'], 'en'); + $this->assertArrayNotHasKey('media', $output['rel-urls']['#']); + $this->assertArrayNotHasKey('title', $output['rel-urls']['#']); + $this->assertArrayNotHasKey('type', $output['rel-urls']['#']); + $this->assertEquals($output['rel-urls']['#']['text'], 'This nodeValue'); + } + } From f2289a8d57c0aaf280d7af2f0aea6012fcfb8598 Mon Sep 17 00:00:00 2001 From: Martijn van der Ven Date: Thu, 22 Mar 2018 10:13:02 +0100 Subject: [PATCH 3/6] Improve parsing of link relationships * Parse the rel attribute in accordance with the WHATWG spec: https://infra.spec.whatwg.org/#split-on-ascii-whitespace * Only list unique rel values in the rel-urls output, fixes #159: https://github.com/microformats/microformats2-parsing/issues/30 * Sort the unique rel values alphabetically: https://github.com/microformats/microformats2-parsing/issues/29 * Correctly merge attribute values into the resulting object. --- Mf2/Parser.php | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index 4d72f94..d4aa69c 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1250,16 +1250,15 @@ public function parseRelsAndAlternates() { // Iterate through all a, area and link elements with rel attributes foreach ($this->xpath->query('//a[@rel and @href] | //link[@rel and @href] | //area[@rel and @href]') as $hyperlink) { - if ($hyperlink->getAttribute('rel') == '') { + // Parse the set of rels for the current link + $linkRels = array_unique(array_filter(preg_split('/[\t\n\f\r ]/', $hyperlink->getAttribute('rel')))); + if (count($linkRels) === 0) { continue; } // Resolve the href $href = $this->resolveUrl($hyperlink->getAttribute('href')); - // Split up the rel into space-separated values - $linkRels = array_filter(explode(' ', $hyperlink->getAttribute('rel'))); - $rel_attributes = array(); if ($hyperlink->hasAttribute('media')) { @@ -1299,13 +1298,27 @@ public function parseRelsAndAlternates() { $rels[$rel][] = $href; } - if (!in_array($href, $rel_urls)) { - $rel_urls[$href] = array_merge( - $rel_attributes, - array('rels' => $linkRels) - ); + if (!array_key_exists($href, $rel_urls)) { + $rel_urls[$href] = array('rels' => array()); } + // Add the attributes collected only if they were not already set + $rel_urls[$href] = array_merge( + $rel_attributes, + $rel_urls[$href] + ); + + // Merge current rels with those already set + $rel_urls[$href]['rels'] = array_merge( + $rel_urls[$href]['rels'], + $linkRels + ); + } + + // Alphabetically sort the rels arrays after removing duplicates + foreach ($rel_urls as $href => $object) { + $rel_urls[$href]['rels'] = array_unique($rel_urls[$href]['rels']); + sort($rel_urls[$href]['rels']); } if (empty($rels) and $this->jsonMode) { @@ -1314,8 +1327,8 @@ public function parseRelsAndAlternates() { if (empty($rel_urls) and $this->jsonMode) { $rel_urls = new stdClass(); - } - + } + return array($rels, $rel_urls, $alternates); } From 31ca035dd4bddb67b88c767c3ebc15c119900e0e Mon Sep 17 00:00:00 2001 From: Martijn van der Ven Date: Thu, 22 Mar 2018 10:48:01 +0100 Subject: [PATCH 4/6] Test and fix for duplicate URLs in the rels object --- Mf2/Parser.php | 6 +++++- tests/Mf2/RelTest.php | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index d4aa69c..4506e62 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1295,7 +1295,11 @@ public function parseRelsAndAlternates() { } foreach ($linkRels as $rel) { - $rels[$rel][] = $href; + if (!array_key_exists($rel, $rels)) { + $rels[$rel] = array($href); + } elseif (!in_array($href, $rels[$rel])) { + $rels[$rel][] = $href; + } } if (!array_key_exists($href, $rel_urls)) { diff --git a/tests/Mf2/RelTest.php b/tests/Mf2/RelTest.php index 30803cc..021a296 100644 --- a/tests/Mf2/RelTest.php +++ b/tests/Mf2/RelTest.php @@ -200,4 +200,13 @@ public function testRelURLsInfoMergesCorrectly() { $this->assertEquals($output['rel-urls']['#']['text'], 'This nodeValue'); } + public function testRelURLsNoDuplicates() { + $input = ' + +'; + $parser = new Parser($input); + $output = $parser->parse(); + $this->assertEquals($output['rels']['a'], ['#a', '#b']); + } + } From 108fbf05a411fd3975dc55f2e148657c3c242b90 Mon Sep 17 00:00:00 2001 From: Martijn van der Ven Date: Thu, 22 Mar 2018 10:57:09 +0100 Subject: [PATCH 5/6] Stop using the short array syntax --- tests/Mf2/RelTest.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/Mf2/RelTest.php b/tests/Mf2/RelTest.php index 021a296..2f34b23 100644 --- a/tests/Mf2/RelTest.php +++ b/tests/Mf2/RelTest.php @@ -185,7 +185,7 @@ public function testRelURLsRelsUniqueAndSorted() { '; $parser = new Parser($input); $output = $parser->parse(); - $this->assertEquals($output['rel-urls']['#']['rels'], ['archived', 'bookmark', 'me']); + $this->assertEquals($output['rel-urls']['#']['rels'], array('archived', 'bookmark', 'me')); } public function testRelURLsInfoMergesCorrectly() { @@ -206,7 +206,7 @@ public function testRelURLsNoDuplicates() { '; $parser = new Parser($input); $output = $parser->parse(); - $this->assertEquals($output['rels']['a'], ['#a', '#b']); + $this->assertEquals($output['rels']['a'], array('#a', '#b')); } } From 64e79666a0aabfa37c03b27a59620ca3357fa759 Mon Sep 17 00:00:00 2001 From: Martijn van der Ven Date: Thu, 22 Mar 2018 13:32:11 +0100 Subject: [PATCH 6/6] Never do falsy checks on strings --- Mf2/Parser.php | 4 ++-- tests/Mf2/RelTest.php | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Mf2/Parser.php b/Mf2/Parser.php index 4506e62..350fcf0 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -1277,8 +1277,8 @@ public function parseRelsAndAlternates() { $rel_attributes['type'] = $hyperlink->getAttribute('type'); } - if ($hyperlink->nodeValue) { - $rel_attributes['text'] = $hyperlink->nodeValue; + if (strlen($hyperlink->textContent) > 0) { + $rel_attributes['text'] = $hyperlink->textContent; } if ($this->enableAlternates) { diff --git a/tests/Mf2/RelTest.php b/tests/Mf2/RelTest.php index 2f34b23..2d737a2 100644 --- a/tests/Mf2/RelTest.php +++ b/tests/Mf2/RelTest.php @@ -209,4 +209,14 @@ public function testRelURLsNoDuplicates() { $this->assertEquals($output['rels']['a'], array('#a', '#b')); } + public function testRelURLsFalsyTextVSEmpty() { + $input = '0 +'; + $parser = new Parser($input); + $output = $parser->parse(); + $this->assertArrayHasKey('text', $output['rel-urls']['#a']); + $this->assertEquals($output['rel-urls']['#a']['text'], '0'); + $this->assertArrayNotHasKey('text', $output['rel-urls']['#b']); + } + }