Skip to content

Commit

Permalink
Merge pull request #162 from Zegnat/rel-parsing
Browse files Browse the repository at this point in the history
Improve rel parsing
  • Loading branch information
aaronpk committed Mar 25, 2018
2 parents 743d979 + 64e7966 commit 2c6677d
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 14 deletions.
45 changes: 31 additions & 14 deletions Mf2/Parser.php
Expand Up @@ -1250,16 +1250,15 @@ public function parseRelsAndAlternates() {

// Iterate through all a, area and link elements with rel attributes
foreach ($this->xpath->query('//a[@rel and @href] | //link[@rel and @href] | //area[@rel and @href]') as $hyperlink) {
if ($hyperlink->getAttribute('rel') == '') {
// Parse the set of rels for the current link
$linkRels = array_unique(array_filter(preg_split('/[\t\n\f\r ]/', $hyperlink->getAttribute('rel'))));
if (count($linkRels) === 0) {
continue;
}

// Resolve the href
$href = $this->resolveUrl($hyperlink->getAttribute('href'));

// Split up the rel into space-separated values
$linkRels = array_filter(explode(' ', $hyperlink->getAttribute('rel')));

$rel_attributes = array();

if ($hyperlink->hasAttribute('media')) {
Expand All @@ -1278,8 +1277,8 @@ public function parseRelsAndAlternates() {
$rel_attributes['type'] = $hyperlink->getAttribute('type');
}

if ($hyperlink->nodeValue) {
$rel_attributes['text'] = $hyperlink->nodeValue;
if (strlen($hyperlink->textContent) > 0) {
$rel_attributes['text'] = $hyperlink->textContent;
}

if ($this->enableAlternates) {
Expand All @@ -1296,16 +1295,34 @@ public function parseRelsAndAlternates() {
}

foreach ($linkRels as $rel) {
$rels[$rel][] = $href;
if (!array_key_exists($rel, $rels)) {
$rels[$rel] = array($href);
} elseif (!in_array($href, $rels[$rel])) {
$rels[$rel][] = $href;
}
}

if (!in_array($href, $rel_urls)) {
$rel_urls[$href] = array_merge(
$rel_attributes,
array('rels' => $linkRels)
);
if (!array_key_exists($href, $rel_urls)) {
$rel_urls[$href] = array('rels' => array());
}

// Add the attributes collected only if they were not already set
$rel_urls[$href] = array_merge(
$rel_attributes,
$rel_urls[$href]
);

// Merge current rels with those already set
$rel_urls[$href]['rels'] = array_merge(
$rel_urls[$href]['rels'],
$linkRels
);
}

// Alphabetically sort the rels arrays after removing duplicates
foreach ($rel_urls as $href => $object) {
$rel_urls[$href]['rels'] = array_unique($rel_urls[$href]['rels']);
sort($rel_urls[$href]['rels']);
}

if (empty($rels) and $this->jsonMode) {
Expand All @@ -1314,8 +1331,8 @@ public function parseRelsAndAlternates() {

if (empty($rel_urls) and $this->jsonMode) {
$rel_urls = new stdClass();
}
}

return array($rels, $rel_urls, $alternates);
}

Expand Down
43 changes: 43 additions & 0 deletions tests/Mf2/RelTest.php
Expand Up @@ -176,4 +176,47 @@ public function testRelURLs() {
$this->assertArrayHasKey('rels', $output['rel-urls']['http://example.com/articles.atom']);
}

/**
 * The rels listed for one URL in rel-urls are de-duplicated and sorted.
 *
 * Two links share the href "#" and both carry the rel value "bookmark";
 * the combined rel-urls entry must list every rel value exactly once,
 * in alphabetical order.
 *
 * @see https://github.com/microformats/microformats2-parsing/issues/29
 * @see https://github.com/microformats/microformats2-parsing/issues/30
 */
public function testRelURLsRelsUniqueAndSorted() {
    $input = '<a href="#" rel="me bookmark"></a>' . "\n"
        . '<a href="#" rel="bookmark archived"></a>';
    $parser = new Parser($input);
    $output = $parser->parse();

    // Expected value goes first so PHPUnit labels the values correctly on failure.
    $this->assertEquals(
        array('archived', 'bookmark', 'me'),
        $output['rel-urls']['#']['rels']
    );
}

/**
 * Attributes of several links to the same URL are merged, first value wins.
 *
 * The first link supplies the text, the second one adds hreflang. The
 * merged rel-urls entry must keep the text of the first link, pick up
 * the hreflang of the second, and must not invent keys (media, title,
 * type) that neither link provided.
 */
public function testRelURLsInfoMergesCorrectly() {
    $input = '<a href="#" rel="a">This nodeValue</a>' . "\n"
        . '<a href="#" rel="a" hreflang="en">Not this nodeValue</a>';
    $parser = new Parser($input);
    $output = $parser->parse();
    $relUrl = $output['rel-urls']['#'];

    // Expected value goes first so PHPUnit labels the values correctly on failure.
    $this->assertEquals('en', $relUrl['hreflang']);
    $this->assertArrayNotHasKey('media', $relUrl);
    $this->assertArrayNotHasKey('title', $relUrl);
    $this->assertArrayNotHasKey('type', $relUrl);
    $this->assertEquals('This nodeValue', $relUrl['text']);
}

/**
 * A URL that appears more than once for the same rel is listed only once.
 *
 * "#a" is linked twice with rel="a" (with "#b" in between); the rels
 * list for "a" must contain each URL once, in order of first appearance.
 */
public function testRelURLsNoDuplicates() {
    $input = '<a href="#a" rel="a"></a>' . "\n"
        . '<a href="#b" rel="a"></a>' . "\n"
        . '<a href="#a" rel="a"></a>';
    $parser = new Parser($input);
    $output = $parser->parse();

    // Expected value goes first so PHPUnit labels the values correctly on failure.
    $this->assertEquals(array('#a', '#b'), $output['rels']['a']);
}

/**
 * Falsy link text is kept, while genuinely empty text is omitted.
 *
 * The string "0" is falsy in PHP but is still real link text, so the
 * "text" key must be present for "#a". The link to "#b" has no text at
 * all and must not get a "text" key.
 */
public function testRelURLsFalsyTextVSEmpty() {
    $input = '<a href="#a" rel="a">0</a>' . "\n"
        . '<a href="#b" rel="b"></a>';
    $parser = new Parser($input);
    $output = $parser->parse();

    $this->assertArrayHasKey('text', $output['rel-urls']['#a']);
    // Strict comparison: a loose check could not tell the string '0' from
    // other falsy values, which is exactly what this test guards against.
    $this->assertSame('0', $output['rel-urls']['#a']['text']);
    $this->assertArrayNotHasKey('text', $output['rel-urls']['#b']);
}

}

0 comments on commit 2c6677d

Please sign in to comment.