diff --git a/library/SimplePie.php b/library/SimplePie.php index ac3d62128..24931b255 100755 --- a/library/SimplePie.php +++ b/library/SimplePie.php @@ -1614,18 +1614,32 @@ protected function fetch_data(&$cache) $copyContentType = $file->headers['content-type']; try { - // First check for h-entry microformats in the current file. $microformats = false; - $position = 0; - while ($position = strpos($file->body, 'h-entry', $position)) - { - $start = $position < 200 ? 0 : $position - 200; - $check = substr($file->body, $start, 400); - if ($microformats = preg_match('/class="[^"]*h-entry/', $check)) + if (function_exists('Mf2\parse')) { + // Check for both h-feed and h-entry, as both a feed with no entries + // and a list of entries without an h-feed wrapper are both valid. + $position = 0; + while ($position = strpos($file->body, 'h-feed', $position)) { - break; + $start = $position < 200 ? 0 : $position - 200; + $check = substr($file->body, $start, 400); + if ($microformats = preg_match('/class="[^"]*h-feed/', $check)) + { + break; + } + $position += 7; + } + $position = 0; + while ($position = strpos($file->body, 'h-entry', $position)) + { + $start = $position < 200 ? 0 : $position - 200; + $check = substr($file->body, $start, 400); + if ($microformats = preg_match('/class="[^"]*h-entry/', $check)) + { + break; + } + $position += 7; } - $position += 7; } // Now also do feed discovery, but if an h-entry was found don't // overwrite the current value of file. @@ -1633,6 +1647,11 @@ protected function fetch_data(&$cache) $this->all_discovered_feeds); if ($microformats) { + if ($hub = $locate->get_rel_link('hub')) + { + $self = $locate->get_rel_link('self'); + $this->store_links($file, $hub, $self); + } // Push the current file onto all_discovered feeds so the user can // be shown this as one of the options. if (isset($this->all_discovered_feeds)) { @@ -1681,7 +1700,6 @@ protected function fetch_data(&$cache) $this->raw_data = $file->body; $this->permanent_url = $file->permanent_url; - $this->store_links($file); $headers = $file->headers; $sniffer = $this->registry->create('Content_Type_Sniffer', array(&$file)); $sniffed = $sniffer->get_type(); @@ -3221,52 +3239,19 @@ public static function merge_items($urls, $start = 0, $end = 0, $limit = 0) * * There is no way to find PuSH links in the body of a microformats feed, * so they are added to the headers when found, to be used later by get_links. - * @param SimplePie_File + * @param SimplePie_File $file + * @param string $hub + * @param string $self */ - private function store_links(&$file) { + private function store_links(&$file, $hub, $self) { if (isset($file->headers['link']['hub']) || (isset($file->headers['link']) && preg_match('/rel=hub/', $file->headers['link']))) { return; } - $hub = ''; - $self = ''; - $position = 0; - $regex1 = '/<(?:link|a) href="([^"]*)" rel="[^"]*hub[^"]*"/'; - $regex2 = '/<(?:link|a) rel="[^"]*hub[^"]*" href="([^"]*)"/'; - while ($position = strpos($file->body, 'rel="hub"', $position + 7)) - { - $start = $position < 200 ? 0 : $position - 200; - $check = substr($file->body, $start, 400); - if (preg_match($regex1, $check, $match)) - { - $hub = $match[1] === '' ? $file->url : $match[1]; - } - else if (preg_match($regex2, $check, $match)) - { - $hub = $match[1] === '' ? $file->url : $match[1]; - } - if ($hub !== '') break; - } - $position = 0; - $regex1 = '/<(?:link|a) href="([^"]*)" rel="[^"]*self[^"]*"/'; - $regex2 = '/<(?:link|a) rel="[^"]*self[^"]*" href="([^"]*)"/'; - while ($position = strpos($file->body, 'rel="self"', $position + 7)) - { - $start = $position < 200 ? 0 : $position - 200; - $check = substr($file->body, $start, 400); - if (preg_match($regex1, $check, $match)) - { - $self = $match[1] === '' ? $file->url : $match[1]; - } - if (preg_match($regex2, $check, $match)) - { - $self = $match[1] === '' ? $file->url : $match[1]; - } - if ($self !== '') break; - } - if ($hub !== '') + + if ($hub) { if (isset($file->headers['link'])) { @@ -3280,7 +3265,7 @@ private function store_links(&$file) { $file->headers['link'] = ''; } $file->headers['link'] .= '<'.$hub.'>; rel=hub'; - if ($self !== '') + if ($self) { $file->headers['link'] .= ', <'.$self.'>; rel=self'; } diff --git a/library/SimplePie/IRI.php b/library/SimplePie/IRI.php index 532174cf5..2b3fbaf07 100644 --- a/library/SimplePie/IRI.php +++ b/library/SimplePie/IRI.php @@ -776,24 +776,20 @@ protected function scheme_normalization() */ public function is_valid() { - $isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null; - if ($this->ipath !== '' && - ( - $isauthority && ( - $this->ipath[0] !== '/' || - substr($this->ipath, 0, 2) === '//' - ) || - ( - $this->scheme === null && - !$isauthority && - strpos($this->ipath, ':') !== false && - (strpos($this->ipath, '/') === false ? true : strpos($this->ipath, ':') < strpos($this->ipath, '/')) - ) - ) - ) - { - return false; - } + if ($this->ipath === '') return true; + + $isauthority = $this->iuserinfo !== null || $this->ihost !== null || + $this->port !== null; + if ($isauthority && $this->ipath[0] === '/') return true; + + if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) return false; + + // Relative urls cannot have a colon in the first path segment (and the + // slashes themselves are not included so skip the first character). + if (!$this->scheme && !$isauthority && + strpos($this->ipath, ':') !== false && + strpos($this->ipath, '/', 1) !== false && + strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) return false; return true; } diff --git a/library/SimplePie/Locator.php b/library/SimplePie/Locator.php index 198cfd104..1010066cf 100644 --- a/library/SimplePie/Locator.php +++ b/library/SimplePie/Locator.php @@ -281,7 +281,7 @@ public function get_links() { $href = trim($link->getAttribute('href')); $parsed = $this->registry->call('Misc', 'parse_url', array($href)); - if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme'])) + if ($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme'])) { if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo()) { @@ -318,6 +318,57 @@ public function get_links() return null; } + public function get_rel_link($rel) + { + if ($this->dom === null) + { + throw new SimplePie_Exception('DOMDocument not found, unable to use '. + 'locator'); + } + if (!class_exists('DOMXpath')) + { + throw new SimplePie_Exception('DOMXpath not found, unable to use '. + 'get_rel_link'); + } + + $xpath = new DOMXpath($this->dom); + $query = '//a[@rel and @href] | //link[@rel and @href]'; + foreach ($xpath->query($query) as $link) + { + $href = trim($link->getAttribute('href')); + $parsed = $this->registry->call('Misc', 'parse_url', array($href)); + if ($parsed['scheme'] === '' || + preg_match('/^https?$/i', $parsed['scheme'])) + { + if (method_exists($link, 'getLineNo') && + $this->base_location < $link->getLineNo()) + { + $href = + $this->registry->call('Misc', 'absolutize_url', + array(trim($link->getAttribute('href')), + $this->base)); + } + else + { + $href = + $this->registry->call('Misc', 'absolutize_url', + array(trim($link->getAttribute('href')), + $this->http_base)); + } + if ($href === false) + { + return null; + } + $rel_values = explode(' ', strtolower($link->getAttribute('rel'))); + if (in_array($rel, $rel_values)) + { + return $href; + } + } + } + return null; + } + public function extension(&$array) { foreach ($array as $key => $value) diff --git a/library/SimplePie/Parser.php b/library/SimplePie/Parser.php index 15e686d56..17139abe9 100644 --- a/library/SimplePie/Parser.php +++ b/library/SimplePie/Parser.php @@ -76,14 +76,27 @@ public function set_registry(SimplePie_Registry $registry) public function parse(&$data, $encoding, $url = '') { - $position = 0; - while ($position = strpos($data, 'h-entry', $position)) { - $start = $position < 200 ? 0 : $position - 200; - $check = substr($data, $start, 400); - if (preg_match('/class="[^"]*h-entry/', $check)) { - return $this->parse_microformats($data, $url); + if (function_exists('Mf2\parse')) { + // Check for both h-feed and h-entry, as both a feed with no entries + // and a list of entries without an h-feed wrapper are both valid. + $position = 0; + while ($position = strpos($data, 'h-feed', $position)) { + $start = $position < 200 ? 0 : $position - 200; + $check = substr($data, $start, 400); + if (preg_match('/class="[^"]*h-feed/', $check)) { + return $this->parse_microformats($data, $url); + } + $position += 7; + } + $position = 0; + while ($position = strpos($data, 'h-entry', $position)) { + $start = $position < 200 ? 0 : $position - 200; + $check = substr($data, $start, 400); + if (preg_match('/class="[^"]*h-entry/', $check)) { + return $this->parse_microformats($data, $url); + } + $position += 7; } - $position += 7; } // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character @@ -439,10 +452,8 @@ private function parse_hcard($data, $category = false) { } private function parse_microformats(&$data, $url) { - if (!function_exists('Mf2\parse')) return false; - $feed_title = ''; - $icon = ''; + $feed_author = NULL; $author_cache = array(); $items = array(); $entries = array(); @@ -458,23 +469,20 @@ private function parse_microformats(&$data, $url) { if (!isset($mf_item['children'][0]['type'])) continue; if (in_array('h-feed', $mf_item['children'][0]['type'])) { $h_feed = $mf_item['children'][0]; + // In this case the parent of the h-feed may be an h-card, so use it as + // the feed_author. + if (in_array('h-card', $mf_item['type'])) $feed_author = $mf_item; break; } } if (isset($h_feed['children'])) { $entries = $h_feed['children']; - // Also set the feed title and icon from the h-feed if available. + // Also set the feed title and store author from the h-feed if available. if (isset($mf['items'][0]['properties']['name'][0])) { $feed_title = $mf['items'][0]['properties']['name'][0]; } if (isset($mf['items'][0]['properties']['author'][0])) { - $author = $mf['items'][0]['properties']['author'][0]; - if (is_array($author) && - isset($author['type']) && in_array('h-card', $author['type'])) { - if (isset($author['properties']['photo'][0])) { - $icon = $author['properties']['photo'][0]; - } - } + $feed_author = $mf['items'][0]['properties']['author'][0]; } } else { @@ -501,12 +509,13 @@ private function parse_microformats(&$data, $url) { if (isset($title['value'])) $title = $title['value']; $item['title'] = array(array('data' => $title)); } - if (isset($entry['properties']['author'][0])) { + if (isset($entry['properties']['author'][0]) || isset($feed_author)) { // author is a special case, it can be plain text or an h-card array. // If it's plain text it can also be a url that should be followed to // get the actual h-card. - $author = $entry['properties']['author'][0]; - if (is_array($author)) { + $author = isset($entry['properties']['author'][0]) ? + $entry['properties']['author'][0] : $feed_author; + if (!is_string($author)) { $author = $this->parse_hcard($author); } else if (strpos($author, 'http') === 0) { @@ -574,6 +583,11 @@ private function parse_microformats(&$data, $url) { $item['title'] = array(array('data' => $title)); } $description .= $entry['properties']['content'][0]['html']; + if (isset($entry['properties']['in-reply-to'][0]['value'])) { + $in_reply_to = $entry['properties']['in-reply-to'][0]['value']; + $description .= '

'. + ''.$in_reply_to.'

'; + } $item['description'] = array(array('data' => $description)); } if (isset($entry['properties']['category'])) { @@ -608,9 +622,10 @@ private function parse_microformats(&$data, $url) { // Mimic RSS data format when storing microformats. $link = array(array('data' => $url)); $image = ''; - if ($icon !== '') { - array(array('child' => array('' => - array('url' => array(array('data' => $icon)))))); + if (!is_string($feed_author) && + isset($feed_author['properties']['photo'][0])) { + $image = array(array('child' => array('' => array('url' => + array(array('data' => $feed_author['properties']['photo'][0])))))); } // Use the a name given for the h-feed, or get the title from the html. if ($feed_title !== '') { diff --git a/library/SimplePie/Sanitize.php b/library/SimplePie/Sanitize.php index d654db6e5..aeb70391e 100644 --- a/library/SimplePie/Sanitize.php +++ b/library/SimplePie/Sanitize.php @@ -368,8 +368,9 @@ public function sanitize($data, $type, $base = '') // Finally, convert to a HTML string $data = trim($document->saveHTML()); - - list($_, $data, $_) = explode($unique_tag, $data); + $result = explode($unique_tag, $data); + // The tags may not be found again if there was invalid markup. + $data = count($result) === 3 ? $result[1] : ''; if ($this->remove_div) { diff --git a/tests/IRITest.php b/tests/IRITest.php index 2492e3c88..d7011a091 100644 --- a/tests/IRITest.php +++ b/tests/IRITest.php @@ -3,7 +3,7 @@ /** * IRI test cases * - * Copyright (c) 2008-2012 Geoffrey Sneddon. + * Copyright (c) 2008-2016 Geoffrey Sneddon. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,7 +34,7 @@ * * @package IRI * @author Geoffrey Sneddon - * @copyright 2008-2012 Geoffrey Sneddon + * @copyright 2008-2016 Geoffrey Sneddon * @license http://www.opensource.org/licenses/bsd-license.php * @link http://hg.gsnedders.com/iri/ * @@ -380,9 +380,26 @@ public function testInvalidAbsolutizeBase() $this->assertFalse(SimplePie_IRI::absolutize('://not a URL', '../')); } - public function testInvalidAbsolutizeRelative() + public function testInvalidPathNoHost() { - $this->assertFalse(SimplePie_IRI::absolutize('http://example.com/', 'http://example.com//not a URL')); + $iri = new SimplePie_IRI(); + $iri->scheme = 'http'; + $iri->path = '//test'; + $this->assertFalse($iri->is_valid()); + } + + public function testInvalidRelativePathContainsColon() + { + $iri = new SimplePie_IRI(); + $iri->path = '/test:/'; + $this->assertFalse($iri->is_valid()); + } + + public function testValidRelativePathContainsColon() + { + $iri = new SimplePie_IRI(); + $iri->path = '/test/:'; + $this->assertTrue($iri->is_valid()); } public function testFullGamut() @@ -411,22 +428,22 @@ public function testReadAliased() $iri->path = '/test/'; $iri->fragment = 'test'; - $this->assertEquals('http', $iri->ischeme); - $this->assertEquals('user:password', $iri->iuserinfo); - $this->assertEquals('example.com', $iri->ihost); - $this->assertEquals(80, $iri->iport); - $this->assertEquals('/test/', $iri->ipath); - $this->assertEquals('test', $iri->ifragment); + $this->assertEquals('http', $iri->scheme); + $this->assertEquals('user:password', $iri->userinfo); + $this->assertEquals('example.com', $iri->host); + $this->assertEquals(80, $iri->port); + $this->assertEquals('/test/', $iri->path); + $this->assertEquals('test', $iri->fragment); } public function testWriteAliased() { $iri = new SimplePie_IRI(); $iri->scheme = 'http'; - $iri->iuserinfo = 'user:password'; - $iri->ihost = 'example.com'; - $iri->ipath = '/test/'; - $iri->ifragment = 'test'; + $iri->userinfo = 'user:password'; + $iri->host = 'example.com'; + $iri->path = '/test/'; + $iri->fragment = 'test'; $this->assertEquals('http', $iri->scheme); $this->assertEquals('user:password', $iri->userinfo);