Skip to content

Commit

Permalink
Merge pull request #453 from mblaney/master
Browse files Browse the repository at this point in the history
This release fixes an IRI parsing bug reported recently. It also
  • Loading branch information
mblaney committed Jun 14, 2016
2 parents 2f272a0 + 426dc5b commit 9b775e8
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 109 deletions.
85 changes: 35 additions & 50 deletions library/SimplePie.php
Expand Up @@ -1614,25 +1614,44 @@ protected function fetch_data(&$cache)
$copyContentType = $file->headers['content-type'];
try
{
// First check for h-entry microformats in the current file.
$microformats = false;
$position = 0;
while ($position = strpos($file->body, 'h-entry', $position))
{
$start = $position < 200 ? 0 : $position - 200;
$check = substr($file->body, $start, 400);
if ($microformats = preg_match('/class="[^"]*h-entry/', $check))
if (function_exists('Mf2\parse')) {
// Check for both h-feed and h-entry: a feed with no entries and a list of
// entries without an h-feed wrapper are each valid.
$position = 0;
while ($position = strpos($file->body, 'h-feed', $position))
{
break;
$start = $position < 200 ? 0 : $position - 200;
$check = substr($file->body, $start, 400);
if ($microformats = preg_match('/class="[^"]*h-feed/', $check))
{
break;
}
$position += 7;
}
$position = 0;
while ($position = strpos($file->body, 'h-entry', $position))
{
$start = $position < 200 ? 0 : $position - 200;
$check = substr($file->body, $start, 400);
if ($microformats = preg_match('/class="[^"]*h-entry/', $check))
{
break;
}
$position += 7;
}
$position += 7;
}
// Now also do feed discovery, but if an h-feed or h-entry was found don't
// overwrite the current value of file.
$discovered = $locate->find($this->autodiscovery,
$this->all_discovered_feeds);
if ($microformats)
{
if ($hub = $locate->get_rel_link('hub'))
{
$self = $locate->get_rel_link('self');
$this->store_links($file, $hub, $self);
}
// Push the current file onto all_discovered feeds so the user can
// be shown this as one of the options.
if (isset($this->all_discovered_feeds)) {
Expand Down Expand Up @@ -1681,7 +1700,6 @@ protected function fetch_data(&$cache)

$this->raw_data = $file->body;
$this->permanent_url = $file->permanent_url;
$this->store_links($file);
$headers = $file->headers;
$sniffer = $this->registry->create('Content_Type_Sniffer', array(&$file));
$sniffed = $sniffer->get_type();
Expand Down Expand Up @@ -3221,52 +3239,19 @@ public static function merge_items($urls, $start = 0, $end = 0, $limit = 0)
*
* There is no way to find PuSH links in the body of a microformats feed,
* so they are added to the headers when found, to be used later by get_links.
* @param SimplePie_File
* @param SimplePie_File $file
* @param string $hub
* @param string $self
*/
private function store_links(&$file) {
private function store_links(&$file, $hub, $self) {
if (isset($file->headers['link']['hub']) ||
(isset($file->headers['link']) &&
preg_match('/rel=hub/', $file->headers['link'])))
{
return;
}
$hub = '';
$self = '';
$position = 0;
$regex1 = '/<(?:link|a) href="([^"]*)" rel="[^"]*hub[^"]*"/';
$regex2 = '/<(?:link|a) rel="[^"]*hub[^"]*" href="([^"]*)"/';
while ($position = strpos($file->body, 'rel="hub"', $position + 7))
{
$start = $position < 200 ? 0 : $position - 200;
$check = substr($file->body, $start, 400);
if (preg_match($regex1, $check, $match))
{
$hub = $match[1] === '' ? $file->url : $match[1];
}
else if (preg_match($regex2, $check, $match))
{
$hub = $match[1] === '' ? $file->url : $match[1];
}
if ($hub !== '') break;
}
$position = 0;
$regex1 = '/<(?:link|a) href="([^"]*)" rel="[^"]*self[^"]*"/';
$regex2 = '/<(?:link|a) rel="[^"]*self[^"]*" href="([^"]*)"/';
while ($position = strpos($file->body, 'rel="self"', $position + 7))
{
$start = $position < 200 ? 0 : $position - 200;
$check = substr($file->body, $start, 400);
if (preg_match($regex1, $check, $match))
{
$self = $match[1] === '' ? $file->url : $match[1];
}
if (preg_match($regex2, $check, $match))
{
$self = $match[1] === '' ? $file->url : $match[1];
}
if ($self !== '') break;
}
if ($hub !== '')

if ($hub)
{
if (isset($file->headers['link']))
{
Expand All @@ -3280,7 +3265,7 @@ private function store_links(&$file) {
$file->headers['link'] = '';
}
$file->headers['link'] .= '<'.$hub.'>; rel=hub';
if ($self !== '')
if ($self)
{
$file->headers['link'] .= ', <'.$self.'>; rel=self';
}
Expand Down
32 changes: 14 additions & 18 deletions library/SimplePie/IRI.php
Expand Up @@ -776,24 +776,20 @@ protected function scheme_normalization()
*/
public function is_valid()
{
// NOTE(review): this span comes from a rendered diff. The first condition
// below and the statements that follow it look like the removed and the
// added version of the same check shown back to back -- confirm against the
// real file before relying on the combined behaviour.

// An authority is present when any of userinfo, host or port is set.
$isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
// Reject a non-empty path when either:
//  - an authority is present and the path has no leading '/' or starts
//    with '//', or
//  - there is neither a scheme nor an authority, and the path contains a
//    colon that comes before the first slash (or there is no slash at all).
if ($this->ipath !== '' &&
(
$isauthority && (
$this->ipath[0] !== '/' ||
substr($this->ipath, 0, 2) === '//'
) ||
(
$this->scheme === null &&
!$isauthority &&
strpos($this->ipath, ':') !== false &&
(strpos($this->ipath, '/') === false ? true : strpos($this->ipath, ':') < strpos($this->ipath, '/'))
)
)
)
{
return false;
}
// An empty path is always valid.
if ($this->ipath === '') return true;

// An authority is present when any of userinfo, host or port is set.
$isauthority = $this->iuserinfo !== null || $this->ihost !== null ||
$this->port !== null;
// With an authority, a path that starts with '/' is valid.
if ($isauthority && $this->ipath[0] === '/') return true;

// Without an authority, a path must not begin with '//'.
if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) return false;

// Relative urls cannot have a colon in the first path segment (and the
// slashes themselves are not included so skip the first character).
// Only applies when a slash exists at offset 1 or later; a path with a
// colon but no such slash is not rejected by this check.
if (!$this->scheme && !$isauthority &&
strpos($this->ipath, ':') !== false &&
strpos($this->ipath, '/', 1) !== false &&
strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) return false;

return true;
}
Expand Down
53 changes: 52 additions & 1 deletion library/SimplePie/Locator.php
Expand Up @@ -281,7 +281,7 @@ public function get_links()
{
$href = trim($link->getAttribute('href'));
$parsed = $this->registry->call('Misc', 'parse_url', array($href));
if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme']))
if ($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme']))
{
if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo())
{
Expand Down Expand Up @@ -318,6 +318,57 @@ public function get_links()
return null;
}

/**
 * Find the first <a> or <link> element whose rel attribute contains the
 * given link relation and return its absolutized href.
 *
 * Only hrefs without a scheme, or with an http/https scheme, are considered.
 * Links whose href cannot be absolutized are skipped rather than aborting
 * the search, so one malformed link does not hide a valid one further down.
 *
 * @param string $rel Link relation to look for (e.g. 'hub' or 'self'),
 *                    compared case-insensitively
 * @return string|null Absolute URL of the first matching link, null if none
 * @throws SimplePie_Exception If no DOM is loaded or DOMXpath is unavailable
 */
public function get_rel_link($rel)
{
    if ($this->dom === null)
    {
        throw new SimplePie_Exception('DOMDocument not found, unable to use '.
            'locator');
    }
    if (!class_exists('DOMXpath'))
    {
        throw new SimplePie_Exception('DOMXpath not found, unable to use '.
            'get_rel_link');
    }

    // The rel tokens are lower-cased below, so normalise the needle as well.
    $rel = strtolower($rel);

    $xpath = new DOMXpath($this->dom);
    $query = '//a[@rel and @href] | //link[@rel and @href]';
    foreach ($xpath->query($query) as $link)
    {
        // rel is a whitespace-separated token list; split on any run of
        // whitespace (not just a single space) and drop empty tokens.
        $rel_values = preg_split('/\s+/',
            strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array($rel, $rel_values, true))
        {
            continue;
        }

        $href = trim($link->getAttribute('href'));
        $parsed = $this->registry->call('Misc', 'parse_url', array($href));
        if ($parsed['scheme'] !== '' &&
            !preg_match('/^https?$/i', $parsed['scheme']))
        {
            continue;
        }

        // Elements located after the document's <base> resolve against it;
        // everything else resolves against the HTTP-level base.
        $use_document_base = method_exists($link, 'getLineNo') &&
            $this->base_location < $link->getLineNo();
        $base = $use_document_base ? $this->base : $this->http_base;

        $href = $this->registry->call('Misc', 'absolutize_url',
            array($href, $base));
        if ($href !== false)
        {
            return $href;
        }
    }
    return null;
}

public function extension(&$array)
{
foreach ($array as $key => $value)
Expand Down
63 changes: 39 additions & 24 deletions library/SimplePie/Parser.php
Expand Up @@ -76,14 +76,27 @@ public function set_registry(SimplePie_Registry $registry)

public function parse(&$data, $encoding, $url = '')
{
$position = 0;
while ($position = strpos($data, 'h-entry', $position)) {
$start = $position < 200 ? 0 : $position - 200;
$check = substr($data, $start, 400);
if (preg_match('/class="[^"]*h-entry/', $check)) {
return $this->parse_microformats($data, $url);
if (function_exists('Mf2\parse')) {
// Check for both h-feed and h-entry: a feed with no entries and a list of
// entries without an h-feed wrapper are each valid.
$position = 0;
while ($position = strpos($data, 'h-feed', $position)) {
$start = $position < 200 ? 0 : $position - 200;
$check = substr($data, $start, 400);
if (preg_match('/class="[^"]*h-feed/', $check)) {
return $this->parse_microformats($data, $url);
}
$position += 7;
}
$position = 0;
while ($position = strpos($data, 'h-entry', $position)) {
$start = $position < 200 ? 0 : $position - 200;
$check = substr($data, $start, 400);
if (preg_match('/class="[^"]*h-entry/', $check)) {
return $this->parse_microformats($data, $url);
}
$position += 7;
}
$position += 7;
}

// Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
Expand Down Expand Up @@ -439,10 +452,8 @@ private function parse_hcard($data, $category = false) {
}

private function parse_microformats(&$data, $url) {
if (!function_exists('Mf2\parse')) return false;

$feed_title = '';
$icon = '';
$feed_author = NULL;
$author_cache = array();
$items = array();
$entries = array();
Expand All @@ -458,23 +469,20 @@ private function parse_microformats(&$data, $url) {
if (!isset($mf_item['children'][0]['type'])) continue;
if (in_array('h-feed', $mf_item['children'][0]['type'])) {
$h_feed = $mf_item['children'][0];
// In this case the parent of the h-feed may be an h-card, so use it as
// the feed_author.
if (in_array('h-card', $mf_item['type'])) $feed_author = $mf_item;
break;
}
}
if (isset($h_feed['children'])) {
$entries = $h_feed['children'];
// Also set the feed title and icon from the h-feed if available.
// Also set the feed title and store author from the h-feed if available.
if (isset($mf['items'][0]['properties']['name'][0])) {
$feed_title = $mf['items'][0]['properties']['name'][0];
}
if (isset($mf['items'][0]['properties']['author'][0])) {
$author = $mf['items'][0]['properties']['author'][0];
if (is_array($author) &&
isset($author['type']) && in_array('h-card', $author['type'])) {
if (isset($author['properties']['photo'][0])) {
$icon = $author['properties']['photo'][0];
}
}
$feed_author = $mf['items'][0]['properties']['author'][0];
}
}
else {
Expand All @@ -501,12 +509,13 @@ private function parse_microformats(&$data, $url) {
if (isset($title['value'])) $title = $title['value'];
$item['title'] = array(array('data' => $title));
}
if (isset($entry['properties']['author'][0])) {
if (isset($entry['properties']['author'][0]) || isset($feed_author)) {
// author is a special case, it can be plain text or an h-card array.
// If it's plain text it can also be a url that should be followed to
// get the actual h-card.
$author = $entry['properties']['author'][0];
if (is_array($author)) {
$author = isset($entry['properties']['author'][0]) ?
$entry['properties']['author'][0] : $feed_author;
if (!is_string($author)) {
$author = $this->parse_hcard($author);
}
else if (strpos($author, 'http') === 0) {
Expand Down Expand Up @@ -574,6 +583,11 @@ private function parse_microformats(&$data, $url) {
$item['title'] = array(array('data' => $title));
}
$description .= $entry['properties']['content'][0]['html'];
if (isset($entry['properties']['in-reply-to'][0]['value'])) {
$in_reply_to = $entry['properties']['in-reply-to'][0]['value'];
$description .= '<p><span class="in-reply-to"></span> '.
'<a href="'.$in_reply_to.'">'.$in_reply_to.'</a><p>';
}
$item['description'] = array(array('data' => $description));
}
if (isset($entry['properties']['category'])) {
Expand Down Expand Up @@ -608,9 +622,10 @@ private function parse_microformats(&$data, $url) {
// Mimic RSS data format when storing microformats.
$link = array(array('data' => $url));
$image = '';
if ($icon !== '') {
array(array('child' => array('' =>
array('url' => array(array('data' => $icon))))));
if (!is_string($feed_author) &&
isset($feed_author['properties']['photo'][0])) {
$image = array(array('child' => array('' => array('url' =>
array(array('data' => $feed_author['properties']['photo'][0]))))));
}
// Use the name given for the h-feed, or get the title from the html.
if ($feed_title !== '') {
Expand Down
5 changes: 3 additions & 2 deletions library/SimplePie/Sanitize.php
Expand Up @@ -368,8 +368,9 @@ public function sanitize($data, $type, $base = '')

// Finally, convert to a HTML string
$data = trim($document->saveHTML());

list($_, $data, $_) = explode($unique_tag, $data);
$result = explode($unique_tag, $data);
// The tags may not be found again if there was invalid markup.
$data = count($result) === 3 ? $result[1] : '';

if ($this->remove_div)
{
Expand Down

0 comments on commit 9b775e8

Please sign in to comment.