Browse files

Bringing XML_Feed_Parser into CVS

git-svn-id: http://svn.php.net/repository/pear/packages/XML_Feed_Parser/trunk@198192 c90b9560-bf6c-de11-be94-00142212c4b1
  • Loading branch information...
1 parent 07dda65 commit 5446a6b2325c5d74719a407c7bebd9723cb48d57 James Stewart committed Oct 11, 2005
View
260 Parser.php
@@ -0,0 +1,260 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * Key gateway class for XML_Feed_Parser package
+ *
+ * PHP version 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @copyright 2005 James Stewart <james@jystewart.net>
+ * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL
+ * @version CVS: $Id$
+ * @link http://dev.jystewart.net/XML_Feed_Parser/
+ */
+
+/**
+ * XML_Feed_Parser_Type is an abstract class required by all of our
+ * feed types. It makes sense to load it here to keep the other files
+ * clean.
+ */
+require_once 'Parser/Type.php';
+
+/**
+ * We will throw exceptions when errors occur.
+ */
+require_once 'Parser/Exception.php';
+
+/**
+ * This is the core of the XML_Feed_Parser package. It identifies feed types
+ * and abstracts access to them. It is an iterator, allowing for easy access
+ * to the entire feed.
+ *
+ * @author James Stewart <james@jystewart.net>
+ * @version 0.2.2 - 22nd September 2005
+ * @package XML_Feed_Parser
+ */
+class XML_Feed_Parser implements Iterator
+{
+ /**
+ * This is where we hold the feed object
+ * @var Object
+ */
+ private $feed;
+
+ /**
+ * To allow for extensions, we make a public reference to the feed model
+ * @var DOMDocument
+ */
+ public $model;
+
+ /**
+ * A map between entry ID and offset
+ * @var array
+ */
+ protected $idMappings = array();
+
+    /**
+     * Detect the feed type and hand the document to the matching parser.
+     *
+     * The serialized feed is loaded into a DOMDocument (exposed as $model)
+     * and the root element is inspected, in this order:
+     *  - root in the Atom 1.0 namespace     => XML_Feed_Parser_Atom
+     *  - root in the Atom 0.3 namespace     => XML_Feed_Parser_Atom, plus an
+     *                                          E_USER_WARNING
+     *  - a child in the RSS 1.0 namespace   => XML_Feed_Parser_RSS1
+     *  - <rss> root with version 2          => XML_Feed_Parser_RSS2
+     * Anything else is rejected with an exception. I do not intend to
+     * introduce full support for Atom 0.3 or RSS < 1.0, but others are
+     * welcome to.
+     *
+     * @param    string    $feed      XML serialization of the feed
+     * @param    bool      $strict    Whether or not to validate the feed
+     * @throws   XML_Feed_Parser_Exception if the input cannot be parsed as
+     *           XML or the feed type is unknown or unsupported
+     * @todo     Work out super-extensible way of defining type tests
+     */
+    function __construct($feed, $strict = false)
+    {
+        $this->model = new DOMDocument;
+        if (! $this->model->loadXML($feed)) {
+            throw new XML_Feed_Parser_Exception(
+                'Invalid input: this is not valid XML');
+        }
+
+        /* detect feed type */
+        $doc_element = $this->model->documentElement;
+        if ($doc_element === null) {
+            throw new XML_Feed_Parser_Exception('Feed type unknown');
+        }
+
+        /*
+         * RSS 1.0 is recognised by a child of the root living in the RSS 1.0
+         * namespace. Scan the children rather than assuming a fixed position
+         * so that the test does not depend on whitespace in the document.
+         */
+        $has_rss1_child = false;
+        foreach ($doc_element->childNodes as $child) {
+            if ($child->namespaceURI == 'http://purl.org/rss/1.0/') {
+                $has_rss1_child = true;
+                break;
+            }
+        }
+
+        $class = false;
+        if ($doc_element->namespaceURI == 'http://www.w3.org/2005/Atom') {
+            require_once 'Parser/Atom.php';
+            require_once 'Parser/AtomElement.php';
+            $class = 'XML_Feed_Parser_Atom';
+        } else if ($doc_element->namespaceURI == 'http://purl.org/atom/ns#') {
+            require_once 'Parser/Atom.php';
+            require_once 'Parser/AtomElement.php';
+            $class = 'XML_Feed_Parser_Atom';
+            trigger_error(
+                "Atom 0.3 deprecated, using 1.0 parser which won't provide " .
+                'all options', E_USER_WARNING);
+        } else if ($has_rss1_child) {
+            require_once 'Parser/RSS1.php';
+            require_once 'Parser/RSS1Element.php';
+            $class = 'XML_Feed_Parser_RSS1';
+        } else if ($doc_element->tagName == 'rss') {
+            /* Only RSS 2 is handled; other versions fall through and fail */
+            if ($doc_element->hasAttribute('version') and
+                $doc_element->getAttribute('version') == 2) {
+                require_once 'Parser/RSS2.php';
+                require_once 'Parser/RSS2Element.php';
+                $class = 'XML_Feed_Parser_RSS2';
+            }
+        }
+
+        if ($class === false) {
+            throw new XML_Feed_Parser_Exception('Feed type unknown');
+        }
+
+        /* Instantiate feed object */
+        $this->feed = new $class($this->model, $strict);
+    }
+
+    /**
+     * Forward calls to methods this class does not define to the feed
+     * type object. The arguments of the original call are handed over as
+     * one array parameter.
+     *
+     * @param   string  $call       - the method being called
+     * @param   array   $attributes - the arguments given to that method
+     * @return  mixed   whatever the feed object returns for the call
+     */
+    function __call($call, $attributes)
+    {
+        $result = $this->feed->$call($attributes);
+        return $result;
+    }
+
+    /**
+     * Allow variable-like access to feed-level data: reading a property
+     * this class does not declare reads the property of the same name on
+     * the feed type object.
+     *
+     * @param   string  $val - the name of the variable required
+     * @return  mixed   the value the feed object reports for that name
+     */
+    function __get($val)
+    {
+        $source = $this->feed;
+        return $source->$val;
+    }
+
+    /**
+     * Part of the iteration implementation. Advances the internal counter:
+     * it starts at 0 when it has not been set yet, is incremented while it
+     * is not beyond the last entry, and is left alone afterwards.
+     *
+     * @return  false|null  false once the counter is already past the last
+     *                      entry, nothing otherwise
+     */
+    function next()
+    {
+        /* First use: start at the beginning */
+        if (! isset($this->current_item)) {
+            $this->current_item = 0;
+            return;
+        }
+
+        $last_offset = $this->feed->numberEntries - 1;
+        if ($this->current_item > $last_offset) {
+            return false;
+        }
+
+        ++$this->current_item;
+    }
+
+    /**
+     * Part of the iteration implementation. Returns the entry found at the
+     * position of the internal counter, as given by getEntryByOffset().
+     *
+     * @return  XML_Feed_Parser_Type|false
+     */
+    function current()
+    {
+        $position = $this->current_item;
+        return $this->getEntryByOffset($position);
+    }
+
+    /**
+     * Part of the iteration implementation. The key of the current entry is
+     * simply the value of the internal counter.
+     *
+     * @return  int
+     */
+    function key()
+    {
+        $position = $this->current_item;
+        return $position;
+    }
+
+    /**
+     * Part of the iteration implementation. The current position is valid
+     * as long as the internal counter is below the number of entries the
+     * feed object reports.
+     *
+     * @return  bool
+     */
+    function valid()
+    {
+        $position = $this->current_item;
+        $total = $this->feed->numberEntries;
+        return $position < $total;
+    }
+
+ /**
+ * Part of the iteration implementation. Resets the internal counter
+ * ($current_item) to offset 0, the beginning of the feed.
+ */
+ function rewind()
+ {
+ $this->current_item = 0;
+ }
+
+    /**
+     * As well as allowing the items to be iterated over we want to allow
+     * users to be able to access a specific entry. This is one of two ways of
+     * doing that, the other being by offset.
+     *
+     * IDs that have already been seen are resolved through the ID to offset
+     * map; any other ID is looked up by the feed type object. A lookup that
+     * finds nothing yields false.
+     *
+     * @param    string    $id
+     * @return   XML_Feed_Parser_Type|false
+     */
+    function getEntryById($id)
+    {
+        if (isset($this->idMappings[$id])) {
+            return $this->getEntryByOffset($this->idMappings[$id]);
+        }
+
+        /*
+         * Since we have not yet encountered that ID, let the feed object
+         * search for it. A miss may come back as null, which is normalised
+         * to the documented false.
+         */
+        $entry = $this->feed->getEntryById($id);
+        if ($entry === null) {
+            return false;
+        }
+        return $entry;
+    }
+
+    /**
+     * Access a specific entry by its position in the feed; the other way of
+     * reaching a single entry is by ID.
+     *
+     * An entry the feed object already holds is returned directly.
+     * Otherwise the feed object is asked to load it, and its ID is recorded
+     * in the ID to offset map before it is returned.
+     *
+     * @param    int    $offset
+     * @return   XML_Feed_Parser_Type|false  false when the offset is not
+     *           below the number of entries or loading the entry throws
+     */
+    function getEntryByOffset($offset)
+    {
+        if (! ($offset < $this->feed->numberEntries)) {
+            return false;
+        }
+
+        if (isset($this->feed->entries[$offset])) {
+            return $this->feed->entries[$offset];
+        }
+
+        try {
+            $this->feed->getEntryByOffset($offset);
+        } catch (Exception $e) {
+            return false;
+        }
+
+        /* Remember where this ID lives for later lookups by ID */
+        $entry_id = $this->feed->entries[$offset]->getID();
+        $this->idMappings[$entry_id] = $offset;
+
+        return $this->feed->entries[$offset];
+    }
+
+    /**
+     * The string form of the parser is the string form of the feed type
+     * object it wraps.
+     *
+     * @return  string
+     */
+    function __toString()
+    {
+        $text = $this->feed->__toString();
+        return $text;
+    }
+}
+?>
View
242 Parser/Atom.php
@@ -0,0 +1,242 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * Atom feed class for XML_Feed_Parser
+ *
+ * PHP version 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @copyright 2005 James Stewart <james@jystewart.net>
+ * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
+ * @version CVS: $Id$
+ * @link http://dev.jystewart.net/XML_Feed_Parser/
+*/
+
+/**
+ * This is the class that determines how we manage Atom 1.0 feeds
+ *
+ * How we deal with constructs:
+ * date - return as unix datetime for use with the 'date' function unless specified otherwise
+ * text - return as is. optional parameter will give access to attributes
+ * person - defaults to name, but parameter based access
+ *
+ * @author James Stewart <james@jystewart.net>
+ * @version 0.2.2 22nd September 2005
+ * @package XML_Feed_Parser
+ * @todo Improve attribute access
+ */
+class XML_Feed_Parser_Atom extends XML_Feed_Parser_Type
+{
+ /**
+ * The URI of the RelaxNG schema used to (optionally) validate the feed
+ * @var string
+ */
+ private $relax = 'http://atompub.org/2005/07/11/atom.rnc';
+
+ /**
+ * We're likely to use XPath, so let's keep it global
+ * @var DOMXPath
+ */
+ public $xpath;
+
+ /**
+ * When performing XPath queries we will use this prefix
+ * @var string
+ */
+ private $xpathPrefix = '//';
+
+ /**
+ * The feed type we are parsing
+ * @var string
+ */
+ public $version = 'Atom 1.0';
+
+ /**
+ * The class used to represent individual items
+ * @var string
+ */
+ protected $itemClass = 'XML_Feed_Parser_AtomElement';
+
+ /**
+ * The element containing entries
+ * @var string
+ */
+ protected $itemElement = 'entry';
+
+ /**
+ * Here we map those elements we're not going to handle individually
+ * to the constructs they are. The optional second parameter in the array
+ * tells the parser whether to 'fall back' (not apt. at the feed level) or
+ * fail if the element is missing. If the parameter is not set, the function
+ * will simply return false and leave it to the client to decide what to do.
+ * @var array
+ */
+ protected $map = array(
+ 'author' => array('Person'),
+ 'contributor' => array('Contributor'),
+ 'icon' => array('Text'),
+ 'id' => array('Text', 'fail'),
+ 'rights' => array('Text'),
+ 'subtitle' => array('Text'),
+ 'title' => array('Text', 'fail'),
+ 'updated' => array('Date', 'fail'),
+ 'link' => array('Link'),
+ 'generator' => array('Text'));
+
+ /**
+ * Here we provide a few mappings for those very special circumstances in
+ * which it makes sense to map back to the RSS2 spec. Key is RSS2 version
+ * value is an array consisting of the equivalent in atom and any attributes
+ * needed to make the mapping.
+ * @var array
+ */
+ protected $compatMap = array();
+
+    /**
+     * Store the feed document, optionally validate it against the RelaxNG
+     * schema, then set up XPath access and count the entries.
+     *
+     * @param    DOMDocument    $model     A DOM object representing the feed
+     * @param    bool           $strict    (optional) Whether or not to
+     *                                     validate this feed
+     * @throws   XML_Feed_Parser_Exception when $strict is set and the feed
+     *           does not validate
+     */
+    function __construct(DOMDocument $model, $strict = false)
+    {
+        $this->model = $model;
+
+        if ($strict) {
+            /*
+             * $this->relax holds the location of the schema, so it has to
+             * go through relaxNGValidate(); relaxNGValidateSource() expects
+             * the schema text itself.
+             * NOTE(review): the location points at a compact syntax (.rnc)
+             * schema - confirm the installed libxml can load it, otherwise
+             * strict mode will always fail.
+             */
+            if (! $this->model->relaxNGValidate($this->relax)) {
+                throw new XML_Feed_Parser_Exception('Failed required validation');
+            }
+        }
+
+        $this->xpath = new DOMXPath($this->model);
+        $this->xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom');
+        $this->numberEntries = $this->count('entry');
+    }
+
+    /**
+     * This function uses XPath to get the entry based on its ID. IDs that
+     * are already present in the ID to offset map are served from the
+     * entries held by this object instead.
+     *
+     * Ideally we would also use XPath to find the offset of the node and
+     * therefore cache it, but the necessary XPath support isn't coming
+     * until at least PHP5.1.
+     *
+     * @param    string    $id    any valid Atom ID.
+     * @return   XML_Feed_Parser_AtomElement|false  false if no entry
+     *           carries that ID
+     */
+    function getEntryById($id)
+    {
+        if (isset($this->idMappings[$id])) {
+            return $this->entries[$this->idMappings[$id]];
+        }
+
+        /*
+         * XPath 1.0 string literals have no escape character, so an ID
+         * containing an apostrophe has to be assembled with concat().
+         */
+        $id = (string) $id;
+        if (strpos($id, "'") === false) {
+            $literal = "'" . $id . "'";
+        } else {
+            $literal = "concat('" . str_replace("'", "', \"'\", '", $id) . "')";
+        }
+
+        $entries = $this->xpath->query('//atom:entry[atom:id=' . $literal . ']');
+        if ($entries->length > 0) {
+            $xmlBase = $this->getBase($entries->item(0));
+            /* $itemClass names the entry class; $itemElement is only the tag */
+            $classname = $this->itemClass;
+            return new $classname($entries->item(0), $this, $xmlBase);
+        }
+
+        return false;
+    }
+
+    /**
+     * Get a person construct. We default to the 'name' element but allow
+     * access to any of the elements.
+     *
+     * @param    string    $method       The name of the person construct we
+     *                                   want
+     * @param    array     $arguments    Position 0 may give the offset of
+     *                                   the construct, position 1 an array
+     *                                   which we hope gives a 'param'
+     * @return   string|false  false when there is no construct at that
+     *                         offset or it lacks the requested element
+     */
+    protected function getPerson($method, $arguments)
+    {
+        $offset = empty($arguments[0]) ? 0 : $arguments[0];
+        $options = empty($arguments[1]) ? array() : $arguments[1];
+
+        $section = $this->model->getElementsByTagName($method);
+        if ($section->length == 0 or $section->length < $offset + 1) {
+            return false;
+        }
+
+        /* Which child of the person construct are we after? */
+        $parameter = isset($options['param']) ? $options['param'] : 'name';
+
+        $param = $section->item($offset)->getElementsByTagName($parameter);
+        if ($param->length == 0) {
+            return false;
+        }
+        return $param->item(0)->nodeValue;
+    }
+
+    /**
+     * Get a text construct. Only the first element with the given name is
+     * considered. When it holds more than one child node the children are
+     * joined: text as is, elements serialized as XML. Otherwise the node
+     * value is returned.
+     *
+     * @todo     Build in attribute support
+     * @todo     Handle elements that recur
+     * @param    string    $method       The name of the text construct we
+     *                                   want
+     * @param    array     $arguments    Currently unused
+     * @return   string|false  false if there is no such element
+     */
+    protected function getText($method, $arguments)
+    {
+        $tags = $this->model->getElementsByTagName($method);
+        if ($tags->length == 0) {
+            return false;
+        }
+
+        $node = $tags->item(0);
+        if ($node->hasChildNodes() and $node->childNodes->length > 1) {
+            $value = '';
+            foreach ($node->childNodes as $child) {
+                if ($child instanceof DOMText) {
+                    /* simplexml cannot import text nodes */
+                    $value .= $child->nodeValue;
+                } else if ($child instanceof DOMElement) {
+                    $simple = simplexml_import_dom($child);
+                    $value .= $simple->asXML();
+                }
+                /* comments and processing instructions carry no text */
+            }
+            return $value;
+        }
+
+        return $node->nodeValue;
+    }
+
+    /**
+     * Get an attribute of a link element. Without $params the links are
+     * taken from the model by tag name; with $params an XPath query selects
+     * the atom:link elements whose attributes match every given pair.
+     * A 'href' value is resolved through addBase().
+     *
+     * NOTE(review): $xpathPrefix is declared private both here and in
+     * XML_Feed_Parser_AtomElement, so this method appears to always see the
+     * feed level prefix - confirm whether entry level queries are meant to
+     * be scoped to the entry.
+     *
+     * @param    int       $offset       the position of the link within the
+     *                                   container
+     * @param    string    $attribute    the attribute name required
+     * @param    array     $params       an array of attributes to search by,
+     *                                   attribute name => required value
+     * @return   string|false  the value of the attribute, or false if there
+     *                         is no such link or attribute
+     */
+    function getLink($offset = 0, $attribute = 'href', $params = false)
+    {
+        if (is_array($params) and !empty($params)) {
+            $terms = array();
+
+            foreach ($params as $key => $value) {
+                /*
+                 * XPath 1.0 string literals have no escape character, so a
+                 * value containing an apostrophe is assembled with concat().
+                 */
+                $value = (string) $value;
+                if (strpos($value, "'") === false) {
+                    $literal = "'" . $value . "'";
+                } else {
+                    $literal = "concat('" .
+                        str_replace("'", "', \"'\", '", $value) . "')";
+                }
+                $terms[] = '@' . $key . '=' . $literal;
+            }
+
+            $query = $this->xpathPrefix . 'atom:link[' . join(' and ', $terms) . ']';
+            $links = $this->xpath->query($query);
+        } else {
+            $links = $this->model->getElementsByTagName('link');
+        }
+
+        if ($links->length > $offset) {
+            $link = $links->item($offset);
+            if ($link->hasAttribute($attribute)) {
+                $value = $link->getAttribute($attribute);
+                if ($attribute == 'href') {
+                    $value = $this->addBase($value, $link);
+                }
+                return $value;
+            }
+        }
+
+        return false;
+    }
+}
+
+?>
View
210 Parser/AtomElement.php
@@ -0,0 +1,210 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * AtomElement class for XML_Feed_Parser package
+ *
+ * PHP version 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @copyright 2005 James Stewart <james@jystewart.net>
+ * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
+ * @version CVS: $Id$
+ * @link http://dev.jystewart.net/XML_Feed_Parser/
+ */
+
+/**
+ * This class provides support for atom entries. It will usually be called by
+ * XML_Feed_Parser_Atom with which it shares many methods.
+ *
+ * @author James Stewart <james@jystewart.net>
+ * @version 0.2.2 22nd September 2005
+ * @package XML_Feed_Parser
+ * @todo Deal with xml:base
+ */
+class XML_Feed_Parser_AtomElement extends XML_Feed_Parser_Atom
+{
+ /**
+ * This will be a reference to the parent object for when we want
+ * to use a 'fallback' rule
+ * @var XML_Feed_Parser_Atom
+ */
+ protected $parent;
+
+ /**
+ * When performing XPath queries we will use this prefix
+ * @var string
+ */
+ private $xpathPrefix = '';
+
+ /**
+ * xml:base values inherited by the element
+ * @var string
+ */
+ protected $xmlBase;
+
+ /**
+ * Here we provide a few mappings for those very special circumstances in
+ * which it makes sense to map back to the RSS2 spec. Key is RSS2 version
+ * value is an array consisting of the equivalent in atom and any attributes
+ * needed to make the mapping.
+ * @var array
+ */
+ protected $compatMap = array();
+
+ /**
+ * Our specific element map
+ * @var array
+ */
+ protected $map = array(
+ 'author' => array('Person', 'fallback'),
+ 'contributor' => array('Person'),
+ 'id' => array('Text', 'fail'),
+ 'published' => array('Date'),
+ 'updated' => array('Date', 'fail'),
+ 'title' => array('Text', 'fail'),
+ 'rights' => array('Text', 'fallback'),
+ 'summary' => array('Text'),
+ 'content' => array('Content'),
+ 'link' => array('Link'),
+ 'enclosure' => array('Enclosure'));
+
+    /**
+     * Keep hold of the entry element, the feed it belongs to and the
+     * inherited xml:base, and build the XPath prefix that selects this
+     * entry by its ID.
+     *
+     * @param   DOMElement              $element - this item as a DOM element
+     * @param   XML_Feed_Parser_Atom    $parent - the feed of which this is
+     *                                  a member
+     * @param   string                  $xmlBase - xml:base value inherited
+     *                                  by the element
+     */
+    function __construct(DOMElement $element, $parent, $xmlBase = '')
+    {
+        $this->model = $element;
+        $this->parent = $parent;
+        $this->xmlBase = $xmlBase;
+
+        $this->xpathPrefix = sprintf("//atom:entry[atom:id='%s']/", $this->id);
+    }
+
+    /**
+     * author data at the entry level is more complex than at the feed level.
+     * We look for an atom:author that is a direct child of the entry first.
+     * If there is none we look inside an atom:source child of the entry,
+     * and if it's not there either we look to the parent for data.
+     *
+     * @param    array    $arguments    may give a 'param' naming the part of
+     *                                  the author data wanted; defaults to
+     *                                  'name'
+     * @return   string|false  false when an author was found but lacks the
+     *                         requested part
+     */
+    function getAuthor($arguments)
+    {
+        /* Find out which part of the author data we're looking for */
+        $parameter = isset($arguments['param']) ? $arguments['param'] : 'name';
+
+        /*
+         * getElementsByTagName() also descends into atom:source, so the
+         * entry's own author has to be picked from the direct children.
+         */
+        $author = null;
+        foreach ($this->model->childNodes as $child) {
+            if ($child instanceof DOMElement and $child->localName == 'author') {
+                $author = $child;
+                break;
+            }
+        }
+
+        if ($author === null) {
+            $source = $this->model->getElementsByTagName('source');
+            if ($source->length > 0) {
+                $nested = $source->item(0)->getElementsByTagName('author');
+                if ($nested->length > 0) {
+                    $author = $nested->item(0);
+                }
+            }
+        }
+
+        if ($author === null) {
+            return $this->parent->getAuthor($arguments);
+        }
+
+        $values = $author->getElementsByTagName($parameter);
+        if ($values->length == 0) {
+            return false;
+        }
+        return $values->item(0)->nodeValue;
+    }
+
+    /**
+     * This element may or may not be present. It cannot be present more than
+     * once. It may have a 'src' attribute, in which case there's no content.
+     * If there is content we return it (text as is, child elements
+     * serialized as XML); if not and there's a 'src' attribute we return the
+     * value of that instead, resolved through addBase().
+     *
+     * @todo     Be clearer about content types
+     * @return   string|false  false if there is no usable content element
+     */
+    function getContent()
+    {
+        $tags = $this->model->getElementsByTagName('content');
+        if ($tags->length == 0) {
+            return false;
+        }
+
+        $value = $tags->item(0);
+        if ($value->hasChildNodes()) {
+            $content = '';
+            foreach ($value->childNodes as $child) {
+                if ($child instanceof DOMText) {
+                    $content .= $child->nodeValue;
+                } else if ($child instanceof DOMElement) {
+                    $simple = simplexml_import_dom($child);
+                    $content .= $simple->asXML();
+                }
+                /* other node types (comments etc.) cannot be imported */
+            }
+            return $content;
+        } else if ($value->nodeValue) {
+            return $value->nodeValue;
+        } else if ($value->getAttribute('src')) {
+            return $this->addBase($value->getAttribute('src'), $value);
+        }
+
+        return false;
+    }
+
+    /**
+     * The Atom spec doesn't provide for an enclosure element, but it is
+     * generally supported using the link element with rel='enclosure'.
+     * The links are located through the parent feed's XPath object, using
+     * this entry's ID to select the entry.
+     *
+     * @param    string    $method       - for compatibility with our __call
+     *                                   usage
+     * @param    array     $arguments    - for compatibility with our __call
+     *                                   usage; position 0 may give the
+     *                                   offset of the enclosure wanted
+     * @return   array|false  array with 'url' and 'type' keys ('type' is
+     *                        false when the link has no type attribute), or
+     *                        false if there is no enclosure at that offset
+     */
+    function getEnclosure($method, $arguments = array())
+    {
+        $offset = isset($arguments[0]) ? $arguments[0] : 0;
+
+        /*
+         * XPath 1.0 string literals have no escape character, so an ID
+         * containing an apostrophe is assembled with concat().
+         */
+        $id = (string) $this->getText('id', false);
+        if (strpos($id, "'") === false) {
+            $literal = "'" . $id . "'";
+        } else {
+            $literal = "concat('" . str_replace("'", "', \"'\", '", $id) . "')";
+        }
+
+        $query = '//atom:entry[atom:id=' . $literal .
+            "]/atom:link[@rel='enclosure']";
+        $encs = $this->parent->xpath->query($query);
+
+        /* offsets are zero based, so the last valid one is length - 1 */
+        if ($offset < 0 or $encs->length <= $offset) {
+            return false;
+        }
+
+        $link = $encs->item($offset);
+        if (! $link->hasAttribute('href')) {
+            return false;
+        }
+
+        return array(
+            'url' => $link->getAttribute('href'),
+            'type' => $link->hasAttribute('type') ?
+                $link->getAttribute('type') : false);
+    }
+
+    /**
+     * Where an atom:entry is taken from another feed then the aggregator
+     * is supposed to include an atom:source element which replicates at least
+     * the atom:id, atom:title, and atom:updated metadata from the original
+     * feed. Atom:source therefore has a very similar structure to atom:feed
+     * and if we find it we will return it as an XML_Feed_Parser_Atom object.
+     *
+     * @return   XML_Feed_Parser_Atom|false  false if the entry has no
+     *                                       atom:source element
+     */
+    function getSource()
+    {
+        $test = $this->model->getElementsByTagName('source');
+        if ($test->length == 0) {
+            return false;
+        }
+
+        /*
+         * XML_Feed_Parser_Atom requires a DOMDocument, so the source element
+         * is copied into a document of its own before being wrapped.
+         */
+        $document = new DOMDocument;
+        $document->appendChild($document->importNode($test->item(0), true));
+
+        return new XML_Feed_Parser_Atom($document);
+    }
+}
+
+?>
View
42 Parser/Exception.php
@@ -0,0 +1,42 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * Keeps the exception class for XML_Feed_Parser.
+ *
+ * PHP version 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @copyright 2005 James Stewart <james@jystewart.net>
+ * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL
+ * @version CVS: $Id$
+ * @link http://dev.jystewart.net/XML_Feed_Parser/
+ */
+
+/**
+ * We are extending PEAR_Exception
+ */
+require_once 'PEAR/Exception.php';
+
+/**
+ * XML_Feed_Parser_Exception is a simple extension of PEAR_Exception, existing
+ * to help with identification of the source of exceptions. It declares no
+ * members of its own; everything it can do comes from PEAR_Exception.
+ *
+ * @author James Stewart <james@jystewart.net>
+ * @version 0.2.3 - 3rd October 2005
+ * @package XML_Feed_Parser
+ */
+class XML_Feed_Parser_Exception extends PEAR_Exception
+{
+
+}
+
+?>
View
253 Parser/RSS1.php
@@ -0,0 +1,253 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * RSS1 class for XML_Feed_Parser
+ *
+ * PHP version 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @copyright 2005 James Stewart <james@jystewart.net>
+ * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
+ * @version CVS: $Id$
+ * @link http://dev.jystewart.net/XML_Feed_Parser/
+ */
+
+/**
+ * This class handles RSS1.0 feeds.
+ *
+ * @author James Stewart <james@jystewart.net>
+ * @version 0.2.2 22nd September 2005
+ * @package XML_Feed_Parser
+ * @todo Find a Relax NG URI we can use
+ * @todo Implement support for namespaces/modules
+ */
+class XML_Feed_Parser_RSS1 extends XML_Feed_Parser_Type
+{
+ /**
+ * The URI of the RelaxNG schema used to (optionally) validate the feed
+ * @var string
+ */
+ private $relax = '';
+
+ /**
+ * We're likely to use XPath, so let's keep it global
+ * @var DOMXPath
+ */
+ protected $xpath;
+
+ /**
+ * The feed type we are parsing
+ * @var string
+ */
+ public $version = 'RSS 1.0';
+
+ /**
+ * The class used to represent individual items
+ * @var string
+ */
+ protected $itemClass = 'XML_Feed_Parser_RSS1Element';
+
+ /**
+ * The element containing entries
+ * @var string
+ */
+ protected $itemElement = 'item';
+
+    /**
+     * Here we map those elements we're not going to handle individually
+     * to the constructs they are. The optional second parameter in the array
+     * tells the parser whether to 'fall back' (not apt. at the feed level) or
+     * fail if the element is missing. If the parameter is not set, the function
+     * will simply return false and leave it to the client to decide what to do.
+     * Each element name is listed once; 'description' serves both the RSS
+     * element and its Dublin Core counterpart.
+     * @var array
+     */
+    protected $map = array(
+        'title' => array('Text'),
+        'link' => array('Text'),
+        'description' => array('Text'), # description or dc:description
+        'image' => array('Image'),
+        'textinput' => array('TextInput'),
+        'updatePeriod' => array('Text'),
+        'updateFrequency' => array('Text'),
+        'updateBase' => array('Date'),
+        'rights' => array('Text'), # dc:rights
+        'creator' => array('Text'), # dc:creator
+        'publisher' => array('Text'), # dc:publisher
+        'contributor' => array('Text'), # dc:contributor
+        'date' => array('Date') # dc:date
+        );
+
+ /**
+ * Here we map some elements to their atom equivalents. This is going to be
+ * quite tricky to pull off effectively (and some users' methods may vary)
+ * but is worth trying. The key is the atom version, the value is RSS1.
+ * @var array
+ */
+ protected $compatMap = array(
+ 'title' => array('title'),
+ 'link' => array('link'),
+ 'subtitle' => array('description'),
+ 'author' => array('creator'),
+ 'updated' => array('date'));
+
+ /**
+ * We will be working with multiple namespaces and it is useful to
+ * keep them together
+ * @var array
+ */
+ protected $namespaces = array(
+ 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+ 'rss' => 'http://purl.org/rss/1.0/',
+ 'dc' => 'http://purl.org/rss/1.0/modules/dc/',
+ 'content' => 'http://purl.org/rss/1.0/modules/content/',
+ 'sy' => 'http://web.resource.org/rss/1.0/modules/syndication/');
+
+    /**
+     * Store the feed document, register every namespace prefix we know
+     * about with a fresh XPath object and count the items. The $strict
+     * flag is accepted but not acted upon yet.
+     *
+     * @todo     RelaxNG validation
+     * @param    DOMDocument    $model     A DOM object representing the feed
+     * @param    bool           $strict    (optional) Whether or not to
+     *                                     validate this feed
+     */
+    function __construct(DOMDocument $model, $strict = false)
+    {
+        $this->model = $model;
+
+        $engine = new DOMXPath($model);
+        $this->xpath = $engine;
+        foreach ($this->namespaces as $prefix => $uri) {
+            $this->xpath->registerNamespace($prefix, $uri);
+        }
+
+        $this->numberEntries = $this->count('item');
+    }
+
+    /**
+     * This is not really something that will work with RSS1 as it does not have
+     * clear restrictions on the global uniqueness of IDs. We will employ the
+     * _very_ hit and miss method of selecting entries based on the rdf:about
+     * attribute. IDs already present in the ID to offset map are served from
+     * the entries held by this object.
+     *
+     * @param    string    $id    any valid ID.
+     * @return   XML_Feed_Parser_RSS1Element|false  false if no item carries
+     *           that rdf:about value
+     */
+    function getEntryById($id)
+    {
+        if (isset($this->idMappings[$id])) {
+            return $this->entries[$this->idMappings[$id]];
+        }
+
+        /*
+         * XPath 1.0 string literals have no escape character, so an ID
+         * containing an apostrophe has to be assembled with concat().
+         */
+        $id = (string) $id;
+        if (strpos($id, "'") === false) {
+            $literal = "'" . $id . "'";
+        } else {
+            $literal = "concat('" . str_replace("'", "', \"'\", '", $id) . "')";
+        }
+
+        $entries = $this->xpath->query('//rss:item[@rdf:about=' . $literal . ']');
+        if ($entries->length > 0) {
+            $classname = $this->itemClass;
+            return new $classname($entries->item(0), $this);
+        }
+
+        return false;
+    }
+
+    /**
+     * Get details of the image associated with the feed. Only the first
+     * image element is used. When it has child nodes, title, link and url
+     * are read from its children; otherwise the url is taken from its
+     * 'resource' attribute.
+     *
+     * @return   array|false  array with the keys title, link, url,
+     *                        description, height and width (false for
+     *                        whatever is not available), or false if the
+     *                        feed has no image
+     */
+    protected function getImage()
+    {
+        $images = $this->model->getElementsByTagName('image');
+        if ($images->length == 0) {
+            return false;
+        }
+
+        $image = $images->item(0);
+        $details = array('title' => false, 'link' => false, 'url' => false);
+
+        if ($image->hasChildNodes()) {
+            foreach (array('title', 'link', 'url') as $field) {
+                $nodes = $image->getElementsByTagName($field);
+                if ($nodes->length > 0) {
+                    /* elements expose their text as nodeValue, not value */
+                    $details[$field] = $nodes->item(0)->nodeValue;
+                }
+            }
+        } else {
+            $resource = $image->attributes->getNamedItem('resource');
+            if ($resource !== null) {
+                $details['url'] = $resource->nodeValue;
+            }
+        }
+
+        $details = array_merge($details,
+            array('description' => false, 'height' => false, 'width' => false));
+        return $details;
+    }
+
+    /**
+     * The textinput element is little used, but in the interests of
+     * completeness we will support it. Only the first textinput element is
+     * used. When it has no usable link child the 'resource' attribute, if
+     * present, supplies the link.
+     *
+     * @return   array|false  array with the keys title, description, name
+     *                        and link (null for whatever is missing), or
+     *                        false if the feed has no textinput
+     */
+    protected function getTextInput()
+    {
+        $inputs = $this->model->getElementsByTagName('textinput');
+        if ($inputs->length == 0) {
+            return false;
+        }
+
+        $input = $inputs->item(0);
+        $results = array();
+        foreach (array('title', 'description', 'name', 'link') as $field) {
+            $nodes = $input->getElementsByTagName($field);
+            /* elements expose their text as nodeValue, not value */
+            $results[$field] = ($nodes->length > 0) ?
+                $nodes->item(0)->nodeValue : null;
+        }
+
+        if (empty($results['link']) and
+            $input->attributes->getNamedItem('resource')) {
+            $results['link'] = $input->attributes->getNamedItem('resource')->nodeValue;
+        }
+
+        return $results;
+    }
+
+    /**
+     * Dublin Core provides the dc:creator, dc:contributor, and dc:publisher
+     * elements for defining authorship in RSS1. We will try each of those in
+     * turn in order to simulate the atom author element and will return the
+     * first one found as text.
+     *
+     * @return   string|false  false if none of the elements is present
+     */
+    function getAuthor()
+    {
+        $options = array('creator', 'contributor', 'publisher');
+        foreach ($options as $element) {
+            $test = $this->model->getElementsByTagName($element);
+            if ($test->length > 0) {
+                /* elements expose their text as nodeValue, not value */
+                return $test->item(0)->nodeValue;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * In RSS1 a link is a text element but in order to ensure that we resolve
+     * URLs properly we have a special function for them. The text of the
+     * link at the given offset is passed through addBase(). $attribute and
+     * $params are accepted but not used here.
+     *
+     * @param    int       $offset       the position of the link wanted
+     * @param    string    $attribute    not used
+     * @param    array     $params       not used
+     * @return   string|false  false if there is no link at that offset
+     */
+    function getLink($offset = 0, $attribute = 'href', $params = false)
+    {
+        $candidates = $this->model->getElementsByTagName('link');
+        $needed = $offset + 1;
+
+        if ($candidates->length >= $needed) {
+            $node = $candidates->item($offset);
+            return $this->addBase($node->nodeValue, $node);
+        }
+
+        return false;
+    }
+}
+
+?>
View
147 Parser/RSS1Element.php
@@ -0,0 +1,147 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * RSS1 Element class for XML_Feed_Parser
+ *
+ * PHP versions 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @copyright 2005 James Stewart <james@jystewart.net>
+ * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
+ * @version CVS: $Id$
+ * @link http://dev.jystewart.net/XML_Feed_Parser/
+ */
+
+/**
+ * This class provides support for RSS 1.0 entries. It will usually be called by
+ * XML_Feed_Parser_RSS1 with which it shares many methods.
+ *
+ * @author James Stewart <james@jystewart.net>
+ * @version 0.2.2 22nd September 2005
+ * @package XML_Feed_Parser
+ */
+class XML_Feed_Parser_RSS1Element extends XML_Feed_Parser_RSS1
+{
+    /**
+     * This will be a reference to the parent object for when we want
+     * to use a 'fallback' rule
+     * @var XML_Feed_Parser_RSS1
+     */
+    protected $parent;
+
+    /**
+     * Our specific element map. The key is the element name, the value names
+     * the construct used to read it (the accessor is 'get' + that name).
+     * @var array
+     */
+    protected $map = array(
+        'id' => array('Id'),
+        'title' => array('Text'),
+        'link' => array('Link'),
+        'description' => array('Text'), # or dc:description
+        'category' => array('Category'),
+        'rights' => array('Text'), # dc:rights
+        'creator' => array('Text'), # dc:creator
+        'publisher' => array('Text'), # dc:publisher
+        'contributor' => array('Text'), # dc:contributor
+        'date' => array('Date'), # dc:date
+        'content' => array('Content')
+    );
+
+    /**
+     * Here we map some elements to their atom equivalents. This is going to be
+     * quite tricky to pull off effectively (and some users' methods may vary)
+     * but is worth trying. The key is the atom version, the value is RSS1.
+     *
+     * 'updated' used to be listed twice; PHP keeps only the last value for a
+     * repeated key, so the effective mapping ('date') is the one kept here.
+     * @var array
+     */
+    protected $compatMap = array(
+        'content' => array('content'),
+        'published' => array('pubdate'),
+        'subtitle' => array('description'),
+        'updated' => array('date'),
+        'author' => array('creator'),
+        'contributor' => array('contributor')
+    );
+
+    /**
+     * Store useful information for later.
+     *
+     * @param DOMElement $element - this item as a DOM element
+     * @param XML_Feed_Parser_RSS1 $parent - the feed of which this is a member
+     * @param string $xmlBase - accepted for signature compatibility; it is
+     *                          not stored by this class
+     */
+    function __construct(DOMElement $element, $parent, $xmlBase = '')
+    {
+        $this->model = $element;
+        $this->parent = $parent;
+    }
+
+    /**
+     * There is no established way of showing an ID for an RSS1 entry. We will
+     * simulate it using the rdf:about attribute of the entry element. This cannot
+     * be relied upon for unique IDs but may prove useful.
+     *
+     * @return string|false the attribute value, or false if it is not set
+     */
+    function getId()
+    {
+        $about = $this->model->attributes->getNamedItem('about');
+        if ($about) {
+            return $about->nodeValue;
+        }
+        return false;
+    }
+
+    /**
+     * The official way to include full content in an RSS1 entry is to use
+     * the content module's element 'encoded'. Often, however, the 'description'
+     * element is used instead. We will offer that as a fallback.
+     *
+     * Text children are appended as-is and element children are appended as
+     * serialized XML. Other node types (comments, for instance) are skipped,
+     * because simplexml_import_dom() cannot import them.
+     *
+     * @return string|false the content, or false if neither element exists
+     */
+    function getContent()
+    {
+        $options = array('encoded', 'description');
+        foreach ($options as $element) {
+            $test = $this->model->getElementsByTagName($element);
+            if ($test->length == 0) {
+                continue;
+            }
+            $node = $test->item(0);
+            if (! $node->hasChildNodes()) {
+                return $node->nodeValue;
+            }
+            $value = '';
+            foreach ($node->childNodes as $child) {
+                if ($child instanceof DOMText) {
+                    $value .= $child->nodeValue;
+                } else if ($child instanceof DOMElement) {
+                    $simple = simplexml_import_dom($child);
+                    $value .= $simple->asXML();
+                }
+            }
+            return $value;
+        }
+        return false;
+    }
+
+    /**
+     * How RSS1 should support enclosures is not clear. For now we will return
+     * false.
+     *
+     * @return false
+     */
+    function getEnclosure()
+    {
+        return false;
+    }
+}
+
+?>
View
303 Parser/RSS2.php
@@ -0,0 +1,303 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * Class representing feed-level data for an RSS2 feed
+ *
+ * PHP versions 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @copyright 2005 James Stewart <james@jystewart.net>
+ * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
+ * @version CVS: $Id$
+ * @link http://dev.jystewart.net/XML_Feed_Parser/
+ */
+
+/**
+ * This class handles RSS2 feeds.
+ *
+ * @author James Stewart <james@jystewart.net>
+ * @version 0.2.2 22nd September 2005
+ * @package XML_Feed_Parser
+ */
+class XML_Feed_Parser_RSS2 extends XML_Feed_Parser_Type
+{
+    /**
+     * The URI of the RelaxNG schema used to (optionally) validate the feed
+     * @var string
+     */
+    private $relax = 'http://dmorelli.sdf-us.org/files/rss2/rss-2_0.rng';
+
+    /**
+     * We're likely to use XPath, so let's keep it global
+     * @var DOMXPath
+     */
+    protected $xpath;
+
+    /**
+     * The feed type we are parsing
+     * @var string
+     */
+    public $version = 'RSS 2.0';
+
+    /**
+     * The class used to represent individual items
+     * @var string
+     */
+    protected $itemClass = 'XML_Feed_Parser_RSS2Element';
+
+    /**
+     * The element containing entries
+     * @var string
+     */
+    protected $itemElement = 'item';
+
+    /**
+     * Here we map those elements we're not going to handle individually
+     * to the constructs they are. The optional second parameter in the array
+     * tells the parser whether to 'fall back' (not apt. at the feed level) or
+     * fail if the element is missing. If the parameter is not set, the function
+     * will simply return false and leave it to the client to decide what to do.
+     * @var array
+     */
+    protected $map = array(
+        'ttl' => array('Text'),
+        'pubDate' => array('Date'),
+        'lastBuildDate' => array('Date'),
+        'title' => array('Text'),
+        'link' => array('Link'),
+        'description' => array('Text'),
+        'language' => array('Text'),
+        'copyright' => array('Text'),
+        'managingEditor' => array('Text'),
+        'webMaster' => array('Text'),
+        'category' => array('Text'),
+        'generator' => array('Text'),
+        'docs' => array('Text'));
+
+    /**
+     * Here we map some elements to their atom equivalents. This is going to be
+     * quite tricky to pull off effectively (and some users' methods may vary)
+     * but is worth trying. The key is the atom version, the value is RSS2.
+     * @var array
+     */
+    protected $compatMap = array(
+        'title' => array('title'),
+        'rights' => array('copyright'),
+        'updated' => array('lastBuildDate'),
+        'subtitle' => array('description'),
+        'date' => array('pubDate'),
+        'author' => array('managingEditor'));
+
+    /**
+     * Store the model, optionally validate it against the RelaxNG schema,
+     * then set up XPath access and count the entries.
+     *
+     * @todo map namespaces if required?
+     * @param DOMDocument $model A DOM object representing the feed
+     * @param bool (optional) $strict Whether or not to validate this feed
+     * @throws XML_Feed_Parser_Exception if $strict is set and validation fails
+     */
+    function __construct(DOMDocument $model, $strict = false)
+    {
+        $this->model = $model;
+
+        if ($strict) {
+            if (! $this->model->relaxNGValidateSource($this->relax)) {
+                throw new XML_Feed_Parser_Exception('Failed required validation');
+            }
+        }
+
+        $this->xpath = new DOMXPath($this->model);
+        $this->numberEntries = $this->count('item');
+    }
+
+    /**
+     * This is not really something that will work with RSS2 as it does not have
+     * clear restrictions on the global uniqueness of IDs. But we can emulate
+     * it by allowing access based on the 'guid' element.
+     *
+     * @param string $id any valid ID.
+     * @return XML_Feed_Parser_RSS2Element|false false if no item has that guid
+     */
+    function getEntryById($id)
+    {
+        if (isset($this->idMappings[$id])) {
+            return $this->entries[$this->idMappings[$id]];
+        }
+
+        /* XPath 1.0 string literals have no escape character, so pick a
+         * delimiter the id does not contain, or build it with concat() when
+         * it contains both kinds of quote. */
+        $id = (string) $id;
+        if (strpos($id, "'") === false) {
+            $literal = "'" . $id . "'";
+        } else if (strpos($id, '"') === false) {
+            $literal = '"' . $id . '"';
+        } else {
+            $literal = "concat('" . str_replace("'", "',\"'\",'", $id) . "')";
+        }
+
+        $entries = $this->xpath->query('//item[guid=' . $literal . ']');
+        if ($entries && $entries->length > 0) {
+            /* Instantiate the item class; $itemElement is only the tag name */
+            $entry = new $this->itemClass($entries->item(0), $this);
+            return $entry;
+        }
+        return false;
+    }
+
+    /**
+     * The category element is a simple text construct which can occur any number
+     * of times. We allow access by offset or access to an array of results.
+     *
+     * @param string $call for compatibility with our overloading
+     * @param array $arguments - arg 0 is the offset, arg 1 is whether to return as array
+     * @return string|array|false
+     */
+    function getCategory($call, $arguments = array())
+    {
+        $categories = $this->model->getElementsByTagName('category');
+        $offset = empty($arguments[0]) ? 0 : $arguments[0];
+        $array = empty($arguments[1]) ? false : true;
+        if ($categories->length < $offset or $categories->length == 0) {
+            return false;
+        }
+        if ($array) {
+            $list = array();
+            foreach ($categories as $category) {
+                array_push($list, $category->nodeValue);
+            }
+            return $list;
+        }
+        /* item() gives null when the offset equals the length (or is
+         * negative), so guard before reading the value */
+        $category = $categories->item($offset);
+        return $category ? $category->nodeValue : false;
+    }
+
+    /**
+     * Get details of the image associated with the feed.
+     *
+     * @return array|false an array keyed by child element name (title, link,
+     *                     url, description, height, width); a child that is
+     *                     missing has the value false. Returns false if there
+     *                     is no image element.
+     */
+    protected function getImage()
+    {
+        $images = $this->model->getElementsByTagName('image');
+        if ($images->length == 0) {
+            return false;
+        }
+        $image = $images->item(0);
+        $details = array();
+        $fields = array('title', 'link', 'url', 'description', 'height', 'width');
+        foreach ($fields as $field) {
+            /* Element text is in nodeValue; 'value' only exists on attributes */
+            $node = $image->getElementsByTagName($field)->item(0);
+            $details[$field] = $node ? $node->nodeValue : false;
+        }
+        return $details;
+    }
+
+    /**
+     * The textinput element is little used, but in the interests of
+     * completeness...
+     *
+     * RSS 2.0 names the element 'textInput'. The tag name 'input' that this
+     * method originally searched for is still tried as a fallback.
+     *
+     * @return array|false an array with title, description, name and link
+     *                     (false for any that are missing), or false if the
+     *                     element is not present
+     */
+    function getTextInput()
+    {
+        $inputs = $this->model->getElementsByTagName('textInput');
+        if ($inputs->length == 0) {
+            $inputs = $this->model->getElementsByTagName('input');
+        }
+        if ($inputs->length == 0) {
+            return false;
+        }
+        $input = $inputs->item(0);
+        $details = array();
+        foreach (array('title', 'description', 'name', 'link') as $field) {
+            $node = $input->getElementsByTagName($field)->item(0);
+            $details[$field] = $node ? $node->nodeValue : false;
+        }
+        return $details;
+    }
+
+    /**
+     * This is a general function used by both getSkipDays and getSkipHours. It simply
+     * returns an array of the values of the children of the appropriate tag.
+     *
+     * @param string $tagName the container element to read
+     * @return array|false false if the container element is not present
+     */
+    protected function getSkips($tagName)
+    {
+        $hours = $this->model->getElementsByTagName($tagName);
+        if ($hours->length == 0) {
+            return false;
+        }
+        $skipHours = array();
+        foreach ($hours->item(0)->childNodes as $hour) {
+            /* Only element children carry values; skip whitespace text nodes */
+            if ($hour instanceof DOMElement) {
+                array_push($skipHours, $hour->nodeValue);
+            }
+        }
+        return $skipHours;
+    }
+
+    /**
+     * The skipHours element provides a list of hours on which this feed should
+     * not be checked. We return an array of the values of its child elements.
+     *
+     * @return array|false
+     */
+    function getSkipHours()
+    {
+        return $this->getSkips('skipHours');
+    }
+
+    /**
+     * The skipDays element provides a list of days on which this feed should
+     * not be checked. We return an array of those days.
+     *
+     * @return array|false
+     */
+    function getSkipDays()
+    {
+        return $this->getSkips('skipDays');
+    }
+
+    /**
+     * The cloud element is rarely used. It is designed to provide some details
+     * of a location to update the feed.
+     *
+     * @return array|false an array of the attributes of the element, or false
+     *                     if there is no cloud element
+     */
+    function getCloud()
+    {
+        $cloud = $this->model->getElementsByTagName('cloud');
+        if ($cloud->length == 0) {
+            return false;
+        }
+        $cloudData = array();
+        foreach ($cloud->item(0)->attributes as $attribute) {
+            $cloudData[$attribute->name] = $attribute->value;
+        }
+        return $cloudData;
+    }
+
+    /**
+     * In RSS2 a link is a text element but in order to ensure that we resolve
+     * URLs properly we have a special function for them. We maintain the
+     * parameter used by the atom getLink method, though we only use the offset
+     * parameter.
+     *
+     * @param int $offset which 'link' element to use (0-based, default 0)
+     * @param string $attribute unused
+     * @param array $params unused
+     * @return string|false false if there is no link at $offset
+     */
+    function getLink($offset = 0, $attribute = 'href', $params = array())
+    {
+        $links = $this->model->getElementsByTagName('link');
+
+        if ($links->length < $offset + 1) {
+            return false;
+        }
+        $link = $links->item($offset);
+        return $this->addBase($link->nodeValue, $link);
+    }
+}
+
+?>
View
147 Parser/RSS2Element.php
@@ -0,0 +1,147 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * Class representing entries in an RSS2 feed.
+ *
+ * PHP versions 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @copyright 2005 James Stewart <james@jystewart.net>
+ * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
+ * @version CVS: $Id$
+ * @link http://dev.jystewart.net/XML_Feed_Parser/
+ */
+
+/**
+ * This class provides support for RSS 2.0 entries. It will usually be
+ * called by XML_Feed_Parser_RSS2 with which it shares many methods.
+ *
+ * @author James Stewart <james@jystewart.net>
+ * @version 0.2.2 22nd September 2005
+ * @package XML_Feed_Parser
+ */
+class XML_Feed_Parser_RSS2Element extends XML_Feed_Parser_RSS2
+{
+    /**
+     * This will be a reference to the parent object for when we want
+     * to use a 'fallback' rule
+     * @var XML_Feed_Parser_RSS2
+     */
+    protected $parent;
+
+    /**
+     * Our specific element map
+     * @var array
+     */
+    protected $map = array(
+        'title' => array('Text'),
+        'guid' => array('Guid'),
+        'description' => array('Text'),
+        'author' => array('Text'),
+        'comments' => array('Text'),
+        'enclosure' => array('Enclosure'),
+        'pubDate' => array('Date'),
+        'source' => array('Source'),
+        'link' => array('Text'));
+
+    /**
+     * Here we map some elements to their atom equivalents. This is going to be
+     * quite tricky to pull off effectively (and some users' methods may vary)
+     * but is worth trying. The key is the atom version, the value is RSS2.
+     *
+     * The values must match the keys of $map exactly (array keys and element
+     * names are case-sensitive), hence 'pubDate' rather than 'pubdate'.
+     * @var array
+     */
+    protected $compatMap = array(
+        'id' => array('guid'),
+        'content' => array('description'),
+        'updated' => array('lastBuildDate'),
+        'published' => array('pubDate'));
+
+    /**
+     * Store useful information for later.
+     *
+     * @param DOMElement $element - this item as a DOM element
+     * @param XML_Feed_Parser_RSS2 $parent - the feed of which this is a member
+     * @param string $xmlBase - accepted for signature compatibility; it is
+     *                          not stored by this class
+     */
+    function __construct(DOMElement $element, $parent, $xmlBase = '')
+    {
+        $this->model = $element;
+        $this->parent = $parent;
+    }
+
+    /**
+     * guid is the closest RSS2 has to atom's ID. It is usually but not always a URI.
+     * The one attribute that RSS2 can possess is 'ispermalink' which specifies whether
+     * the guid is itself dereferencable. Use of guid is not obligatory, but is
+     * advisable.
+     *
+     * @todo Implement ispermalink support
+     * @return string|false the guid, or false if the item has none
+     */
+    function getGuid()
+    {
+        $guids = $this->model->getElementsByTagName('guid');
+        if ($guids->length > 0) {
+            return $guids->item(0)->nodeValue;
+        }
+        return false;
+    }
+
+    /**
+     * The RSS2 spec is ambiguous as to whether an enclosure element must be
+     * unique in a given entry. For now we will assume it needn't, and allow
+     * for an offset.
+     *
+     * @param int offset
+     * @return array|false an array with 'url', 'length' and 'type' (null for
+     *                     any attribute that is missing), or false if there is
+     *                     no enclosure at that offset
+     */
+    function getEnclosure($offset = 0)
+    {
+        $encs = $this->model->getElementsByTagName('enclosure');
+        /* item() returns null for any offset outside 0..length-1, which
+         * also covers an item with no enclosure at all */
+        $enclosure = $encs->item($offset);
+        if (! $enclosure) {
+            return false;
+        }
+        $attrs = $enclosure->attributes;
+        $details = array();
+        foreach (array('url', 'length', 'type') as $name) {
+            $attribute = $attrs->getNamedItem($name);
+            $details[$name] = $attribute ? $attribute->value : null;
+        }
+        return $details;
+    }
+
+    /**
+     * source is an optional sub-element of item. Like atom:source it tells
+     * us about where the entry came from (eg. if it's been copied from another
+     * feed). It is not a rich source of metadata in the same way as atom:source
+     * and while it would be good to maintain compatibility by returning an
+     * XML_Feed_Parser_RSS2 element, it makes a lot more sense to return an array.
+     *
+     * @return array|false 'content' holds the element text and each attribute
+     *                     is added under its own name; false if there is no
+     *                     source element
+     */
+    function getSource()
+    {
+        $get = $this->model->getElementsByTagName('source');
+        if ($get->length) {
+            $source = $get->item(0);
+            $array = array(
+                'content' => $source->nodeValue);
+            foreach ($source->attributes as $attribute) {
+                $array[$attribute->name] = $attribute->value;
+            }
+            return $array;
+        }
+        return false;
+    }
+}
+
+?>
View
304 Parser/Type.php
@@ -0,0 +1,304 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
+
+/**
+ * Abstract class providing common methods for XML_Feed_Parser feeds.
+ *
+ * PHP versions 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @todo Produce tests with non-UTF8 encoded feeds
+ * @category XML
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @copyright 2005 James Stewart <james@jystewart.net>
+ * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
+ * @version CVS: $Id$
+ * @link http://dev.jystewart.net/XML_Feed_Parser/
+ */
+
+/**
+ * This abstract class provides some general methods that are likely to be
+ * implemented exactly the same way for all feed types.
+ *
+ * @package XML_Feed_Parser
+ * @author James Stewart <james@jystewart.net>
+ * @version 0.2.2 22nd September 2005
+ */
+abstract class XML_Feed_Parser_Type
+{
+    /**
+     * Where we store our DOM object for this feed
+     * @var DOMDocument
+     */
+    public $model;
+
+    /**
+     * We don't particularly need to use this for this class, but it's helpful
+     * to make inheritance work.
+     * @var string
+     */
+    protected $xmlBase;
+
+    /**
+     * For iteration we'll want a count of the number of entries
+     * @var int
+     */
+    public $numberEntries;
+
+    /**
+     * Where we store our entry objects once instantiated
+     * @var array
+     */
+    public $entries = array();
+
+    /**
+     * We are not going to provide methods for every entry type so this
+     * function will allow for a lot of mapping. We rely pretty heavily
+     * on this to handle our mappings between other feed types and atom.
+     *
+     * The call name is first translated through $compatMap (if listed there),
+     * then looked up in $map to find the construct, and the matching
+     * 'get' + construct method is invoked with ($call, $arguments). Both
+     * maps are read from the concrete subclass.
+     *
+     * @param string $call - the method attempted
+     * @param array $arguments - arguments to that method
+     * @return mixed false if the call is not mapped or no accessor exists
+     */
+    function __call($call, $arguments = array())
+    {
+        if (! is_array($arguments)) {
+            $arguments = array();
+        }
+
+        if (isset($this->compatMap[$call])) {
+            $arguments = array_merge($arguments, $this->compatMap[$call]);
+            $call = $this->compatMap[$call][0];
+        }
+
+        if (isset($this->map[$call])) {
+            $method = 'get' . $this->map[$call][0];
+            if ($method == 'getLink') {
+                /* Link parameters are read from the first argument, which
+                 * is expected to be an array of (offset, attribute, params) */
+                $offset = isset($arguments[0][0]) ? $arguments[0][0] : 0;
+                $attribute = isset($arguments[0][1]) ? $arguments[0][1] : 'href';
+                $params = isset($arguments[0][2]) ? $arguments[0][2] : array();
+                return $this->getLink($offset, $attribute, $params);
+            }
+        } else {
+            return false;
+        }
+
+        if (method_exists($this, $method)) {
+            return $this->$method($call, $arguments);
+        }
+
+        return false;
+    }
+
+    /**
+     * For many elements variable-style access will be desirable. This function
+     * provides for that by calling the method of the same name.
+     *
+     * @param string $value - the variable required
+     * @return mixed
+     */
+    function __get($value)
+    {
+        return $this->$value();
+    }
+
+    /**
+     * We will often need to extract the xml:base values that apply to a
+     * link. This method iterates through the hierarchy and extracts the
+     * relevant attributes, and then combines them.
+     *
+     * @param DOMElement The starting node
+     * @return string
+     */
+    function getBase($thisNode)
+    {
+        /* We'll need some containers and settings */
+        $bases = array();
+        $combinedBase = $this->xmlBase;
+        preg_match('/^([A-Za-z]+:\/\/.*?)\//', (string) $combinedBase, $results);
+        $firstLayer = isset($results[1]) ? $results[1] : '';
+
+        $nameSpace = 'http://www.w3.org/XML/1998/namespace';
+
+        /* Iterate up the tree and grab all parent xml:bases */
+        while ($thisNode instanceof DOMElement) {
+            if ($thisNode->hasAttributes()) {
+                $test = $thisNode->attributes->getNamedItemNS($nameSpace, 'base');
+                if ($test) {
+                    array_push($bases, $test->nodeValue);
+                }
+            }
+            $thisNode = $thisNode->parentNode;
+        }
+
+        /* if starts with a protocol then restart the string. if starts with a / then
+         * add on to the domain name. otherwise tag on to the end */
+        $bases = array_reverse($bases);
+
+        foreach ($bases as $base) {
+            if (preg_match('/^[A-Za-z]+:\/\//', $base)) {
+                $combinedBase = $base;
+                preg_match('/^([A-Za-z]+:\/\/.*?)\//', $base, $results);
+                /* A base with no slash after the host does not match the
+                 * pattern; the whole base is then the first layer */
+                $firstLayer = isset($results[1]) ? $results[1] : $base;
+            } else if (preg_match('/^\//', $base)) {
+                $combinedBase = $firstLayer . $base;
+            } else {
+                $combinedBase .= $base;
+            }
+        }
+        return $combinedBase;
+    }
+
+    /**
+     * getBase gets us the xml:base data. We then need to process that with regard
+     * to our current link. This function does that and returns the link in as
+     * complete a form as possible.
+     *
+     * @param string $link the link as found in the feed
+     * @param DOMElement $element the node the link was read from
+     * @return string
+     */
+    function addBase($link, $element)
+    {
+        /* Links that already carry a protocol are returned untouched */
+        if (preg_match('/^[A-Za-z]+:\/\//', $link)) {
+            return $link;
+        }
+
+        $base = $this->getBase($element);
+
+        if (preg_match('/^\//', $link)) {
+            preg_match('/^([A-Za-z]+:\/\/.*?)\//', (string) $base, $results);
+            /* No match means the base is empty or has no slash after the
+             * host, so use the base as it stands */
+            $root = isset($results[1]) ? $results[1] : $base;
+            return $root . $link;
+        } else {
+            return $base . $link;
+        }
+    }
+
+    /**
+     * Pretty fundamental! Entries are instantiated on first access and then
+     * kept in $entries.
+     *
+     * @param int $offset
+     * @return object an instance of the class named in $itemClass
+     * @throws XML_Feed_Parser_Exception if there is no entry at $offset
+     */
+    function getEntryByOffset($offset)
+    {
+        if (! isset($this->entries[$offset])) {
+            $entries = $this->model->getElementsByTagName($this->itemElement);
+            if ($entries->length == 0) {
+                throw new XML_Feed_Parser_Exception('No entries found');
+            }
+            /* item() returns null for an offset outside 0..length-1 */
+            $entry = $entries->item($offset);
+            if (! $entry) {
+                throw new XML_Feed_Parser_Exception(
+                    'No entry found at offset ' . $offset);
+            }
+            $xmlBase = $this->getBase($entry);
+            $this->entries[$offset] = new $this->itemClass(
+                $entry, $this, $xmlBase);
+        }
+
+        return $this->entries[$offset];
+    }
+
+    /**
+     * Get a date construct. We use PHP's strtotime to return it as a unix datetime
+     *
+     * @param string $method The name of the date construct we want
+     * @param array $arguments Included for compatibility with our __call usage
+     * @return int|false datetime
+     */
+    protected function getDate($method, $arguments)
+    {
+        $time = $this->model->getElementsByTagName($method);
+        if ($time->length == 0) {
+            return false;
+        }
+        return strtotime($time->item(0)->nodeValue);
+    }
+
+    /**
+     * Get a text construct.
+     *
+     * @param string $method The name of the text construct we want
+     * @param array $arguments Included for compatibility with our __call usage
+     * @return string|false false if the element is not present
+     */
+    protected function getText($method, $arguments = array())
+    {
+        $tags = $this->model->getElementsByTagName($method);
+        if ($tags->length > 0) {
+            return $tags->item(0)->nodeValue;
+        }
+        return false;
+    }
+
+    /**
+     * There is no single way of declaring a category in RSS1 or Atom as there is
+     * in RSS2.
+     * Instead the usual approach is to use the dublin core namespace to declare
+     * categories. For example delicious use both: <dc:subject>PEAR</dc:subject>
+     * and: <taxo:topics><rdf:Bag>
+     * <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
+     * to declare a categorisation of 'PEAR'.
+     *
+     * We need to be sensitive to this where possible. For the initial implementation
+     * we will simply extract all dc:subject entries as that is common across Atom and
+     * RSS1.
+     *
+     * @param string $call for compatibility with our overloading
+     * @param array $arguments - arg 0 is the offset, arg 1 is whether to return as array
+     * @return string|array|false
+     */
+    protected function getCategory($call, $arguments)
+    {
+        $categories = $this->model->getElementsByTagName('subject');
+        $offset = empty($arguments[0]) ? 0 : $arguments[0];
+        $array = empty($arguments[1]) ? false : true;
+        if ($categories->length < $offset or $categories->length == 0) {
+            return false;
+        }
+        if ($array) {
+            $list = array();
+            foreach ($categories as $category) {
+                array_push($list, $category->nodeValue);
+            }
+            return $list;
+        }
+        /* item() gives null when the offset equals the length (or is
+         * negative), so guard before reading the value */
+        $category = $categories->item($offset);
+        return $category ? $category->nodeValue : false;
+    }
+
+    /**
+     * This function will tell us how many times the element $type
+     * appears anywhere beneath the model node.
+     *
+     * @param string $type the element we want to get a count of
+     * @return int
+     */
+    protected function count($type)
+    {
+        return $this->model->getElementsByTagName($type)->length;
+    }
+
+    /**
+     * Return an XML serialization of the feed, should it be required. Most
+     * users however, will already have a serialization that they used when
+     * instantiating the object.
+     *
+     * @return string XML serialization of element
+     */
+    function __toString()
+    {
+        $simple = simplexml_import_dom($this->model);
+        return $simple->asXML();
+    }
+}
+
+?>
View
142 package.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="ISO-8859-1" ?>
+<package version="1.0">
+ <name>XML_Feed_Parser</name>
+ <summary>Providing a somewhat unified API for handling RSS1, RSS2 and Atom feeds</summary>
+ <description>XML_Feed_Parser is a parser for RSS1, RSS2 and Atom format XML feeds. It attempts to provide a somewhat unified API while still allowing access to the full details of each feed type.</description>
+ <license>PHP License</license>
+ <maintainers>
+ <maintainer>
+ <user>jystewart</user>
+ <role>lead</role>
+ <name>James Stewart</name>
+ <email>james@jystewart.net</email>
+ </maintainer>
+ </maintainers>
+ <release>
+ <version>0.2.4alpha</version>
+ <state>alpha</state>
+ <date>2005-10-10</date>
+ <notes>First release in PEAR
+Added custom exception class (vote condition)
+Moved to single quotes in most cases (vote condition)</notes>
+ <filelist>
+ <dir name="/" baseinstalldir="/XML/Feed">
+ <dir name="Parser" role="php">
+ <file name="Exception.php">
+ <replace from="@version@" to="version" type="package-info"/>
+ </file>
+ <file name="Atom.php">
+ <replace from="@version@" to="version" type="package-info"/>
+ </file>
+ <file name="AtomElement.php">
+ <replace from="@version@" to="version" type="package-info"/>
+ </file>
+ <file name="RSS1.php">
+ <replace from="@version@" to="version" type="package-info"/>
+ </file>
+ <file name="RSS1Element.php">
+ <replace from="@version@" to="version" type="package-info"/>
+ </file>
+ <file name="RSS2.php">
+ <replace from="@version@" to="version" type="package-info"/>
+ </file>
+ <file name="RSS2Element.php">
+ <replace from="@version@" to="version" type="package-info"/>
+ </file>
+ <file name="Type.php">
+ <replace from="@version@" to="version" type="package-info"/>
+ </file>
+ </dir>
+ <file role="php" name="Parser.php">
+ <replace from="@version@" to="version" type="package-info"/>
+ </file>
+ <dir name="tests" role="test">
+ <file name="accessTypes.php"/>
+ <file name="atomCompliance.php"/>
+ <file name="iteration.php"/>
+ <file name="atomValues.php"/>
+ <file name="atomEntryOnly.php"/>
+ <file name="rss1Values.php"/>
+ <file name="rss2Values.php"/>
+ </dir>
+ <dir name="samples" role="data">
+ <file name="atom10-example1.xml"/>
+ <file name="atom10-example2.xml"/>
+ <file name="atom10-entryonly.xml"/>
+ <file name="rss10-example1.xml"/>
+ <file name="rss10-example2.xml"/>
+ <file name="rss2sample.xml"/>
+ <file name="delicious.feed"/>
+ <file name="flickr.feed"/>
+ <file name="grwifi-atom.xml"/>
+ <file name="technorati.feed"/>
+ </dir>
+ </dir>
+ </filelist>
+ <deps>
+ </deps>
+ </release>
+ <changelog>
+ <release>
+ <version>0.2.3alpha</version>
+ <state>alpha</state>
+ <date>2005-10-02</date>
+ <notes>Moved to PEAR_Exception
+Fixed RSS1 test to handle entities properly
+Preparing for pepr call for votes</notes>
+ </release>
+ <release>
+ <version>0.2.2alpha</version>
+ <state>alpha</state>
+ <date>2005-09-22</date>
+ <notes>Lots of CS fixes.
+Added @var notes for members, and file level docblocks
+Renamed some member names that were prefixed with underscores
+Cleaned up a few methods' comment blocks
+Moved all require_once calls to appropriate places in Parser.php
+Fix to atom enclosure support
+Added test for entry-only atom feed</notes>
+ </release>
+ <release>
+ <version>0.2.1alpha</version>
+ <state>alpha</state>
+ <date>2005-09-14</date>
+ <notes>Unit tests added
+Various fixes and checks added following testing</notes>
+ </release>
+ <release>
+ <version>0.2.0devel</version>
+ <state>devel</state>
+ <date>2005-09-11</date>
+ <notes>Made DOMDocuments for feed and entries public to allow for extensions that make use of the DOM
+Added functions to handle xml:base and applied to link elements
+Some work on API consistency
+Improved handling of content type="xhtml" for atom feeds
+Fixes to use of __tostring() method</notes>
+ </release>
+ <release>
+ <version>0.1.2devel</version>
+ <state>devel</state>
+ <date>2005-09-10</date>
+ <notes>Added support for RSS1/2 'image' and 'textinput' elements
+ Added category support (dc:subject) for RSS1
+ Added cloud, ttl, skipDays, and skipHours support for RSS2 (completing RSS spec support)
+ Added support for the RSS1 syndication and content modules.
+ Added partial RSS1 Dublin Core support
+ Added support for atom:source support and reworked atom:author support to work with it</notes>
+ </release>
+ <release>
+ <version>0.1.1devel</version>
+ <state>devel</state>
+ <date>2005-09-07</date>
+ <notes>Added Enclosures support
+Fixed up getElementById for RSS1</notes>
+ </release>
+ <release>
+ <version>0.1.0devel</version>
+ <state>devel</state>
+ <date>2005-09-03</date>
+ <notes>Initial rolling</notes>
+ </release>
+ </changelog>
+</package>
View
28 samples/atom10-entryonly.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="utf-8"?>
+<entry xmlns="http://www.w3.org/2005/Atom">
+ <title>Atom draft-07 snapshot</title>
+ <link rel="alternate" type="text/html"
+ href="http://example.org/2005/04/02/atom"/>
+ <link rel='enclosure' type="audio/mpeg" length="1337"
+ href="http://example.org/audio/ph34r_my_podcast.mp3"/>
+ <id>tag:example.org,2003:3.2397</id>
+ <updated>2005-07-10T12:29:29Z</updated>
+ <published>2003-12-13T08:29:29-04:00</published>
+ <author>
+ <name>Mark Pilgrim</name>
+ <uri>http://example.org/</uri>
+ <email>f8dy@example.com</email>
+ </author>
+ <contributor>
+ <name>Sam Ruby</name>
+ </contributor>
+ <contributor>
+ <name>Joe Gregorio</name>
+ </contributor>
+ <content type="xhtml" xml:lang="en"
+ xml:base="http://diveintomark.org/">
+ <div xmlns="http://www.w3.org/1999/xhtml">
+ <p><i>[Update: The Atom draft is finished.]</i></p>
+ </div>
+ </content>
+ </entry>
View
20 samples/atom10-example1.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+
+ <title>Example Feed</title>
+ <link href="http://example.org/"/>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <author>
+ <name>John Doe</name>
+ </author>
+ <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
+
+ <entry>
+ <title>Atom-Powered Robots Run Amok</title>
+ <link href="http://example.org/2003/12/13/atom03"/>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <updated>2003-12-13T18:30:02Z</updated>
+ <summary>Some text.</summary>
+ </entry>
+
+</feed>
View
45 samples/atom10-example2.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="utf-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title type="text">dive into mark</title>
+ <subtitle type="html">
+ A &lt;em&gt;lot&lt;/em&gt; of effort
+ went into making this effortless
+ </subtitle>
+ <updated>2005-07-31T12:29:29Z</updated>
+ <id>tag:example.org,2003:3</id>
+ <link rel="alternate" type="text/html"
+ hreflang="en" href="http://example.org/"/>
+ <link rel="self" type="application/atom+xml"
+ href="http://example.org/feed.atom"/>
+ <rights>Copyright (c) 2003, Mark Pilgrim</rights>
+ <generator uri="http://www.example.com/" version="1.0">
+ Example Toolkit
+ </generator>
+ <entry>
+ <title>Atom draft-07 snapshot</title>
+ <link rel="alternate" type="text/html"
+ href="http://example.org/2005/04/02/atom"/>
+ <link rel='enclosure' type="audio/mpeg" length="1337"
+ href="http://example.org/audio/ph34r_my_podcast.mp3"/>
+ <id>tag:example.org,2003:3.2397</id>
+ <updated>2005-07-31T12:29:29Z</updated>
+ <published>2003-12-13T08:29:29-04:00</published>
+ <author>
+ <name>Mark Pilgrim</name>
+ <uri>http://example.org/</uri>
+ <email>f8dy@example.com</email>
+ </author>
+ <contributor>