Skip to content

Commit

Permalink
Documented & commented everything in phpdoc style. Phew.
Browse files Browse the repository at this point in the history
  • Loading branch information
mibe committed Jan 10, 2013
1 parent 7eb16c3 commit 9f18fb4
Show file tree
Hide file tree
Showing 12 changed files with 404 additions and 1 deletion.
54 changes: 54 additions & 0 deletions ArchiveBase.php
@@ -1,10 +1,30 @@
<?php

/**
* Represents a data archive which contains different items.
*
* This class is abstract; the methods {@link load()} and {@link save()} have to be
* implemented in subclasses.
*
* @author Michael Bemmerl <mail@mx-server.de>
* @copyright Copyright (C) 2013 Michael Bemmerl
*/
abstract class ArchiveBase
{
protected $contents;
protected $archiveIdentifier;

/**
* Constructor of the class. Needs an archive identifier, which is used to
* uniquely identify any archive.
*
* The archive is loaded in the constructor. The loading takes place in the
* subclasses, which implement the two abstract methods.
*
* Throws an exception if the identifier is empty or not a string.
*
* @param string $archiveIdentifier Identifier of the archive
*/
function __construct($archiveIdentifier)
{
if (empty($archiveIdentifier) || !is_string($archiveIdentifier))
Expand All @@ -15,31 +35,65 @@ function __construct($archiveIdentifier)
$this->load();
}

/**
* Destructor of the class. Saves the archive back to its storage medium.
*
* The saving is done in the subclass, which implements the abstract
* {@link save()} method.
*/
function __destruct()
{
// Persist the archive when the object is destroyed. save() is abstract,
// so the actual storage logic is supplied by the concrete subclass.
$this->save();
}

/**
* Loads the archive.
*/
abstract protected function load();

/**
* Saves the archive.
*/
abstract protected function save();

/**
* Add an entry to the archive.
*
* The entry is checked against duplicates.
*
* @param string $uid Entry to add to the archive.
*/
public function add($uid)
{
    // Duplicates are silently ignored: an entry is stored at most once.
    if ($this->contains($uid))
    {
        return;
    }

    // Append the new entry to the end of the archive contents.
    $this->contents[] = $uid;
}

/**
* Remove an element from the archive.
*
* @param string $uid Element to remove from the archive.
*/
public function remove($uid)
{
    // Strict search, so only an element of identical type and value matches.
    $position = array_search($uid, $this->contents, TRUE);

    // array_search() yields FALSE when nothing was found; a valid key may
    // itself be 0, hence the identity comparison.
    if ($position === FALSE)
    {
        return;
    }

    unset($this->contents[$position]);
}

/**
* Check if the archive already contains a specified element.
*
* @param string $uid Element to look for.
* @return bool TRUE if the element is already in the archive.
*/
public function contains($uid)
{
    // Strict comparison: the element must match in both type and value.
    $isPresent = in_array($uid, $this->contents, TRUE);

    return $isPresent;
}

/**
* Remove every element in the archive. The archive will be empty after that.
*/
public function clear()
{
$this->contents = array();
Expand Down
12 changes: 12 additions & 0 deletions AtomFeedItem.php
@@ -1,12 +1,24 @@
<?php

/**
* Represents a feed item in ATOM format.
*
* @author Michael Bemmerl <mail@mx-server.de>
* @copyright Copyright (C) 2013 Michael Bemmerl
*/
class AtomFeedItem extends FeedItemBase
{
/**
* {@inheritdoc}
*/
public function __construct(DOMElement $xmlElement)
{
// No Atom-specific setup happens here; construction is delegated
// unchanged to the parent class (FeedItemBase).
parent::__construct($xmlElement);
}

/**
* {@inheritdoc}
*/
public function parseXml()
{
$this->title = $this->getXmlChildValue('title');
Expand Down
12 changes: 12 additions & 0 deletions AtomFeedManipulator.php
@@ -1,7 +1,16 @@
<?php

/**
* Represents a class for manipulating XML feeds in ATOM format.
*
* @author Michael Bemmerl <mail@mx-server.de>
* @copyright Copyright (C) 2013 Michael Bemmerl
*/
class AtomFeedManipulator extends FeedManipulatorBase
{
/**
* {@inheritdoc}
*/
public function isSupported()
{
$feed = $this->feed->getElementsByTagName('feed');
Expand All @@ -14,6 +23,9 @@ public function isSupported()
return $xmlns == 'http://www.w3.org/2005/Atom';
}

/**
* {@inheritdoc}
*/
public function parseFeed()
{
$items = $this->feed->getElementsByTagName('entry');
Expand Down
57 changes: 56 additions & 1 deletion Core.php
Expand Up @@ -14,15 +14,39 @@

require('HttpClient.php');

/**
* Main class for filtering duplicated entries in RSS / ATOM feeds.
*
* The feed is loaded from the remote server and its XML is parsed by PHP's
* DOMDocument class. Every feed entry is then checked against an archive
* to detect whether it has been seen before. If so, the entry is removed
* from the feed. After every entry has been checked, the possibly altered
* feed XML is built again and sent to the client.
*
* @author Michael Bemmerl <mail@mx-server.de>
* @copyright Copyright (C) 2013 Michael Bemmerl
*/
class Core
{
private $feedUrl;
private $archive;
private $http;
private $feedManipulator;

/**
* List of feed manipulators which should be probed and used to
* manipulate the feed.
*
* @static
* @var array
*/
public static $manipulatorClasses = array('Rss2', 'Atom', 'Rss1');

/**
* Constructor of the class. Needs the URL of the feed.
*
* @param string $feedUrl URL to the feed which should be filtered.
*/
function __construct($feedUrl)
{
if (empty($feedUrl))
Expand All @@ -34,10 +58,21 @@ function __construct($feedUrl)
$this->archive = new FileArchive($feedUrl);
$this->http = new HttpClient();

// Download the feed and check if a manipulator supports it
$this->fetchFeed();
$this->detectManipulator();
}

/**
* Tries to detect the appropriate feed manipulator which can handle the
* downloaded feed, depending on the type of the feed.
*
* Every class name in the {@link $manipulatorClasses} array will be
* instantiated and probed if it supports the feed type.
*
* If the XML of the feed could not be parsed properly an exception with
* more information about the cause will be thrown.
*/
private function detectManipulator()
{
$lastLoadingError = '';
Expand Down Expand Up @@ -73,6 +108,12 @@ private function detectManipulator()
throw new ErrorException($msg, 501);
}

/**
* Retrieve the feed from the remote server.
*
* If the feed could not be downloaded an exception will be thrown with
* information about the cause.
*/
private function fetchFeed()
{
// Retrieve Feed
Expand All @@ -90,8 +131,15 @@ private function fetchFeed()
}
}

/**
* Filter out duplicated items in the feed. The resulting feed is directly sent to the client.
*
* The same Content-Type header field as the original feed had is also set so
* the MIME type (and encoding) the remote server used isn't lost.
*/
public function filter()
{
// Parse the feed and extract all items.
$this->feedManipulator->parseFeed();

foreach($this->feedManipulator as $item)
Expand All @@ -107,11 +155,18 @@ public function filter()
}

// Filtering is done, now build and output the altered feed.
// Also use the same Content-Type of the feed, so the encoding won't get lost.
// Also use the same Content-Type of the feed, so the MIME type
// (and encoding) won't get lost.
header('Content-Type: ' . $this->http->contentType);
print $this->feedManipulator->buildFeed();
}

/**
* Generate a unique identifier from the feed item.
*
* @param FeedItemBase $feedItem
* @return string
*/
private function buildUniqueId(FeedItemBase $feedItem)
{
return sha1($feedItem->title);
Expand Down
80 changes: 80 additions & 0 deletions FeedItemBase.php
@@ -1,23 +1,81 @@
<?php

/**
* Represents a single entry of a feed.
*
* This class is abstract: The method {@link parseXml()} must be implemented
* in subclasses.
*
* @author Michael Bemmerl <mail@mx-server.de>
* @copyright Copyright (C) 2013 Michael Bemmerl
*/
abstract class FeedItemBase
{
/**
* Title of the entry. NULL if not available.
*
* @var string
*/
public $title;

/**
* Description of the entry. NULL if not available.
*
* @var string
*/
public $description;

/**
* Date of the entry. NULL if not available.
*
* @var string
*/
public $date;

/**
* Link of the entry. NULL if not available.
*
* @var string
*/
public $link;

/**
* ID of the entry. NULL if not available.
*
* @var string
*/
public $id;

/**
* Corresponding XML element.
*
* @var DOMElement
*/
public $xmlElement;

/**
* Constructor of the class.
*
* @param DOMElement $xmlElement XML element this entry is built from.
*/
function __construct(DOMElement $xmlElement)
{
// Only store the DOM element; this constructor does not parse it.
$this->xmlElement = $xmlElement;
}

/**
* Parses the XML of the feed entry.
*/
abstract public function parseXml();

/**
* Returns the first child XML element or NULL if not found.
*
* Throws an exception if the tag name is empty or not a string.
*
* @param string Name of the XML tag.
* @return DOMNode|null
*/
private function getXmlChild($name)
{
if (empty($name) || !is_string($name))
Expand All @@ -28,6 +86,14 @@ private function getXmlChild($name)
return $list->length > 0 ? $list->item(0) : NULL;
}

/**
* Returns the value of first child XML element or NULL if not found.
*
* Throws an exception if the tag name is empty or not a string.
*
* @param string Name of the XML tag.
* @return string|null
*/
protected function getXmlChildValue($name)
{
if (empty($name) || !is_string($name))
Expand All @@ -38,6 +104,15 @@ protected function getXmlChildValue($name)
return $child != NULL ? $child->nodeValue : NULL;
}

/**
* Returns the attribute value of the first child XML element or NULL if not found.
*
* Throws an exception if the tag or attribute name is empty or not a string.
*
* @param string Name of the XML tag.
* @param string Name of the attribute.
* @return string|null
*/
protected function getXmlChildAttributeValue($name, $attributeName)
{
if (empty($name) || !is_string($name))
Expand All @@ -56,6 +131,11 @@ protected function getXmlChildAttributeValue($name, $attributeName)
return $attr != '' ? $attr : NULL;
}

/**
* Human-readable representation of this instance.
*
* @return string
*/
public function __toString()
{
return sprintf("[%s] %s (%s) (URL: %s)\n", $this->id, $this->title, $this->date, $this->link);
Expand Down

0 comments on commit 9f18fb4

Please sign in to comment.