Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Commit a series of patches by Seth Price:

- The parser now produces XHTML 1.1.
- Most of the bugs listed are fixed.
- It can clean up some fairly painful BBCode.


git-svn-id: https://svn.php.net/repository/pear/packages/HTML_BBCodeParser/trunk@198393 c90b9560-bf6c-de11-be94-00142212c4b1
  • Loading branch information...
commit 3bf22f099a22c443efe3229860498ec67994d83a 1 parent 430d2d1
Arnaud Limbourg authored
View
139 BBCodeParser.php
@@ -136,7 +136,7 @@ class HTML_BBCodeParser
* @access private
* @var array
*/
- var $_options = array( 'quotestyle' => 'single',
+ var $_options = array( 'quotestyle' => 'double',
'quotewhat' => 'all',
'open' => '[',
'close' => ']',
@@ -339,6 +339,7 @@ function _buildTagArray()
$prev = $tag;
$strPos = $newPos;
}
+
}
@@ -364,7 +365,7 @@ function _buildTag($str)
if (substr($str, 1, 1) == '/') { /* closing tag */
- $tag['tag'] = substr($str, 2, strlen($str) - 3);
+ $tag['tag'] = strtolower(substr($str, 2, strlen($str) - 3));
if ( (in_array($tag['tag'], array_keys($this->_definedTags)) == false) ) {
return false; /* nope, it's not valid */
} else {
@@ -382,29 +383,29 @@ function _buildTag($str)
split the tag with arguments and all */
$oe = $this->_options['open_esc'];
$ce = $this->_options['close_esc'];
- if (preg_match("!$oe([a-z]+)[^$ce]*$ce!i", $str, $tagArray) == 0) {
+ $tagArray = array();
+ if (preg_match("!$oe([a-z0-9]+)[^$ce]*$ce!i", $str, $tagArray) == 0) {
return false;
}
- $tag['tag'] = $tagArray[1];
+ $tag['tag'] = strtolower($tagArray[1]);
if ( (in_array($tag['tag'], array_keys($this->_definedTags)) == false) ) {
return false; /* nope, it's not valid */
}
/* tnx to Onno for the regex
validate the arguments */
- preg_match_all("![\s$oe]([a-z]+)=([^\s$ce]+)(?=[\s$ce])!i", $str, $attributeArray, PREG_SET_ORDER);
+ $attributeArray = array();
+ preg_match_all("![\s$oe]([a-z0-9]+)=([^\s$ce]+)(?=[\s$ce])!i", $str, $attributeArray, PREG_SET_ORDER);
foreach ($attributeArray as $attribute) {
- if ( (in_array($attribute[1], array_keys($this->_definedTags[$tag['tag']]['attributes'])) == true) ) {
- $tag['attributes'][$attribute[1]] = $attribute[2];
+ $attNam = strtolower($attribute[1]);
+ if ( (in_array($attNam, array_keys($this->_definedTags[$tag['tag']]['attributes'])) == true) ) {
+ $tag['attributes'][$attNam] = $attribute[2];
}
}
return $tag;
}
}
-
-
-
/**
* Validates the tag array, regarding the allowed tags
*
@@ -419,16 +420,27 @@ function _buildTag($str)
* @access private
* @see _isAllowed()
* @see $_tagArray
- * @author Stijn de Reede <sjr@gmx.co.uk>
+ * @author Stijn de Reede <sjr@gmx.co.uk>, Seth Price <seth@pricepages.org>
*/
- function _validateTagArray()
- {
+ function _validateTagArray(){
$newTagArray = array();
$openTags = array();
- foreach ($this->_tagArray as $tag) {
+ foreach($this->_tagArray as $tag) {
$prevTag = end($newTagArray);
switch ($tag['type']) {
case 0:
+ if(($child = $this->_childNeeded(end($openTags), 'text')) &&
+ $child !== false &&
+ /*
+ * No idea what to do in this case: A child is needed, but
+ * no valid one is returned. We'll ignore it here and live
+ * with it until someone reports a valid bug.
+ */
+ $child !== true ){
+
+ $newTagArray[] = $child;
+ $openTags[] = $child['tag'];
+ }
if ($prevTag['type'] === 0) {
$tag['text'] = $prevTag['text'].$tag['text'];
array_pop($newTagArray);
@@ -437,20 +449,43 @@ function _validateTagArray()
break;
case 1:
- if ($this->_isAllowed(end($openTags), $tag['tag']) == false) {
+ if(($this->_isAllowed(end($openTags), $tag['tag']) == false) ||
+ ($parent = $this->_parentNeeded(end($openTags), $tag['tag'])) === true ||
+ ($child = $this->_childNeeded(end($openTags), $tag['tag'])) === true) {
$tag['type'] = 0;
if ($prevTag['type'] === 0) {
$tag['text'] = $prevTag['text'].$tag['text'];
array_pop($newTagArray);
}
} else {
+ if($parent) {
+ /*
+ * Avoid use of parent if we can help it. If we are
+ * trying to insert a new parent, but the current tag is
+ * the same as the previous tag, then assume that the
+ * previous tag structure is valid, and add this tag as
+ * a sibling. To add as a sibling, we need to close the
+ * current tag.
+ */
+ if($tag['tag'] == end($openTags)){
+ $newTagArray[] = $this->_buildTag('[/'.$tag['tag'].']');
+ array_pop($openTags);
+ } else {
+ $newTagArray[] = $parent;
+ $openTags[] = $parent['tag'];
+ }
+ }
+ if($child) {
+ $newTagArray[] = $child;
+ $openTags[] = $child['tag'];
+ }
$openTags[] = $tag['tag'];
}
$newTagArray[] = $tag;
break;
case 2:
- if ( ($this->_isAllowed(end($openTags), $tag['tag']) == true) || ($tag['tag'] == end($openTags)) ) {
+ if(($tag['tag'] == end($openTags) || ($this->_isAllowed(end($openTags), $tag['tag']) == true))) {
if (in_array($tag['tag'], $openTags)) {
$tmpOpenTags = array();
while (end($openTags) != $tag['tag']) {
@@ -460,12 +495,15 @@ function _validateTagArray()
}
$newTagArray[] = $tag;
array_pop($openTags);
+ /* why is this here? it just seems to break things
+ * (nested lists where closing tags need to be
+ * generated)
while (end($tmpOpenTags)) {
$tmpTag = $this->_buildTag('['.end($tmpOpenTags).']');
$newTagArray[] = $tmpTag;
$openTags[] = $tmpTag['tag'];
array_pop($tmpOpenTags);
- }
+ }*/
}
} else {
$tag['type'] = 0;
@@ -485,8 +523,73 @@ function _validateTagArray()
$this->_tagArray = $newTagArray;
}
+ /**
+ * Checks whether a parent tag must be inserted before opening $in
+ *
+ * Reads the optional 'parent' entry of $in in $_definedTags, a string of
+ * the form "<mode>^<tag>,<tag>,...". No entry, or the value 'all', means
+ * that no parent is required. With mode 'none', $out must be one of the
+ * listed tags; if it is not, a tag built from the first listed name is
+ * returned so that the caller can add it to the stack. With mode 'all',
+ * any non-empty $out that is not listed is accepted. In every remaining
+ * case a parent is required but cannot be chosen here, so true is returned.
+ *
+ * NOTE(review): a 'parent' value without '^' leaves $ar[1] undefined.
+ * NOTE(review): the generated tag is spelled with literal '[' and ']'
+ * rather than the configured open/close options; _buildTag() returns
+ * false for strings it cannot parse -- confirm this is intended.
+ *
+ * @param string $out name of the tag on the outside; callers pass
+ * end($openTags), so it may be empty/falsy when no tag is open
+ * @param string $in name of the tag on the inside
+ * @return boolean|array false if no parent is needed, the result of
+ * _buildTag() for the parent to insert, or true if a parent is needed
+ * but no candidate can be named
+ * @access private
+ * @see _validateTagArray()
+ * @author Seth Price <seth@pricepages.org>
+ */
+ function _parentNeeded($out, $in){
+ if(!isset($this->_definedTags[$in]['parent'])) return false; // no constraint declared
+ if ($this->_definedTags[$in]['parent'] == 'all') return false; // any parent will do
+
+ $ar = explode('^', $this->_definedTags[$in]['parent']); // $ar[0] = mode, $ar[1] = tag list
+ $tags = explode(',', $ar[1]);
+ if($ar[0] == 'none'){
+ if($out && in_array($out, $tags)) return false; // already inside a listed parent
+ // Build an opening tag from the first name on the list
+ return $this->_buildTag('['.$tags[0].']');
+ }
+ if($ar[0] == 'all' && $out && !in_array($out, $tags)) return false;
+ /*
+ * A parent is needed, but we don't know which one. We could make
+ * something up, but the choice would be arbitrary and therefore
+ * worthless, so signal the problem with true instead.
+ */
+ return true;
+ }
-
+ /**
+ * Checks whether a child tag must be inserted between $out and $in
+ *
+ * Reads the optional 'child' entry of $out in $_definedTags, a string of
+ * the form "<mode>^<tag>,<tag>,...". No entry, or the value 'all', means
+ * that no particular child is required. With mode 'none', $in must be one
+ * of the listed tags; if it is not, a tag built from the first listed
+ * name is returned so that the caller can add it to the stack. With mode
+ * 'all', any non-empty $in that is not listed is accepted. In every
+ * remaining case a child is required but cannot be chosen here, so true
+ * is returned.
+ *
+ * NOTE(review): a 'child' value without '^' leaves $ar[1] undefined.
+ * NOTE(review): the generated tag is spelled with literal '[' and ']'
+ * rather than the configured open/close options; _buildTag() returns
+ * false for strings it cannot parse -- confirm this is intended.
+ *
+ * @param string $out name of the tag on the outside; callers pass
+ * end($openTags), so it may be empty/falsy when no tag is open
+ * @param string $in name of the tag on the inside; _validateTagArray()
+ * passes the literal 'text' for plain text
+ * @return boolean|array false if no child is needed, the result of
+ * _buildTag() for the child to insert, or true if a child is needed
+ * but no candidate can be named
+ * @access private
+ * @see _validateTagArray()
+ * @author Seth Price <seth@pricepages.org>
+ */
+ function _childNeeded($out, $in){
+ if(!isset($this->_definedTags[$out]['child'])) return false; // no constraint declared
+ if ($this->_definedTags[$out]['child'] == 'all') return false; // any child will do
+
+ $ar = explode('^', $this->_definedTags[$out]['child']); // $ar[0] = mode, $ar[1] = tag list
+ $tags = explode(',', $ar[1]);
+ if($ar[0] == 'none'){
+ if($in && in_array($in, $tags)) return false; // $in is already a listed child
+ // Build an opening tag from the first name on the list
+ return $this->_buildTag('['.$tags[0].']');
+ }
+ if($ar[0] == 'all' && $in && !in_array($in, $tags)) return false;
+ /*
+ * A child is needed, but we don't know which one. We could make
+ * something up, but the choice would be arbitrary and therefore
+ * worthless, so signal the problem with true instead.
+ */
+ return true;
+ }
/**
* Checks to see if a tag is allowed inside another tag
View
11 BBCodeParser/Filter/Basic.php
@@ -47,12 +47,12 @@ class HTML_BBCodeParser_Filter_Basic extends HTML_BBCodeParser
'htmlclose' => 'em',
'allowed' => 'all',
'attributes'=> array()),
- 'u' => array( 'htmlopen' => 'u',
- 'htmlclose' => 'u',
+ 'u' => array( 'htmlopen' => 'span style="text-decoration:underline;"',
+ 'htmlclose' => 'span',
'allowed' => 'all',
'attributes'=> array()),
- 's' => array( 'htmlopen' => 'strike',
- 'htmlclose' => 'strike',
+ 's' => array( 'htmlopen' => 'del',
+ 'htmlclose' => 'del',
'allowed' => 'all',
'attributes'=> array()),
'sub' => array( 'htmlopen' => 'sub',
@@ -65,8 +65,7 @@ class HTML_BBCodeParser_Filter_Basic extends HTML_BBCodeParser
'attributes'=> array())
);
-
}
-?>
+?>
View
35 BBCodeParser/Filter/Extended.php
@@ -43,19 +43,19 @@ class HTML_BBCodeParser_Filter_Extended extends HTML_BBCodeParser
'color' => array( 'htmlopen' => 'span',
'htmlclose' => 'span',
'allowed' => 'all',
- 'attributes'=> array('color' =>'style=%2$scolor: %1$s%2$s')),
+ 'attributes'=> array('color' =>'style=%2$scolor:%1$s%2$s')),
'size' => array( 'htmlopen' => 'span',
'htmlclose' => 'span',
'allowed' => 'all',
- 'attributes'=> array('size' =>'style=%2$sfont-size: %1$spt%2$s')),
+ 'attributes'=> array('size' =>'style=%2$sfont-size:%1$spt%2$s')),
'font' => array( 'htmlopen' => 'span',
'htmlclose' => 'span',
'allowed' => 'all',
- 'attributes'=> array('font' =>'style=%2$sfont-family: %1$s%2$s')),
+ 'attributes'=> array('font' =>'style=%2$sfont-family:%1$s%2$s')),
'align' => array( 'htmlopen' => 'div',
'htmlclose' => 'div',
'allowed' => 'all',
- 'attributes'=> array('align' =>'style=%2$stext-align: %1$s%2$s')),
+ 'attributes'=> array('align' =>'style=%2$stext-align:%1$s%2$s')),
'quote' => array('htmlopen' => 'q',
'htmlclose' => 'q',
'allowed' => 'all',
@@ -63,6 +63,30 @@ class HTML_BBCodeParser_Filter_Extended extends HTML_BBCodeParser
'code' => array('htmlopen' => 'code',
'htmlclose' => 'code',
'allowed' => 'all',
+ 'attributes'=> array()),
+ 'h1' => array('htmlopen' => 'h1',
+ 'htmlclose' => 'h1',
+ 'allowed' => 'all',
+ 'attributes'=> array()),
+ 'h2' => array('htmlopen' => 'h2',
+ 'htmlclose' => 'h2',
+ 'allowed' => 'all',
+ 'attributes'=> array()),
+ 'h3' => array('htmlopen' => 'h3',
+ 'htmlclose' => 'h3',
+ 'allowed' => 'all',
+ 'attributes'=> array()),
+ 'h4' => array('htmlopen' => 'h4',
+ 'htmlclose' => 'h4',
+ 'allowed' => 'all',
+ 'attributes'=> array()),
+ 'h5' => array('htmlopen' => 'h5',
+ 'htmlclose' => 'h5',
+ 'allowed' => 'all',
+ 'attributes'=> array()),
+ 'h6' => array('htmlopen' => 'h6',
+ 'htmlclose' => 'h6',
+ 'allowed' => 'all',
'attributes'=> array())
);
@@ -70,5 +94,4 @@ class HTML_BBCodeParser_Filter_Extended extends HTML_BBCodeParser
}
-
-?>
+?>
View
7 BBCodeParser/Filter/Images.php
@@ -74,11 +74,14 @@ function _preparse()
$c = $options['close'];
$oe = $options['open_esc'];
$ce = $options['close_esc'];
- $this->_preparsed = preg_replace("!".$oe."img(".$ce."|\s.*".$ce.")(.*)".$oe."/img".$ce."!Ui", $o."img=\\2\\1".$o."/img".$c, $this->_text);
+ $this->_preparsed = preg_replace(
+ "!".$oe."img(\s?.*)".$ce."(.*)".$oe."/img".$ce."!Ui",
+ $o."img=\$2\$1".$c.$o."/img".$c,
+ $this->_text);
}
}
-?>
+?>
View
138 BBCodeParser/Filter/Links.php
@@ -24,15 +24,33 @@
* @author Stijn de Reede <sjr@gmx.co.uk>
*/
-
+/**
+ * Base parser class that the Links filter extends.
+ */
require_once('HTML/BBCodeParser.php');
-
-
-
+/**
+ * BBCode filter that defines the "url" tag and turns URLs into links.
+ */
class HTML_BBCodeParser_Filter_Links extends HTML_BBCodeParser
{
-
+
+ /**
+ * List of allowed schemes
+ *
+ * @access private
+ * @var array
+ */
+ var $_allowedSchemes = array('http', 'https', 'ftp');
+
+ /**
+ * Default scheme
+ *
+ * @access private
+ * @var string
+ */
+ var $_defaultScheme = 'http';
+
/**
* An array of tags parsed by the engine
*
@@ -42,8 +60,7 @@ class HTML_BBCodeParser_Filter_Links extends HTML_BBCodeParser
var $_definedTags = array( 'url' => array( 'htmlopen' => 'a',
'htmlclose' => 'a',
'allowed' => 'none^img',
- 'attributes'=> array( 'url' => 'href=%2$s%1$s%2$s',
- 't' => 'target=%2$s%1$s%2$s')
+ 'attributes'=> array('url' => 'href=%2$s%1$s%2$s')
)
);
@@ -62,24 +79,109 @@ class HTML_BBCodeParser_Filter_Links extends HTML_BBCodeParser
* @return none
* @access private
* @see $_text
- * @author Stijn de Reede <sjr@gmx.co.uk>
+ * @author Stijn de Reede <sjr@gmx.co.uk>, Seth Price <seth@pricepages.org>
*/
- function _preparse()
- {
+ function _preparse(){
$options = PEAR::getStaticProperty('HTML_BBCodeParser','_options');
$o = $options['open'];
$c = $options['close'];
$oe = $options['open_esc'];
$ce = $options['close_esc'];
- $pattern = array( "!(^|\s|\()((((http(s?)|ftp)://)|www)[-a-z0-9.]+\.[a-z]{2,4}[^\s()]*)!i",
- "!".$oe."url(".$ce."|\s.*".$ce.")(.*)".$oe."/url".$ce."!iU");
- $replace = array( "\\1".$o."url".$c."\\2".$o."/url".$c,
- $o."url=\\2\\1\\2".$o."/url".$c);
- $this->_preparsed = preg_replace($pattern, $replace, $this->_text);
- }
+
+ $schemes = implode('|', $this->_allowedSchemes);
+
+ $pattern = array( "/(?<![\"'=".$ce."\/])(".$oe."[^".$ce."]*".$ce.")?(((".$schemes."):\/\/|www)[@-a-z0-9.]+\.[a-z]{2,4}[^\s()\[\]]*)/i",
+ "!".$oe."url(".$ce."|\s.*".$ce.")(.*)".$oe."/url".$ce."!iU",
+ "!".$oe."url=((([a-z]*:(//)?)|www)[@-a-z0-9.]+)([^\s\[\]]*)".$ce."(.*)".$oe."/url".$ce."!i");
+ $pp = preg_replace_callback($pattern[0], array($this, 'smarterPPLinkExpand'), $this->_text);
+ $pp = preg_replace($pattern[1], $o."url=\$2\$1\$2".$o."/url".$c, $pp);
+ $this->_preparsed = preg_replace_callback($pattern[2], array($this, 'smarterPPLink'), $pp);
+
+ }
+
+ /**
+ * Intelligently expand a bare URL into a url tag
+ *
+ * Callback used by _preparse() with preg_replace_callback() for the
+ * first pattern. A URL that directly follows an opening url tag is left
+ * alone. A URL without a ":" gets the default scheme in the link target
+ * while the visible text is unchanged. A URL with a scheme is wrapped only
+ * if that scheme is listed in $_allowedSchemes; otherwise the matched text
+ * is returned untouched.
+ *
+ * NOTE(review): the pattern is case-insensitive but the in_array() check
+ * below is not, so a scheme such as "HTTP" is left unwrapped -- confirm
+ * that this is acceptable.
+ *
+ * @param array $matches regex groups: [0] whole match, [1] optional
+ * bracketed tag directly in front of the URL,
+ * [2] the URL itself
+ * @return string replacement text for the match
+ * @access private
+ * @author Seth Price <seth@pricepages.org>
+ */
+ function smarterPPLinkExpand($matches){
+ $options = PEAR::getStaticProperty('HTML_BBCodeParser','_options');
+ $o = $options['open'];
+ $c = $options['close'];
+
+ // If the tag in front of the URL is an opening url tag, skip this match
+ if($matches[1] == $o.'url'.$c){
+ return $matches[0];
+ }
+
+ $off = strpos($matches[2], ':');
+
+ // Is a ":" (and therefore a scheme) present?
+ if($off === false){
+ /*
+ * Create a link using the default scheme ($_defaultScheme). Notice
+ * that the text that is viewable to the user is unchanged, but the
+ * link itself contains the scheme prefix.
+ */
+ return $matches[1].$o.'url='.$this->_defaultScheme.'://'.$matches[2].$c.$matches[2].$o.'/url'.$c;
+ }
-}
+ $scheme = substr($matches[2], 0, $off);
+
+ /*
+ * If the scheme is in the approved list then allow it. Note that this
+ * check isn't really needed, but the created link will just be deleted
+ * later in smarterPPLink() if we create it now and it isn't on the
+ * scheme list.
+ */
+ if(in_array($scheme, $this->_allowedSchemes)){
+ return $matches[1].$o.'url'.$c.$matches[2].$o.'/url'.$c;
+ } else {
+ return $matches[0];
+ }
+ }
+
+ /**
+ * Finish preparsing a url tag to clean up its target
+ *
+ * Callback used by _preparse() with preg_replace_callback() for the
+ * third pattern. Prefixes the default scheme when the target has no ":",
+ * substitutes "/" for an empty path, and then either rebuilds the url tag
+ * (scheme listed in $_allowedSchemes) or drops the tag and keeps only the
+ * link text.
+ *
+ * @param array $matches regex groups: [1] scheme and host part of the
+ * target, [5] remainder of the target (path),
+ * [6] text between the opening and closing tag
+ * @return string replacement text for the match
+ * @access private
+ * @author Seth Price <seth@pricepages.org>
+ */
+ function smarterPPLink($matches){
+ $options = PEAR::getStaticProperty('HTML_BBCodeParser','_options');
+ $o = $options['open'];
+ $c = $options['close'];
+
+ $urlServ = $matches[1];
+ $path = $matches[5];
+
+ $off = strpos($urlServ, ':');
+
+ if($off === false){
+ // No scheme given: prefix the default one ($_defaultScheme)
+ $urlServ = $this->_defaultScheme.'://'.$urlServ;
+ $off = strpos($urlServ, ':'); // recompute for the substr() below
+ }
+
+ // Add trailing slash if missing (to create a valid URL)
+ if(!$path){
+ $path = '/';
+ }
+ $protocol = substr($urlServ, 0, $off);
+
+ if(in_array($protocol, $this->_allowedSchemes)){
+ // If the protocol is in the approved list then allow it
+ return $o.'url='.$urlServ.$path.$c.$matches[6].$o.'/url'.$c;
+ } else {
+ // Otherwise remove the url tag and keep only its text
+ return $matches[6];
+ }
+ }
+}
-?>
+?>
View
51 BBCodeParser/Filter/Lists.php
@@ -25,12 +25,14 @@
* @author Stijn de Reede <sjr@gmx.co.uk>
*/
-
+/**
+ * Base parser class that the Lists filter extends.
+ */
require_once('HTML/BBCodeParser.php');
-
-
-
+/**
+ * BBCode filter that defines the "list", "ulist" and "li" tags.
+ */
class HTML_BBCodeParser_Filter_Lists extends HTML_BBCodeParser
{
@@ -42,19 +44,21 @@ class HTML_BBCodeParser_Filter_Lists extends HTML_BBCodeParser
*/
var $_definedTags = array( 'list' => array( 'htmlopen' => 'ol',
'htmlclose' => 'ol',
- 'allowed' => 'none^li',
- 'attributes'=> array( 'list' => 'type=%2$s%1$s%2$s',
- 's' => 'start=%2$s%1$d%2$s')
+ 'allowed' => 'all',
+ 'child' => 'none^li',
+ 'attributes'=> array('list' => 'style=%2$slist-style-type:%1$s;%2$s')
),
'ulist' => array( 'htmlopen' => 'ul',
'htmlclose' => 'ul',
- 'allowed' => 'none^li',
- 'attributes'=> array()
+ 'allowed' => 'all',
+ 'child' => 'none^li',
+ 'attributes'=> array('list' => 'style=%2$slist-style-type:%1$s;%2$s')
),
'li' => array( 'htmlopen' => 'li',
'htmlclose' => 'li',
'allowed' => 'all',
- 'attributes'=> array( 'li' => 'value=%2$s%1$d%2$s')
+ 'parent' => 'none^ulist,list',
+ 'attributes'=> array()
)
);
@@ -73,7 +77,7 @@ class HTML_BBCodeParser_Filter_Lists extends HTML_BBCodeParser
* @return none
* @access private
* @see $_text
- * @author Stijn de Reede <sjr@gmx.co.uk>
+ * @author Stijn de Reede <sjr@gmx.co.uk>, Seth Price <seth@pricepages.org>
*/
function _preparse()
{
@@ -82,15 +86,26 @@ function _preparse()
$c = $options['close'];
$oe = $options['open_esc'];
$ce = $options['close_esc'];
- $pattern = array( "!".$oe."\*".$ce."(.*)!i",
- "!".$oe."list".$ce."(.+)".$oe."/list".$ce."!isU");
- $replace = array( $o."li".$c."\\1".$o."/li".$c,
- $o."ulist".$c."\\1".$o."/ulist".$c);
+
+ $pattern = array( "!".$oe."\*".$ce."!",
+ "!".$oe."(u?)list=(?-i:A)(\s*[^".$ce."]*)".$ce."!i",
+ "!".$oe."(u?)list=(?-i:a)(\s*[^".$ce."]*)".$ce."!i",
+ "!".$oe."(u?)list=(?-i:I)(\s*[^".$ce."]*)".$ce."!i",
+ "!".$oe."(u?)list=(?-i:i)(\s*[^".$ce."]*)".$ce."!i",
+ "!".$oe."(u?)list=(?-i:1)(\s*[^".$ce."]*)".$ce."!i",
+ "!".$oe."(u?)list([^".$ce."]*)".$ce."!i");
+
+ $replace = array( $o."li".$c,
+ $o."\$1list=upper-alpha\$2".$c,
+ $o."\$1list=lower-alpha\$2".$c,
+ $o."\$1list=upper-roman\$2".$c,
+ $o."\$1list=lower-roman\$2".$c,
+ $o."\$1list=decimal\$2".$c,
+ $o."\$1list\$2".$c );
+
$this->_preparsed = preg_replace($pattern, $replace, $this->_text);
}
-
-
}
-?>
+?>
Please sign in to comment.
Something went wrong with that request. Please try again.