Permalink
Browse files

[CssSelector] fully rewritten component

Squashed commits:
[CssSelector] removed previous implementation
[CssSelector] rewriting, step 1
[CssSelector] rewriting, step 2
[CssSelector] rewriting, step 3
[CssSelector] rewriting, step 4
[CssSelector] rewriting, step 5
[CssSelector] rewriting, step 6
[CssSelector] fixed shortcuts regex
[CssSelector] tests, step1
[CssSelector] tests, step2
[CssSelector] tests, step3
[CssSelector] tests, step4
[CssSelector] fixed problems based on @stof's feedback
[CssSelector] tests, step5
[CssSelector] tests, step6
[CssSelector] tests, step7
[CssSelector] added my name in composer.json
  • Loading branch information...
1 parent bd53382 commit c6f87d0026817aac185c0385d7182f1dcbe0cc93 @jfsimon jfsimon committed Mar 5, 2013
Showing with 6,298 additions and 2,161 deletions.
  1. +23 −278 src/Symfony/Component/CssSelector/CssSelector.php
  2. +62 −0 src/Symfony/Component/CssSelector/CssSelectorTest.php
  3. +24 −0 src/Symfony/Component/CssSelector/Exception/ExceptionInterface.php
  4. +24 −0 src/Symfony/Component/CssSelector/Exception/ExpressionErrorException.php
  5. +24 −0 src/Symfony/Component/CssSelector/Exception/InternalErrorException.php
  6. +3 −3 src/Symfony/Component/CssSelector/Exception/ParseException.php
  7. +73 −0 src/Symfony/Component/CssSelector/Exception/SyntaxErrorException.php
  8. +40 −0 src/Symfony/Component/CssSelector/Node/AbstractNode.php
  9. +0 −131 src/Symfony/Component/CssSelector/Node/AttribNode.php
  10. +124 −0 src/Symfony/Component/CssSelector/Node/AttributeNode.php
  11. +39 −23 src/Symfony/Component/CssSelector/Node/ClassNode.php
  12. +38 −88 src/Symfony/Component/CssSelector/Node/CombinedSelectorNode.php
  13. +34 −34 src/Symfony/Component/CssSelector/Node/ElementNode.php
  14. +37 −231 src/Symfony/Component/CssSelector/Node/FunctionNode.php
  15. +38 −22 src/Symfony/Component/CssSelector/Node/HashNode.php
  16. +75 −0 src/Symfony/Component/CssSelector/Node/NegationNode.php
  17. +17 −10 src/Symfony/Component/CssSelector/Node/NodeInterface.php
  18. +0 −61 src/Symfony/Component/CssSelector/Node/OrNode.php
  19. +26 −182 src/Symfony/Component/CssSelector/Node/PseudoNode.php
  20. +75 −0 src/Symfony/Component/CssSelector/Node/SelectorNode.php
  21. +78 −0 src/Symfony/Component/CssSelector/Node/Specificity.php
  22. +47 −0 src/Symfony/Component/CssSelector/Parser/Handler/CommentHandler.php
  23. +35 −0 src/Symfony/Component/CssSelector/Parser/Handler/HandlerInterface.php
  24. +67 −0 src/Symfony/Component/CssSelector/Parser/Handler/HashHandler.php
  25. +67 −0 src/Symfony/Component/CssSelector/Parser/Handler/IdentifierHandler.php
  26. +58 −0 src/Symfony/Component/CssSelector/Parser/Handler/NumberHandler.php
  27. +86 −0 src/Symfony/Component/CssSelector/Parser/Handler/StringHandler.php
  28. +44 −0 src/Symfony/Component/CssSelector/Parser/Handler/WhitespaceHandler.php
  29. +395 −0 src/Symfony/Component/CssSelector/Parser/Parser.php
  30. +34 −0 src/Symfony/Component/CssSelector/Parser/ParserInterface.php
  31. +126 −0 src/Symfony/Component/CssSelector/Parser/Reader.php
  32. +42 −0 src/Symfony/Component/CssSelector/Parser/Shortcut/ClassParser.php
  33. +41 −0 src/Symfony/Component/CssSelector/Parser/Shortcut/ElementParser.php
  34. +45 −0 src/Symfony/Component/CssSelector/Parser/Shortcut/EmptyStringParser.php
  35. +42 −0 src/Symfony/Component/CssSelector/Parser/Shortcut/HashParser.php
  36. +160 −0 src/Symfony/Component/CssSelector/Parser/Token.php
  37. +182 −0 src/Symfony/Component/CssSelector/Parser/TokenStream.php
  38. +78 −0 src/Symfony/Component/CssSelector/Parser/Tokenizer/Tokenizer.php
  39. +78 −0 src/Symfony/Component/CssSelector/Parser/Tokenizer/TokenizerEscaping.php
  40. +160 −0 src/Symfony/Component/CssSelector/Parser/Tokenizer/TokenizerPatterns.php
  41. +0 −71 src/Symfony/Component/CssSelector/Tests/CssSelectorTest.php
  42. +32 −0 src/Symfony/Component/CssSelector/Tests/Node/AbstractNodeTest.php
  43. +0 −43 src/Symfony/Component/CssSelector/Tests/Node/AttribNodeTest.php
  44. +37 −0 src/Symfony/Component/CssSelector/Tests/Node/AttributeNodeTest.php
  45. +12 −6 src/Symfony/Component/CssSelector/Tests/Node/ClassNodeTest.php
  46. +13 −14 src/Symfony/Component/CssSelector/Tests/Node/CombinedSelectorNodeTest.php
  47. +15 −10 src/Symfony/Component/CssSelector/Tests/Node/ElementNodeTest.php
  48. +27 −76 src/Symfony/Component/CssSelector/Tests/Node/FunctionNodeTest.php
  49. +12 −6 src/Symfony/Component/CssSelector/Tests/Node/HashNodeTest.php
  50. +33 −0 src/Symfony/Component/CssSelector/Tests/Node/NegationNodeTest.php
  51. +0 −43 src/Symfony/Component/CssSelector/Tests/Node/OrNodeTest.php
  52. +12 −35 src/Symfony/Component/CssSelector/Tests/Node/PseudoNodeTest.php
  53. +34 −0 src/Symfony/Component/CssSelector/Tests/Node/SelectorNodeTest.php
  54. +40 −0 src/Symfony/Component/CssSelector/Tests/Node/SpecificityTest.php
  55. +67 −0 src/Symfony/Component/CssSelector/Tests/Parser/Handler/AbstractHandlerTest.php
  56. +55 −0 src/Symfony/Component/CssSelector/Tests/Parser/Handler/CommentHandlerTest.php
  57. +49 −0 src/Symfony/Component/CssSelector/Tests/Parser/Handler/HashHandlerTest.php
  58. +49 −0 src/Symfony/Component/CssSelector/Tests/Parser/Handler/IdentifierHandlerTest.php
  59. +51 −0 src/Symfony/Component/CssSelector/Tests/Parser/Handler/NumberHandlerTest.php
  60. +50 −0 src/Symfony/Component/CssSelector/Tests/Parser/Handler/StringHandlerTest.php
  61. +44 −0 src/Symfony/Component/CssSelector/Tests/Parser/Handler/WhitespaceHandlerTest.php
  62. +247 −0 src/Symfony/Component/CssSelector/Tests/Parser/ParserTest.php
  63. +101 −0 src/Symfony/Component/CssSelector/Tests/Parser/ReaderTest.php
  64. +40 −0 src/Symfony/Component/CssSelector/Tests/Parser/Shortcut/ClassParserTest.php
  65. +42 −0 src/Symfony/Component/CssSelector/Tests/Parser/Shortcut/ElementParserTest.php
  66. +41 −0 src/Symfony/Component/CssSelector/Tests/Parser/Shortcut/HashParserTest.php
  67. +95 −0 src/Symfony/Component/CssSelector/Tests/Parser/TokenStreamTest.php
  68. +0 −72 src/Symfony/Component/CssSelector/Tests/TokenizerTest.php
  69. +48 −0 src/Symfony/Component/CssSelector/Tests/XPath/Fixtures/ids.html
  70. +11 −0 src/Symfony/Component/CssSelector/Tests/XPath/Fixtures/lang.xml
  71. +308 −0 src/Symfony/Component/CssSelector/Tests/XPath/Fixtures/shakespear.html
  72. +308 −0 src/Symfony/Component/CssSelector/Tests/XPath/TranslatorTest.php
  73. +0 −35 src/Symfony/Component/CssSelector/Tests/XPathExprTest.php
  74. +31 −0 src/Symfony/Component/CssSelector/Tests/bootstrap.php
  75. +0 −73 src/Symfony/Component/CssSelector/Token.php
  76. +0 −105 src/Symfony/Component/CssSelector/TokenStream.php
  77. +0 −201 src/Symfony/Component/CssSelector/Tokenizer.php
  78. +63 −0 src/Symfony/Component/CssSelector/XPath/Extension/AbstractExtension.php
  79. +173 −0 src/Symfony/Component/CssSelector/XPath/Extension/AttributeMatchingExtension.php
  80. +93 −0 src/Symfony/Component/CssSelector/XPath/Extension/CombinationExtension.php
  81. +65 −0 src/Symfony/Component/CssSelector/XPath/Extension/ExtensionInterface.php
  82. +198 −0 src/Symfony/Component/CssSelector/XPath/Extension/FunctionExtension.php
  83. +238 −0 src/Symfony/Component/CssSelector/XPath/Extension/HtmlExtension.php
  84. +270 −0 src/Symfony/Component/CssSelector/XPath/Extension/NodeExtension.php
  85. +162 −0 src/Symfony/Component/CssSelector/XPath/Extension/PseudoClassExtension.php
  86. +302 −0 src/Symfony/Component/CssSelector/XPath/Translator.php
  87. +45 −0 src/Symfony/Component/CssSelector/XPath/TranslatorInterface.php
  88. +140 −0 src/Symfony/Component/CssSelector/XPath/XPathExpr.php
  89. +0 −254 src/Symfony/Component/CssSelector/XPathExpr.php
  90. +0 −54 src/Symfony/Component/CssSelector/XPathExprOr.php
  91. +4 −0 src/Symfony/Component/CssSelector/composer.json
@@ -11,16 +11,22 @@
namespace Symfony\Component\CssSelector;
-use Symfony\Component\CssSelector\Exception\ParseException;
+use Symfony\Component\CssSelector\Exception;
+use Symfony\Component\CssSelector\Parser\Shortcut\ClassParser;
+use Symfony\Component\CssSelector\Parser\Shortcut\ElementParser;
+use Symfony\Component\CssSelector\Parser\Shortcut\EmptyStringParser;
+use Symfony\Component\CssSelector\Parser\Shortcut\HashParser;
+use Symfony\Component\CssSelector\XPath\Extension\HtmlExtension;
+use Symfony\Component\CssSelector\XPath\Translator;
/**
* CssSelector is the main entry point of the component and can convert CSS
* selectors to XPath expressions.
*
* $xpath = CssSelector::toXpath('h1.foo');
*
- * This component is a port of the Python lxml library,
- * which is copyright Infrae and distributed under the BSD license.
+ * This component is a port of the Python cssselect library,
+ * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
*
* @author Fabien Potencier <fabien@symfony.com>
*
@@ -33,290 +39,29 @@ class CssSelector
* Optionally, a prefix can be added to the resulting XPath
* expression with the $prefix parameter.
*
- * @param mixed $cssExpr The CSS expression.
- * @param string $prefix An optional prefix for the XPath expression.
+ * @param mixed $cssExpr The CSS expression.
+ * @param string $prefix An optional prefix for the XPath expression.
+ * @param boolean $html Enables HTML extension.
*
* @return string
*
- * @throws ParseException When got None for xpath expression
- *
* @api
*/
- public static function toXPath($cssExpr, $prefix = 'descendant-or-self::')
- {
- if (is_string($cssExpr)) {
- if (!$cssExpr) {
- return $prefix.'*';
- }
-
- if (preg_match('#^\w+\s*$#u', $cssExpr, $match)) {
- return $prefix.trim($match[0]);
- }
-
- if (preg_match('~^(\w*)#(\w+)\s*$~u', $cssExpr, $match)) {
- return sprintf("%s%s[@id = '%s']", $prefix, $match[1] ? $match[1] : '*', $match[2]);
- }
-
- if (preg_match('#^(\w*)\.(\w+)\s*$#u', $cssExpr, $match)) {
- return sprintf("%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]", $prefix, $match[1] ? $match[1] : '*', $match[2]);
- }
-
- $parser = new self();
- $cssExpr = $parser->parse($cssExpr);
- }
-
- $expr = $cssExpr->toXpath();
-
- // @codeCoverageIgnoreStart
- if (!$expr) {
- throw new ParseException(sprintf('Got None for xpath expression from %s.', $cssExpr));
- }
- // @codeCoverageIgnoreEnd
-
- if ($prefix) {
- $expr->addPrefix($prefix);
- }
-
- return (string) $expr;
- }
-
- /**
- * Parses an expression and returns the Node object that represents
- * the parsed expression.
- *
- * @param string $string The expression to parse
- *
- * @return Node\NodeInterface
- *
- * @throws \Exception When tokenizer throws it while parsing
- */
- public function parse($string)
- {
- $tokenizer = new Tokenizer();
-
- $stream = new TokenStream($tokenizer->tokenize($string), $string);
-
- try {
- return $this->parseSelectorGroup($stream);
- } catch (\Exception $e) {
- $class = get_class($e);
-
- throw new $class(sprintf('%s at %s -> %s', $e->getMessage(), implode($stream->getUsed(), ''), $stream->peek()), 0, $e);
- }
- }
-
- /**
- * Parses a selector group contained in $stream and returns
- * the Node object that represents the expression.
- *
- * @param TokenStream $stream The stream to parse.
- *
- * @return Node\NodeInterface
- */
- private function parseSelectorGroup($stream)
- {
- $result = array();
- while (true) {
- $result[] = $this->parseSelector($stream);
- if ($stream->peek() == ',') {
- $stream->next();
- } else {
- break;
- }
- }
-
- if (count($result) == 1) {
- return $result[0];
- }
-
- return new Node\OrNode($result);
- }
-
- /**
- * Parses a selector contained in $stream and returns the Node
- * object that represents it.
- *
- * @param TokenStream $stream The stream containing the selector.
- *
- * @return Node\NodeInterface
- *
- * @throws ParseException When expected selector but got something else
- */
- private function parseSelector($stream)
- {
- $result = $this->parseSimpleSelector($stream);
-
- while (true) {
- $peek = $stream->peek();
- if (',' == $peek || null === $peek) {
- return $result;
- } elseif (in_array($peek, array('+', '>', '~'))) {
- // A combinator
- $combinator = (string) $stream->next();
-
- // Ignore optional whitespace after a combinator
- while (' ' == $stream->peek()) {
- $stream->next();
- }
- } else {
- $combinator = ' ';
- }
- $consumed = count($stream->getUsed());
- $nextSelector = $this->parseSimpleSelector($stream);
- if ($consumed == count($stream->getUsed())) {
- throw new ParseException(sprintf("Expected selector, got '%s'", $stream->peek()));
- }
-
- $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
- }
-
- return $result;
- }
-
- /**
- * Parses a simple selector (the current token) from $stream and returns
- * the resulting Node object.
- *
- * @param TokenStream $stream The stream containing the selector.
- *
- * @return Node\NodeInterface
- *
- * @throws ParseException When expected symbol but got something else
- */
- private function parseSimpleSelector($stream)
- {
- $peek = $stream->peek();
- if ('*' != $peek && !$peek->isType('Symbol')) {
- $element = $namespace = '*';
- } else {
- $next = $stream->next();
- if ('*' != $next && !$next->isType('Symbol')) {
- throw new ParseException(sprintf("Expected symbol, got '%s'", $next));
- }
-
- if ($stream->peek() == '|') {
- $namespace = $next;
- $stream->next();
- $element = $stream->next();
- if ('*' != $element && !$next->isType('Symbol')) {
- throw new ParseException(sprintf("Expected symbol, got '%s'", $next));
- }
- } else {
- $namespace = '*';
- $element = $next;
- }
- }
-
- $result = new Node\ElementNode($namespace, $element);
- $hasHash = false;
- while (true) {
- $peek = $stream->peek();
- if ('#' == $peek) {
- if ($hasHash) {
- /* You can't have two hashes
- (FIXME: is there some more general rule I'm missing?) */
- // @codeCoverageIgnoreStart
- break;
- // @codeCoverageIgnoreEnd
- }
- $stream->next();
- $result = new Node\HashNode($result, $stream->next());
- $hasHash = true;
-
- continue;
- } elseif ('.' == $peek) {
- $stream->next();
- $result = new Node\ClassNode($result, $stream->next());
-
- continue;
- } elseif ('[' == $peek) {
- $stream->next();
- $result = $this->parseAttrib($result, $stream);
- $next = $stream->next();
- if (']' != $next) {
- throw new ParseException(sprintf("] expected, got '%s'", $next));
- }
-
- continue;
- } elseif (':' == $peek || '::' == $peek) {
- $type = $stream->next();
- $ident = $stream->next();
- if (!$ident || !$ident->isType('Symbol')) {
- throw new ParseException(sprintf("Expected symbol, got '%s'", $ident));
- }
-
- if ($stream->peek() == '(') {
- $stream->next();
- $peek = $stream->peek();
- if ($peek->isType('String')) {
- $selector = $stream->next();
- } elseif ($peek->isType('Symbol') && is_int($peek)) {
- $selector = intval($stream->next());
- } else {
- // FIXME: parseSimpleSelector, or selector, or...?
- $selector = $this->parseSimpleSelector($stream);
- }
- $next = $stream->next();
- if (')' != $next) {
- throw new ParseException(sprintf("Expected ')', got '%s' and '%s'", $next, $selector));
- }
-
- $result = new Node\FunctionNode($result, $type, $ident, $selector);
- } else {
- $result = new Node\PseudoNode($result, $type, $ident);
- }
-
- continue;
- } else {
- if (' ' == $peek) {
- $stream->next();
- }
-
- break;
- }
- // FIXME: not sure what "negation" is
- }
-
- return $result;
- }
-
- /**
- * Parses an attribute from a selector contained in $stream and returns
- * the resulting AttribNode object.
- *
- * @param Node\NodeInterface $selector The selector object whose attribute
- * is to be parsed.
- * @param TokenStream $stream The container token stream.
- *
- * @return Node\AttribNode
- *
- * @throws ParseException When encountered unexpected selector
- */
- private function parseAttrib($selector, $stream)
+ public static function toXPath($cssExpr, $prefix = 'descendant-or-self::', $html = true)
{
- $attrib = $stream->next();
- if ($stream->peek() == '|') {
- $namespace = $attrib;
- $stream->next();
- $attrib = $stream->next();
- } else {
- $namespace = '*';
- }
+ $translator = new Translator();
- if ($stream->peek() == ']') {
- return new Node\AttribNode($selector, $namespace, $attrib, 'exists', null);
+ if ($html) {
+ $translator->registerExtension(new HtmlExtension($translator));
}
- $op = $stream->next();
- if (!in_array($op, array('^=', '$=', '*=', '=', '~=', '|=', '!='))) {
- throw new ParseException(sprintf("Operator expected, got '%s'", $op));
- }
-
- $value = $stream->next();
- if (!$value->isType('Symbol') && !$value->isType('String')) {
- throw new ParseException(sprintf("Expected string or symbol, got '%s'", $value));
- }
+ $translator
+ ->registerParserShortcut(new EmptyStringParser())
+ ->registerParserShortcut(new ElementParser())
+ ->registerParserShortcut(new ClassParser())
+ ->registerParserShortcut(new HashParser())
+ ;
- return new Node\AttribNode($selector, $namespace, $attrib, $op, $value);
+ return $translator->cssToXPath($cssExpr, $prefix);
}
}
@@ -0,0 +1,62 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\CssSelector;
+
+class CssSelectorTest extends \PHPUnit_Framework_TestCase
+{
+ public function testCssToXPath()
+ {
+ $this->assertEquals('descendant-or-self::*', CssSelector::toXPath(''));
+ $this->assertEquals('descendant-or-self::h1', CssSelector::toXPath('h1'));
+ $this->assertEquals("descendant-or-self::h1[@id = 'foo']", CssSelector::toXPath('h1#foo'));
+ $this->assertEquals("descendant-or-self::h1[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]", CssSelector::toXPath('h1.foo'));
+ $this->assertEquals('descendant-or-self::foo:h1', CssSelector::toXPath('foo|h1'));
+ }
+
+ /** @dataProvider getCssToXPathWithoutPrefixTestData */
+ public function testCssToXPathWithoutPrefix($css, $xpath)
+ {
+ $this->assertEquals($xpath, CssSelector::toXPath($css, ''), '->parse() parses an input string and returns a node');
+ }
+
+ public function testParseExceptions()
+ {
+ try {
+ CssSelector::toXPath('h1:');
+ $this->fail('->parse() throws an Exception if the css selector is not valid');
+ } catch (\Exception $e) {
+ $this->assertInstanceOf('\Symfony\Component\CssSelector\Exception\ParseException', $e, '->parse() throws an Exception if the css selector is not valid');
+ $this->assertEquals("Expected identifier, but <eof at 3> found.", $e->getMessage(), '->parse() throws an Exception if the css selector is not valid');
+ }
+ }
+
+ public function getCssToXPathWithoutPrefixTestData()
+ {
+ return array(
+ array('h1', "h1"),
+ array('foo|h1', "foo:h1"),
+ array('h1, h2, h3', "h1 | h2 | h3"),
+ array('h1:nth-child(3n+1)', "*/*[name() = 'h1' and ((position() -1) mod 3 = 0 and position() >= 1)]"),
+ array('h1 > p', "h1/p"),
+ array('h1#foo', "h1[@id = 'foo']"),
+ array('h1.foo', "h1[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]"),
+ array('h1[class*="foo bar"]', "h1[@class and contains(@class, 'foo bar')]"),
+ array('h1[foo|class*="foo bar"]', "h1[@foo:class and contains(@foo:class, 'foo bar')]"),
+ array('h1[class]', "h1[@class]"),
+ array('h1 .foo', "h1/descendant-or-self::*/*[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]"),
+ array('h1 #foo', "h1/descendant-or-self::*/*[@id = 'foo']"),
+ array('h1 [class*=foo]', "h1/descendant-or-self::*/*[@class and contains(@class, 'foo')]"),
+ array('div>.foo', "div/*[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]"),
+ array('div > .foo', "div/*[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]"),
+ );
+ }
+}
Oops, something went wrong.

0 comments on commit c6f87d0

Please sign in to comment.