/**
 * Exception raised by the CND parser.
 *
 * The message embeds the caller's text, the type, data and position of the
 * token the queue currently points at, and a preview of the upcoming tokens.
 */
class ParserException extends \Exception
{
    /**
     * @param TokenQueue $queue The token queue being parsed; it is only inspected, never advanced
     * @param string     $msg   Description of the parse error
     */
    public function __construct(TokenQueue $queue, $msg)
    {
        // At the end of the input there is no current token to describe.
        // NOTE(review): peek() is assumed to return a falsy value when no token
        // is available (parsePropDef() tests its result the same way) - confirm.
        $token = $queue->isEof() ? null : $queue->peek();

        if ($token) {
            $msg = sprintf(
                "PARSER ERROR: %s. Current token is [%s, '%s'] at line %s, column %s",
                $msg,
                GenericToken::getTypeName($token->getType()),
                $token->getData(),
                $token->getLine(),
                $token->getRow()
            );
        } else {
            $msg = sprintf('PARSER ERROR: %s. Unexpected end of input', $msg);
        }

        // Preview up to five tokens, starting with the current one. The tokens
        // are read with peek() so that building the message does not consume
        // anything from the queue.
        $lookup = '';
        for ($offset = 0; $offset < 5; $offset++) {
            $upcoming = (0 === $offset) ? $token : $queue->peek($offset);
            if (!$upcoming) {
                break;
            }
            $lookup .= $upcoming->getData() . ' ';
        }
        $msg .= "\nBuffer lookup: \"$lookup\"";

        parent::__construct($msg);
    }
}
/**
 * Check the next token without consuming it and return true if it matches
 * the given type and data.
 *
 * If the data is null then only the token type is checked. Return false when
 * the queue is at its end or when the token does not match.
 *
 * @param int         $type The expected token type
 * @param null|string $data The expected data or null
 *
 * @return bool
 */
protected function checkToken($type, $data = null)
{
    if ($this->tokenQueue->isEof()) {
        return false;
    }

    $token = $this->tokenQueue->peek();

    if ($token->getType() !== $type) {
        return false;
    }

    // Compare against null explicitly: a plain truthiness test would treat
    // expected data such as '0' or '' as "no data given" and skip the check.
    return null === $data || $token->getData() === $data;
}
/**
 * Consume the next token when it matches the expected type and data.
 *
 * Unlike expectToken(), a mismatch is reported by returning false and the
 * queue is left untouched.
 *
 * @param int         $type The expected token type
 * @param null|string $data The expected token data or null
 *
 * @return bool|Token The consumed token, or false when the next token does not match
 */
protected function checkAndExpectToken($type, $data = null)
{
    if (!$this->checkToken($type, $data)) {
        return false;
    }

    $matched = $this->tokenQueue->peek();
    $this->tokenQueue->next();

    return $matched;
}
/**
 * Parse the CND statements contained in a file.
 *
 * @param string $filename absolute path to the CND file to read
 *
 * @return array with the keys 'namespaces' (the namespaces map) and
 *               'nodeTypes' (a hashmap of typename => NodeTypeDefinitionInterface)
 */
public function parseFile($filename)
{
    return $this->parse(new FileReader($filename));
}
/**
 * Scan the reader into a token queue and parse it until the queue is empty.
 *
 * Every statement starting with '<' is handled as a namespace mapping, any
 * other statement as a node type definition.
 *
 * @param ReaderInterface $reader source of the CND content
 *
 * @return array with the keys 'namespaces' and 'nodeTypes'
 */
private function parse(ReaderInterface $reader)
{
    $context = new DefaultScannerContextWithoutSpacesAndComments();
    $scanner = new GenericScanner($context);
    $this->tokenQueue = $scanner->scan($reader);

    while (!$this->tokenQueue->isEof()) {
        if ($this->checkToken(Token::TK_SYMBOL, '<')) {
            $this->parseNamespaceMapping();
            continue;
        }

        $this->parseNodeType();
    }

    return array(
        'namespaces' => $this->namespaces,
        'nodeTypes' => $this->nodeTypes,
    );
}
/**
 * Parse one node type definition and register it under its name.
 *
 * A definition is a node type name, an optional supertypes block (introduced
 * by '>'), optional node type attributes and any number of property or child
 * node definitions.
 *
 * NodeTypeDef ::= NodeTypeName [Supertypes]
 *                 [NodeTypeAttribute {NodeTypeAttribute}]
 *                 {PropertyDef | ChildNodeDef}
 */
protected function parseNodeType()
{
    $template = $this->ntm->createNodeTypeTemplate();

    $this->parseNodeTypeName($template);

    $hasSupertypes = $this->checkToken(Token::TK_SYMBOL, '>');
    if ($hasSupertypes) {
        $this->parseSupertypes($template);
    }

    $this->parseNodeTypeAttributes($template);
    $this->parseChildrenAndAttributes($template);

    $this->nodeTypes[$template->getName()] = $template;
}
/**
 * Parse the node type attributes, which are indicated by keywords.
 *
 * Recognised keywords set the matching flag on the node type: orderable,
 * mixin and abstract are set to true, 'query' / 'noquery' set the queryable
 * flag to true / false. 'primaryitem' (or '!') is followed either by the name
 * of the primary item or by '?', in which case '?' is stored as the name.
 * Parsing stops at the first token that is not a node type attribute.
 *
 * The optional '?' variant suffix of orderable, mixin and abstract that the
 * grammar allows is not handled by this method.
 *
 * NodeTypeAttribute ::= Orderable | Mixin | Abstract | Query | PrimaryItem
 * Orderable         ::= ('orderable' | 'ord' | 'o') ['?']
 * Mixin             ::= ('mixin' | 'mix' | 'm') ['?']
 * Abstract          ::= ('abstract' | 'abs' | 'a') ['?']
 * Query             ::= ('noquery' | 'nq') | ('query' | 'q' )
 * PrimaryItem       ::= ('primaryitem'| '!')(String | '?')
 *
 * @param NodeTypeTemplateInterface $nodeType the template to update
 */
protected function parseNodeTypeAttributes(NodeTypeTemplateInterface $nodeType)
{
    while (true) {
        if ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->ORDERABLE)) {
            $nodeType->setOrderableChildNodes(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->MIXIN)) {
            $nodeType->setMixin(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->ABSTRACT)) {
            $nodeType->setAbstract(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->NOQUERY)) {
            $nodeType->setQueryable(false);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->QUERY)) {
            $nodeType->setQueryable(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->PRIMARYITEM)
            || $this->checkTokenIn(Token::TK_SYMBOL, $this->PRIMARYITEM)
        ) {
            // 'primaryitem' is scanned as an identifier, '!' as a symbol.
            // Consume the keyword BEFORE looking at what follows it: testing
            // for '?' while the keyword is still the current token can never
            // succeed.
            $this->tokenQueue->next();

            if ($this->checkAndExpectToken(Token::TK_SYMBOL, '?')) {
                $nodeType->setPrimaryItemName('?');
            } else {
                $nodeType->setPrimaryItemName($this->parseCndString());
            }

            // Everything belonging to this attribute is already consumed.
            continue;
        } else {
            return;
        }

        // Consume the single keyword token matched above.
        $this->tokenQueue->next();
    }
}
/**
 * Parse a property definition and append it to the node type.
 *
 * A property definition consists of a property name element followed by
 * optional property type, default values, property attributes and value
 * constraints elements. The property name, or '*' to indicate a residual
 * property definition, is prefixed with a '-'.
 *
 * The template starts with these defaults: not autocreated, not mandatory,
 * single-valued, on parent version COPY, not protected, type STRING, full
 * text searchable and query orderable.
 *
 * PropertyDef  ::= PropertyName [PropertyType] [DefaultValues]
 *                  [PropertyAttribute {PropertyAttribute}]
 *                  [ValueConstraints]
 * PropertyName ::= '-' String
 *
 * @param NodeTypeTemplateInterface $nodeType the node type owning the property
 */
protected function parsePropDef(NodeTypeTemplateInterface $nodeType)
{
    $this->expectToken(Token::TK_SYMBOL, '-');

    $property = $this->ntm->createPropertyDefinitionTemplate();
    $property->setAutoCreated(false);
    $property->setMandatory(false);
    $property->setMultiple(false);
    $property->setOnParentVersion(OnParentVersionAction::COPY);
    $property->setProtected(false);
    $property->setRequiredType(PropertyType::STRING);
    $property->setFullTextSearchable(true);
    $property->setQueryOrderable(true);
    $nodeType->getPropertyDefinitionTemplates()->append($property);

    // Parse the property name
    if ($this->checkAndExpectToken(Token::TK_SYMBOL, '*')) {
        $property->setName('*');
    } else {
        $property->setName($this->parseCndString());
    }

    // Parse the property type
    if ($this->checkAndExpectToken(Token::TK_SYMBOL, '(')) {
        $this->parsePropertyType($property);
    }

    // Parse default value
    if ($this->checkAndExpectToken(Token::TK_SYMBOL, '=')) {
        $this->parseDefaultValue($property);
    }

    $this->parsePropertyAttributes($property);

    // Check if there is a constraint (and not another namespace mapping):
    // the next token is '<' and the token two positions later is not '=',
    // i.e. what follows does not look like '<' prefix '='.
    // NOTE(review): a prefix that is scanned as several tokens would defeat
    // this two-token lookahead - confirm against the scanner.
    $next1 = $this->tokenQueue->peek();
    $next2 = $this->tokenQueue->peek(2);
    if ($next1 && $next1->getData() === '<' && (!$next2 || $next2->getData() !== '=')) {
        $this->parseValueConstraints($property);
    }
}
/**
 * Parse the property type and the closing parenthesis that follows it.
 *
 * The opening '(' has already been consumed by the caller. The type name is
 * converted with PropertyType::valueFromName() and stored as the required
 * type of the property.
 *
 * PropertyType ::= '(' ('STRING' | 'BINARY' | 'LONG' | 'DOUBLE' |
 *                       'BOOLEAN' | 'DATE' | 'NAME' | 'PATH' |
 *                       'REFERENCE' | 'WEAKREFERENCE' |
 *                       'DECIMAL' | 'URI' | 'UNDEFINED' | '*' |
 *                       '?') ')'
 *
 * NOTE(review): '*' and '?' are registered as symbols by the default scanner
 * context, so the identifier check below presumably never accepts them.
 *
 * @param PropertyDefinitionTemplateInterface $property the template to update
 *
 * @throws ParserException if the next token is not a known type name
 */
protected function parsePropertyType(PropertyDefinitionTemplateInterface $property)
{
    $types = array("STRING", "BINARY", "LONG", "DOUBLE", "BOOLEAN", "DATE", "NAME", "PATH",
        "REFERENCE", "WEAKREFERENCE", "DECIMAL", "URI", "UNDEFINED", "*", "?");

    if (!$this->checkTokenIn(Token::TK_IDENTIFIER, $types)) {
        // Look at the offending token with peek(): get() would consume it, so
        // the exception would then describe the token after the invalid one.
        $offending = $this->tokenQueue->isEof() ? null : $this->tokenQueue->peek();
        $found = $offending ? $offending->getData() : 'end of input';

        throw new ParserException($this->tokenQueue, sprintf("Invalid property type: %s", $found));
    }

    $data = $this->tokenQueue->get()->getData();

    $this->expectToken(Token::TK_SYMBOL, ')');

    $property->setRequiredType(PropertyType::valueFromName($data));
}
/**
 * Parse the value constraints of a property, which are listed after a '<'.
 *
 * A single '?' marks the attribute as a variant and is stored as
 * array('?'); otherwise the constraints are read as a string list.
 *
 * ValueConstraints ::= '<' (StringList | '?')
 *
 * @param PropertyDefinitionTemplateInterface $property the template to update
 */
protected function parseValueConstraints(PropertyDefinitionTemplateInterface $property)
{
    $this->expectToken(Token::TK_SYMBOL, '<');

    $isVariant = $this->checkAndExpectToken(Token::TK_SYMBOL, '?');
    $constraints = $isVariant ? array('?') : $this->parseCndStringList();

    $property->setValueConstraints($constraints);
}
/**
 * Parse the property attributes, which are indicated by keywords.
 *
 * Recognised keywords update the property template: autocreated, mandatory,
 * protected and multiple are set to true, nofulltext and noqueryorder switch
 * the corresponding feature off, an OPV keyword sets the on-parent-version
 * action (at most once) and queryops is followed by the list of available
 * query operators. Parsing stops at the first token that is not a property
 * attribute.
 *
 * The optional '?' variant suffix that the grammar allows after most
 * keywords is not handled by this method.
 *
 * PropertyAttribute ::= Autocreated | Mandatory | Protected |
 *                       Opv | Multiple | QueryOps | NoFullText |
 *                       NoQueryOrder
 * Autocreated       ::= ('autocreated' | 'aut' | 'a' )['?']
 * Mandatory         ::= ('mandatory' | 'man' | 'm') ['?']
 * Protected         ::= ('protected' | 'pro' | 'p') ['?']
 * Opv               ::= 'COPY' | 'VERSION' | 'INITIALIZE' | 'COMPUTE' |
 *                       'IGNORE' | 'ABORT' | ('OPV' '?')
 * Multiple          ::= ('multiple' | 'mul' | '*') ['?']
 * QueryOps          ::= ('queryops' | 'qop')
 *                       (('''Operator {','Operator}''') | '?')
 * Operator          ::= '=' | '<>' | '<' | '<=' | '>' | '>=' | 'LIKE'
 * NoFullText        ::= ('nofulltext' | 'nof') ['?']
 * NoQueryOrder      ::= ('noqueryorder' | 'nqord') ['?']
 *
 * @param PropertyDefinitionTemplateInterface $property the template to update
 *
 * @throws ParserException if more than one on-parent-version action is given
 */
protected function parsePropertyAttributes(PropertyDefinitionTemplateInterface $property)
{
    $opvSeen = false;
    while (true) {
        if ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->AUTOCREATED)) {
            $property->setAutoCreated(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->MANDATORY)) {
            $property->setMandatory(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->PROTECTED)) {
            $property->setProtected(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->MULTIPLE)) {
            $property->setMultiple(true);
        } elseif ($this->checkTokenIn(Token::TK_SYMBOL, $this->MULTIPLE)) {
            // the '*' form of 'multiple' is scanned as a symbol
            $property->setMultiple(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->QUERYOPS)) {
            // Consume the keyword first: parseQueryOpsAttribute() starts
            // reading at the operator list and consumes all of it itself.
            $this->tokenQueue->next();
            $property->setAvailableQueryOperators($this->parseQueryOpsAttribute());
            continue;
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->NOFULLTEXT)) {
            $property->setFullTextSearchable(false);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->NOQUERYORDER)) {
            $property->setQueryOrderable(false);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->OPV)) {
            if ($opvSeen) {
                throw new ParserException($this->tokenQueue, 'More than one on parent version action specified on property ' . $property->getName());
            }
            // get() returns the keyword and consumes it
            $token = $this->tokenQueue->get();
            $property->setOnParentVersion(OnParentVersionAction::valueFromName($token->getData()));
            $opvSeen = true;
            continue;
        } else {
            return;
        }

        // Consume the single keyword token matched above.
        $this->tokenQueue->next();
    }
}
/**
 * Parse a child node definition and append it to the node type.
 *
 * A child node definition consists of a node name element followed by
 * optional required node types, default node type and node attributes
 * elements. The node name, or '*' to indicate a residual child node
 * definition, is prefixed with a '+'.
 *
 * The required primary node type list is delimited by parentheses. A '?'
 * inside the parentheses marks the attribute as a variant and is stored as
 * array('?').
 *
 * ChildNodeDef  ::= NodeName [RequiredTypes] [DefaultType]
 *                   [NodeAttribute {NodeAttribute}]
 * NodeName      ::= '+' String
 * RequiredTypes ::= '(' (StringList | '?') ')'
 * DefaultType   ::= '=' (String | '?')
 *
 * @param NodeTypeTemplateInterface $nodeType the node type owning the child node
 */
protected function parseChildNodeDef(NodeTypeTemplateInterface $nodeType)
{
    $this->expectToken(Token::TK_SYMBOL, '+');
    $childType = $this->ntm->createNodeDefinitionTemplate();
    $nodeType->getNodeDefinitionTemplates()->append($childType);

    // Parse the child node name
    if ($this->checkAndExpectToken(Token::TK_SYMBOL, '*')) {
        $childType->setName('*');
    } else {
        $childType->setName($this->parseCndString());
    }

    // Parse the required types
    if ($this->checkAndExpectToken(Token::TK_SYMBOL, '(')) {
        if ($this->checkAndExpectToken(Token::TK_SYMBOL, '?')) {
            // Keep the value a list in both branches, the same way
            // parseSupertypes() records a variant.
            $list = array('?');
        } else {
            $list = $this->parseCndStringList();
        }
        $this->expectToken(Token::TK_SYMBOL, ')');
        $childType->setRequiredPrimaryTypeNames($list);
    }

    // Parse the default type
    if ($this->checkAndExpectToken(Token::TK_SYMBOL, '=')) {
        $childType->setDefaultPrimaryTypeName($this->parseCndString());
    }

    $this->parseChildNodeAttributes($nodeType, $childType);
}
/**
 * Parse the child node attributes, which are indicated by keywords.
 *
 * Recognised keywords update the child node template: autocreated, mandatory
 * and protected are set to true, 'sns' (or '*', or 'multiple') enables
 * same-name siblings and an OPV keyword sets the on-parent-version action.
 * 'primaryitem' (or '!') makes this child node the primary item of the
 * parent node type. Parsing stops at the first token that is not a node
 * attribute.
 *
 * If 'sns' is absent, same-name siblings are not enabled by this method.
 * The optional '?' variant suffix that the grammar allows is not handled.
 *
 * NodeAttribute ::= Autocreated | Mandatory | Protected |
 *                   Opv | Sns
 * Autocreated   ::= ('autocreated' | 'aut' | 'a' )['?']
 * Mandatory     ::= ('mandatory' | 'man' | 'm') ['?']
 * Protected     ::= ('protected' | 'pro' | 'p') ['?']
 * Opv           ::= 'COPY' | 'VERSION' | 'INITIALIZE' | 'COMPUTE' |
 *                   'IGNORE' | 'ABORT' | ('OPV' '?')
 * Sns           ::= ('sns' | '*') ['?']
 *
 * @param NodeTypeTemplateInterface       $parentType the node type owning the child node
 * @param NodeDefinitionTemplateInterface $childType  the child node template to update
 */
protected function parseChildNodeAttributes(
    NodeTypeTemplateInterface $parentType,
    NodeDefinitionTemplateInterface $childType
) {
    while (true) {
        if ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->PRIMARYITEM)
            || $this->checkTokenIn(Token::TK_SYMBOL, $this->PRIMARYITEM)
        ) {
            // 'primaryitem' is scanned as an identifier, '!' as a symbol
            $parentType->setPrimaryItemName($childType->getName());
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->AUTOCREATED)) {
            $childType->setAutoCreated(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->MANDATORY)) {
            $childType->setMandatory(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->PROTECTED)) {
            $childType->setProtected(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->SNS)
            || $this->checkTokenIn(Token::TK_SYMBOL, $this->SNS)
        ) {
            // The '*' form is scanned as a symbol; check both token types the
            // same way parsePropertyAttributes() does for 'multiple'.
            $childType->setSameNameSiblings(true);
        } elseif ($this->checkTokenIn(Token::TK_IDENTIFIER, $this->OPV)) {
            // get() returns the keyword and consumes it
            $token = $this->tokenQueue->get();
            $childType->setOnParentVersion(OnParentVersionAction::valueFromName($token->getData()));
            continue;
        } else {
            return;
        }

        // Consume the single keyword token matched above.
        $this->tokenQueue->next();
    }
}
function parseCndStringList() + { + $strings = array(); + + $strings[] = $this->parseCndString(); + while ($this->checkAndExpectToken(Token::TK_SYMBOL, ',')) { + $strings[] = $this->parseCndString(); + } + + return $strings; + } + + /** + * Parse a string + * + * String ::= QuotedString | UnquotedString + * QuotedString ::= SingleQuotedString | DoubleQuotedString + * SingleQuotedString ::= ''' UnquotedString ''' + * DoubleQuotedString ::= '"' UnquotedString '"' + * UnquotedString ::= LocalName + * LocalName ::= ValidString – SelfOrParent + * SelfOrParent ::= '.' | '..' + * ValidString ::= ValidChar {ValidChar} + * ValidChar ::= XmlChar – InvalidChar + * InvalidChar ::= '/' | ':' | '[' | ']' | '|' | '*' + * XmlChar ::= Any character that matches the Char production + * at http://www.w3.org/TR/xml/#NT-Char + * Char ::= "\t" | "\r" | "\n" | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + * + * TODO: check \n, \r, \t are valid in CND strings! + * + * @return string + */ + protected function parseCndString() + { + $string = ''; + $lastType = null; + + while (true) { + $token = $this->tokenQueue->peek(); + $type = $token->getType(); + $data = $token->getData(); + + if ($type === Token::TK_STRING) { + $string = substr($data, 1, -1); + $this->tokenQueue->next(); + return $string; + } + + // If it's not an identifier or a symbol allowed in a string, break + if ($type !== Token::TK_IDENTIFIER && $type !== Token::TK_SYMBOL + || ($type === Token::TK_SYMBOL && $data !== '_' && $data !== ':')) { + break; + } + + // Detect spaces (an identifier cannot be followed by an identifier as it would have been read as a single token) + if ($type === Token::TK_IDENTIFIER && $lastType === Token::TK_IDENTIFIER) { + break; + } + + $string .= $token->getData(); + + $this->tokenQueue->next(); + $lastType = $type; + } + + if ($string === '') { + throw new ParserException($this->tokenQueue, sprintf("Expected CND string, found '%s': ", $this->tokenQueue->peek()->getData())); + } + + return 
/**
 * Parse the list of available query comparison operators.
 *
 * The list follows the keyword 'queryops'; this method starts reading at the
 * token after that keyword. A '?' instead of a list denotes a variant, which
 * is not supported and raises an exception.
 *
 * QueryOps ::= ('queryops' | 'qop')
 *              (('''Operator {','Operator}''') | '?')
 * Operator ::= '=' | '<>' | '<' | '<=' | '>' | '>=' | 'LIKE'
 *
 * NOTE(review): the grammar wraps the operator list in quotes; this method
 * reads operators as individual tokens, so a list scanned as one quoted
 * string token is presumably rejected with 'Operator expected' - confirm.
 *
 * @return array the operators, in the order they were listed
 *
 * @throws ParserException on a variant ('?'), on an empty list or when a
 *                         comma is not followed by an operator
 */
protected function parseQueryOpsAttribute()
{
    if ($this->checkAndExpectToken(Token::TK_SYMBOL, '?')) {
        // this denotes a variant, whatever that is
        throw new ParserException($this->tokenQueue, 'TODO: understand what "variant" means');
    }

    $ops = array();
    do {
        $op = $this->parseQueryOperator();
        if (false === $op) {
            // There must be at least one operator, and one after every comma.
            // Never store the "no operator" marker in the result.
            throw new ParserException($this->tokenQueue, 'Operator expected');
        }
        $ops[] = $op;
    } while ($this->checkAndExpectToken(Token::TK_SYMBOL, ','));

    return $ops;
}

/**
 * Parse a single query operator and consume the tokens it is made of.
 *
 * Two-character operators ('<>', '<=', '>=') are recognised from two
 * consecutive tokens. Nothing is consumed when no operator is found.
 *
 * @return bool|string the operator, or false if the next token is not an operator
 */
protected function parseQueryOperator()
{
    if ($this->tokenQueue->isEof()) {
        return false;
    }

    $data = $this->tokenQueue->peek()->getData();

    // The operator may be the last token of the input, in which case there
    // is no following token to inspect.
    $nextToken = $this->tokenQueue->peek(1);
    $nextData = $nextToken ? $nextToken->getData() : null;

    switch ($data) {
        case '<':
            if ($nextData === '>') {
                $op = '<>';
            } elseif ($nextData === '=') {
                $op = '<=';
            } else {
                $op = '<';
            }
            break;
        case '>':
            $op = ($nextData === '=') ? '>=' : '>';
            break;
        case '=':
            $op = '=';
            break;
        case 'LIKE':
            $op = 'LIKE';
            break;
        default:
            return false;
    }

    // 'LIKE' is a single token; a symbolic operator spans one token per
    // character.
    $tokenCount = ('LIKE' === $op) ? 1 : strlen($op);
    for ($i = 0; $i < $tokenCount; $i++) {
        $this->tokenQueue->next();
    }

    return $op;
}
/**
 * A buffer reader whose buffer is the content of a file.
 */
class FileReader extends BufferReader
{
    /**
     * Path of the file that was read, as given to the constructor
     */
    protected $fileName;

    /**
     * @param string $fileName path of the file to read
     *
     * @throws \InvalidArgumentException if the path is not a readable regular
     *                                   file or its content cannot be read
     */
    public function __construct($fileName)
    {
        // file_exists() alone also accepts directories, which cannot be read
        if (!is_file($fileName) || !is_readable($fileName)) {
            throw new \InvalidArgumentException(sprintf("Invalid file '%s'", $fileName));
        }

        // file_get_contents() reports failure by returning false; do not
        // hand that on to the parent as if it were the buffer
        $contents = file_get_contents($fileName);
        if (false === $contents) {
            throw new \InvalidArgumentException(sprintf("Unable to read file '%s'", $fileName));
        }

        $this->fileName = $fileName;

        parent::__construct($contents);
    }

    /**
     * @return string the path given to the constructor
     */
    public function getFileName()
    {
        return $this->fileName;
    }
}
+ /** + * Return the literal delimited by start and end position + * @return string + */ + public function current(); + + /** + * Advance the forward position and return the literal delimited by start and end position + * @return string + */ + public function forward(); + + public function forwardChar(); + + /** + * Rewind the forward position to the start position + * @return void + */ + public function rewind(); + + /** + * Return the literal delimited by start and end position, then set the + * start position to the end position + * + * @return string + */ + public function consume(); +} diff --git a/src/PHPCR/Util/CND/Scanner/AbstractScanner.php b/src/PHPCR/Util/CND/Scanner/AbstractScanner.php new file mode 100644 index 00000000..ba67fc42 --- /dev/null +++ b/src/PHPCR/Util/CND/Scanner/AbstractScanner.php @@ -0,0 +1,62 @@ + + */ +abstract class AbstractScanner +{ + /** + * @var TokenQueue + */ + private $queue; + + protected $context; + + public function __construct(Context\ScannerContext $context) + { + $this->resetQueue(); + $this->context = $context; + } + + public function resetQueue() + { + $this->queue = new TokenQueue(); + } + + /** + * @param Token $token + * @return Token | void + */ + public function applyFilters(Token $token) + { + foreach ($this->context->getTokenFilters() as $filter) { + + $token = $filter->filter($token); + + if (is_null($token)) { + break; + } + } + + return $token; + } + + protected function getQueue() + { + return $this->queue; + } + + protected function addToken(ReaderInterface $reader, Token $token) + { + $token->setLine($reader->getCurrentLine()); + $token->setRow($reader->getCurrentColumn()); + + if ($token = $this->applyFilters($token)) { + $this->queue->add($token); + } + } +} diff --git a/src/PHPCR/Util/CND/Scanner/Context/DefaultScannerContext.php b/src/PHPCR/Util/CND/Scanner/Context/DefaultScannerContext.php new file mode 100644 index 00000000..1fb0d8a2 --- /dev/null +++ 
b/src/PHPCR/Util/CND/Scanner/Context/DefaultScannerContext.php
@@ -0,0 +1,35 @@
+
+ */
+class DefaultScannerContext extends ScannerContext
+{
+    /**
+     * Register the default whitespaces, string delimiters, comment delimiters and symbols.
+     */
+    public function __construct()
+    {
+        $this->addWhitespace(" ");
+        $this->addWhitespace("\t");
+
+        $this->addStringDelimiter('\'');
+        $this->addStringDelimiter('"');
+
+        $this->addLineCommentDelimiter('//');
+
+        $this->addBlockCommentDelimiter('/*', '*/');
+
+        $symbols = array(
+            '<', '>', '+', '*', '%', '&', '/', '(', ')', '=', '?', '#', '|', '!', '~',
+            '[', ']', '{', '}', '$', ',', ';', ':', '.', '-', '_', '\\',
+        );
+        foreach ($symbols as $symbol) {
+            $this->addSymbol($symbol);
+        }
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/Context/DefaultScannerContextWithoutSpacesAndComments.php b/src/PHPCR/Util/CND/Scanner/Context/DefaultScannerContextWithoutSpacesAndComments.php
new file mode 100644
index 00000000..606b2448
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/Context/DefaultScannerContextWithoutSpacesAndComments.php
@@ -0,0 +1,23 @@
+
+ */
+class DefaultScannerContextWithoutSpacesAndComments extends DefaultScannerContext
+{
+    /**
+     * Add filters dropping newline, whitespace and comment tokens to the default context.
+     */
+    public function __construct()
+    {
+        parent::__construct();
+
+        $this->addTokenFilter(new TokenFilter\NoNewlinesFilter());
+        $this->addTokenFilter(new TokenFilter\NoWhitespacesFilter());
+        $this->addTokenFilter(new TokenFilter\NoCommentsFilter());
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/Context/ScannerContext.php b/src/PHPCR/Util/CND/Scanner/Context/ScannerContext.php
new file mode 100644
index 00000000..75ea053a
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/Context/ScannerContext.php
@@ -0,0 +1,182 @@
+
+ */
+class ScannerContext
+{
+    /**
+     * Characters to be considered as white spaces
+     * @var array
+     */
+    protected $whitespaces = array();
+
+    /**
+     * Characters to be considered as paired string delimiters.
+     *
+     * These characters will not be used as symbols, thus if you remove any from this list,
+     * you must add it to the $symbols array to be taken in account as a symbol.
+     *
+     * @var array
+     */
+    protected $stringDelimiters = array();
+
+    /**
+     * Line comments start
+     *
+     * @var array
+     */
+    protected $lineCommentDelimiters = array();
+
+    /**
+     * Block comments delimiters
+     *
+     * @var array
+     */
+    protected $blockCommentDelimiters = array();
+
+    /**
+     * Characters to be considered as symbols.
+     *
+     * String delimiters must not appear in this array.
+     *
+     * @var array
+     */
+    protected $symbols = array();
+
+    /**
+     * @var TokenFilterInterface[]
+     */
+    protected $tokenFilters = array();
+
+    /**
+     * Register a block comment delimiter pair, replacing any pair with the same start delimiter.
+     *
+     * @param string $startDelim
+     * @param string $endDelim
+     */
+    public function addBlockCommentDelimiter($startDelim, $endDelim)
+    {
+        $this->blockCommentDelimiters[$startDelim] = $endDelim;
+    }
+
+    /**
+     * Return the block comment delimiters as a map of start delimiter to end delimiter.
+     *
+     * @return array
+     */
+    public function getBlockCommentDelimiters()
+    {
+        return $this->blockCommentDelimiters;
+    }
+
+    /**
+     * Register a line comment start delimiter (duplicates are not filtered out).
+     *
+     * @param string $delim
+     */
+    public function addLineCommentDelimiter($delim)
+    {
+        $this->lineCommentDelimiters[] = $delim;
+    }
+
+    /**
+     * Return the line comment start delimiters in the order they were added.
+     *
+     * @return array
+     */
+    public function getLineCommentDelimiters()
+    {
+        return $this->lineCommentDelimiters;
+    }
+
+    /**
+     * Register a string delimiter unless the very same string is already registered.
+     *
+     * @param string $delim
+     */
+    public function addStringDelimiter($delim)
+    {
+        if (!in_array($delim, $this->stringDelimiters, true)) {
+            $this->stringDelimiters[] = $delim;
+        }
+    }
+
+    /**
+     * Return the registered string delimiters.
+     *
+     * @return array
+     */
+    public function getStringDelimiters()
+    {
+        return $this->stringDelimiters;
+    }
+
+    /**
+     * Register a symbol unless the very same string is already registered.
+     *
+     * @param string $symbol
+     */
+    public function addSymbol($symbol)
+    {
+        if (!in_array($symbol, $this->symbols, true)) {
+            $this->symbols[] = $symbol;
+        }
+    }
+
+    /**
+     * Return the registered symbols.
+     *
+     * @return array
+     */
+    public function getSymbols()
+    {
+        return $this->symbols;
+    }
+
+    /**
+     * Register a whitespace character unless the very same string is already registered.
+     *
+     * @param string $whitespace
+     */
+    public function addWhitespace($whitespace)
+    {
+        if (!in_array($whitespace, $this->whitespaces, true)) {
+            $this->whitespaces[] = $whitespace;
+        }
+    }
+
+    /**
+     * Return the registered whitespace characters.
+     *
+     * @return array
+     */
+    public function getWhitespaces()
+    {
+        return $this->whitespaces;
+    }
+
+    /**
+     * Append a token filter (filters are kept in the order they were added).
+     *
+     * @param TokenFilterInterface $filter
+     */
+    public function addTokenFilter(TokenFilterInterface $filter)
+    {
+        $this->tokenFilters[] = $filter;
+    }
+
+    /**
+     * Return the registered token filters.
+     *
+     * @return TokenFilterInterface[]
+     */
+    public function getTokenFilters()
+    {
+        return $this->tokenFilters;
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/GenericScanner.php b/src/PHPCR/Util/CND/Scanner/GenericScanner.php
new file mode 100644
index 00000000..7497590b
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/GenericScanner.php
@@ -0,0 +1,305 @@
+
+ */
+class GenericScanner extends AbstractScanner
+{
+    /**
+     * Scan the given reader and construct a TokenQueue composed of GenericToken.
+     *
+     * At every position the consumers are tried in a fixed order: comments, newline,
+     * whitespaces, string, identifier, symbols. A character that none of them
+     * recognises is emitted as a TK_UNKNOWN token.
+     *
+     * @throws ScannerException
+     * @param ReaderInterface $reader
+     * @return TokenQueue
+     */
+    public function scan(ReaderInterface $reader)
+    {
+        $this->resetQueue();
+
+        while (!$reader->isEof()) {
+            $tokenFound = $this->consumeComments($reader)
+                || $this->consumeNewLine($reader)
+                || $this->consumeSpaces($reader)
+                || $this->consumeString($reader)
+                || $this->consumeIdentifiers($reader)
+                || $this->consumeSymbols($reader);
+
+            if (!$tokenFound) {
+                // consume() returns the literal between the start and forward positions,
+                // i.e. the unrecognised character itself. forwardChar() returns the
+                // character after it, which is not the one this token stands for.
+                $reader->forward();
+                $char = $reader->consume();
+
+                if ($char !== $reader->getEofMarker()) {
+                    $this->addToken($reader, new GenericToken(GenericToken::TK_UNKNOWN, $char));
+                }
+            }
+        }
+
+        return $this->getQueue();
+    }
+
+    /**
+     * Detect and consume whitespaces
+     *
+     * A run of consecutive whitespace characters becomes a single TK_WHITESPACE token.
+     *
+     * @param ReaderInterface $reader
+     * @return bool
+     */
+    protected function consumeSpaces(ReaderInterface $reader)
+    {
+        $whitespaces = $this->context->getWhitespaces();
+
+        if (!in_array($reader->currentChar(), $whitespaces, true)) {
+            return false;
+        }
+
+        do {
+            $char = $reader->forwardChar();
+        } while (in_array($char, $whitespaces, true));
+
+        $this->addToken($reader, new GenericToken(GenericToken::TK_WHITESPACE, $reader->consume()));
+
+        return true;
+    }
+
+    /**
+     * Detect and consume newlines
+     *
+     * Exactly one newline is consumed per call; consecutive newlines produce one
+     * TK_NEWLINE token each over successive calls.
+     *
+     * @param ReaderInterface $reader
+     * @return bool
+     */
+    protected function consumeNewLine(ReaderInterface $reader)
+    {
+        if ($reader->currentChar() !== PHP_EOL) {
+            return false;
+        }
+
+        // The token is added before the reader moves past the newline
+        $this->addToken($reader, new GenericToken(GenericToken::TK_NEWLINE, PHP_EOL));
+
+        $reader->forward();
+        $reader->consume();
+
+        return true;
+    }
+
+    /**
+     * Detect and consume strings
+     *
+     * The token data includes the opening and closing delimiters.
+     *
+     * @throws ScannerException If a newline or the end of the input is reached before the closing delimiter
+     * @param ReaderInterface $reader
+     * @return bool
+     */
+    protected function consumeString(ReaderInterface $reader)
+    {
+        $curDelimiter = $reader->currentChar();
+        if (!in_array($curDelimiter, $this->context->getStringDelimiters(), true)) {
+            return false;
+        }
+
+        $char = $reader->forwardChar();
+        while ($char !== $curDelimiter) {
+            if ($char === PHP_EOL) {
+                throw new ScannerException($reader, "Newline detected in string");
+            }
+
+            // Without this guard an unterminated string on the last line never leaves the loop
+            if ($reader->isEof() || $char === $reader->getEofMarker()) {
+                throw new ScannerException($reader, "Unterminated string");
+            }
+
+            $char = $reader->forwardChar();
+        }
+        $reader->forward();
+
+        $this->addToken($reader, new GenericToken(GenericToken::TK_STRING, $reader->consume()));
+
+        return true;
+    }
+
+    /**
+     * Detect and consume comments
+     *
+     * Block comments are tried first, then line comments.
+     *
+     * @throws ScannerException
+     * @param ReaderInterface $reader
+     * @return bool
+     */
+    protected function consumeComments(ReaderInterface $reader)
+    {
+        return $this->consumeBlockComments($reader) || $this->consumeLineComments($reader);
+    }
+
+    /**
+     * Detect and consume block comments
+     *
+     * The end delimiter is checked after every character added to the comment, so it is
+     * also found when it directly follows the start delimiter.
+     *
+     * @throws ScannerException If the end of the input is reached before the end delimiter
+     * @param ReaderInterface $reader
+     * @return bool
+     */
+    protected function consumeBlockComments(ReaderInterface $reader)
+    {
+        $nextChar = $reader->currentChar();
+
+        foreach ($this->context->getBlockCommentDelimiters() as $beginDelim => $endDelim) {
+            // PHP turns array keys made of digits into integers
+            $beginDelim = (string) $beginDelim;
+            $endDelim = (string) $endDelim;
+
+            if ($beginDelim === '' || $endDelim === '' || $nextChar !== $beginDelim[0]) {
+                continue;
+            }
+
+            // Lookup the start delimiter
+            $beginLength = strlen($beginDelim);
+            for ($i = 1; $i <= $beginLength; $i++) {
+                $reader->forward();
+            }
+
+            if ($reader->current() !== $beginDelim) {
+                // Start delimiter not found, rewind the looked up characters and try the next pair
+                $reader->rewind();
+                continue;
+            }
+
+            // Start delimiter found, let's try to find the end delimiter
+            $endLength = strlen($endDelim);
+            while (!$reader->isEof()) {
+                $comment = $reader->forward();
+
+                // The length check keeps the two delimiters from sharing characters ("/*/" is not a comment)
+                if (strlen($comment) >= $beginLength + $endLength && substr($comment, -$endLength) === $endDelim) {
+                    $this->addToken($reader, new GenericToken(GenericToken::TK_COMMENT, $reader->consume()));
+
+                    return true;
+                }
+            }
+
+            // End of file reached and no end delimiter found, error
+            throw new ScannerException($reader, "Unterminated block comment");
+        }
+
+        return false;
+    }
+
+    /**
+     * Detect and consume line comments
+     *
+     * The comment runs up to, but does not include, the next newline or the end of the input.
+     *
+     * @param ReaderInterface $reader
+     * @return bool
+     */
+    protected function consumeLineComments(ReaderInterface $reader)
+    {
+        $nextChar = $reader->currentChar();
+
+        foreach ($this->context->getLineCommentDelimiters() as $delimiter) {
+            if (!$delimiter || $nextChar !== $delimiter[0]) {
+                continue;
+            }
+
+            for ($i = 1; $i <= strlen($delimiter); $i++) {
+                $reader->forward();
+            }
+
+            if ($reader->current() !== $delimiter) {
+                // Rewind the looked up characters and try the next delimiter
+                $reader->rewind();
+                continue;
+            }
+
+            // consume to end of line
+            $char = $reader->currentChar();
+            while (!$reader->isEof() && $char !== PHP_EOL) {
+                $char = $reader->forwardChar();
+            }
+
+            $this->addToken($reader, new GenericToken(GenericToken::TK_COMMENT, $reader->consume()));
+
+            return true;
+        }
+
+        return false;
+    }
+
+    /**
+     * Detect and consume identifiers
+     *
+     * An identifier starts with an ASCII letter, followed by any number of ASCII
+     * letters, digits and underscores.
+     *
+     * @param ReaderInterface $reader
+     * @return bool
+     */
+    protected function consumeIdentifiers(ReaderInterface $reader)
+    {
+        if (!preg_match('/[a-zA-Z]/', $reader->currentChar())) {
+            return false;
+        }
+
+        do {
+            $nextChar = $reader->forwardChar();
+        } while (preg_match('/[a-zA-Z0-9_]/', $nextChar));
+
+        $this->addToken($reader, new GenericToken(GenericToken::TK_IDENTIFIER, $reader->consume()));
+
+        return true;
+    }
+
+    /**
+     * Detect and consume symbols
+     *
+     * Every symbol character becomes its own TK_SYMBOL token; a run of symbols is
+     * consumed within one call.
+     *
+     * @param ReaderInterface $reader
+     * @return bool
+     */
+    protected function consumeSymbols(ReaderInterface $reader)
+    {
+        $symbols = $this->context->getSymbols();
+        $found = false;
+        $nextChar = $reader->currentChar();
+
+        while (in_array($nextChar, $symbols, true)) {
+            $found = true;
+            $this->addToken($reader, new GenericToken(GenericToken::TK_SYMBOL, $nextChar));
+
+            // Commit what was stepped over so far, then step over the symbol just emitted
+            $reader->consume();
+            $nextChar = $reader->forwardChar();
+        }
+
+        // Commit the last symbol stepped over; nothing has been stepped over when none was found
+        $reader->consume();
+
+        return $found;
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/GenericToken.php b/src/PHPCR/Util/CND/Scanner/GenericToken.php
new file mode 100644
index 00000000..f620e5ba
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/GenericToken.php
@@ -0,0 +1,50 @@
+
+ */
+class GenericToken extends Token
+{
+    const TK_WHITESPACE = 0;
+    const TK_NEWLINE = 1;
+    const TK_STRING = 2;
+    const TK_COMMENT = 3;
+    const TK_IDENTIFIER = 4;
+    const TK_KEYWORD = 5;
+    const TK_SYMBOL = 6;
+    const TK_UNKNOWN = 99;
+
+    /**
+     * Return the human readable name of a token type.
+     *
+     * @param int $type One of the TK_* constants
+     *
+     * @return string 'Unknown' for TK_UNKNOWN and for any value matching none of the other constants
+     */
+    public static function getTypeName($type)
+    {
+        switch ($type) {
+            case self::TK_WHITESPACE: return 'Whitespace';
+            case self::TK_NEWLINE: return 'Newline';
+            case self::TK_STRING: return 'String';
+            case self::TK_COMMENT: return 'Comment';
+            case self::TK_IDENTIFIER: return 'Identifier';
+            case self::TK_KEYWORD: return 'Keyword';
+            case self::TK_SYMBOL: return 'Symbol';
+        }
+
+        return 'Unknown';
+    }
+
+    /**
+     * Render the token as TOKEN(type name, 'data', line, row); the data is trimmed.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        return sprintf("TOKEN(%s, '%s', %s, %s)", self::getTypeName($this->getType()), trim($this->data), $this->line, $this->row);
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/Token.php b/src/PHPCR/Util/CND/Scanner/Token.php
new file mode 100644
index 00000000..b9609e02
--- /dev/null
+++
b/src/PHPCR/Util/CND/Scanner/Token.php
@@ -0,0 +1,117 @@
+
+ */
+class Token
+{
+    /**
+     * Numeric identifier of the token type
+     *
+     * @var int
+     */
+    public $type;
+
+    /**
+     * Raw text of the token
+     *
+     * @var string
+     */
+    public $data;
+
+    /**
+     * Line number of the token
+     *
+     * @var int
+     */
+    protected $line;
+
+    /**
+     * Column number of the token (called "row" in the accessors)
+     *
+     * @var int
+     */
+    protected $row;
+
+    /**
+     * Create a token; every argument is stored as given, without validation.
+     *
+     * @param int    $type Token type
+     * @param string $data Raw token text
+     * @param int    $line Line number
+     * @param int    $row  Column number
+     */
+    public function __construct($type = 0, $data = '', $line = 0, $row = 0)
+    {
+        $this->row = $row;
+        $this->line = $line;
+        $this->data = $data;
+        $this->type = $type;
+    }
+
+    /**
+     * @return string The raw token text
+     */
+    public function getData()
+    {
+        return $this->data;
+    }
+
+    /**
+     * @return int The token type
+     */
+    public function getType()
+    {
+        return $this->type;
+    }
+
+    /**
+     * Render the token as TOKEN(type, 'data', line, row); the data is trimmed.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        $fields = array($this->type, trim($this->data), $this->line, $this->row);
+
+        return vsprintf("TOKEN(%s, '%s', %s, %s)", $fields);
+    }
+
+    /**
+     * @param int $line Line number
+     */
+    public function setLine($line)
+    {
+        $this->line = $line;
+    }
+
+    /**
+     * @return int Line number
+     */
+    public function getLine()
+    {
+        return $this->line;
+    }
+
+    /**
+     * @param int $row Column number
+     */
+    public function setRow($row)
+    {
+        $this->row = $row;
+    }
+
+    /**
+     * @return int Column number
+     */
+    public function getRow()
+    {
+        return $this->row;
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/TokenFilter/NoCommentsFilter.php b/src/PHPCR/Util/CND/Scanner/TokenFilter/NoCommentsFilter.php
new file mode 100644
index 00000000..93e69eff
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/TokenFilter/NoCommentsFilter.php
@@ -0,0 +1,19 @@
+
+ */
+class NoCommentsFilter extends TokenTypeFilter
+{
+    /**
+     * Set the filtered out token type to GenericToken::TK_COMMENT.
+     */
+    public function __construct()
+    {
+        parent::__construct(GenericToken::TK_COMMENT);
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/TokenFilter/NoNewlinesFilter.php b/src/PHPCR/Util/CND/Scanner/TokenFilter/NoNewlinesFilter.php
new file mode 100644
index 00000000..f4a203c2
--- /dev/null
+++
b/src/PHPCR/Util/CND/Scanner/TokenFilter/NoNewlinesFilter.php
@@ -0,0 +1,19 @@
+
+ */
+class NoNewlinesFilter extends TokenTypeFilter
+{
+    /**
+     * Set the filtered out token type to GenericToken::TK_NEWLINE.
+     */
+    public function __construct()
+    {
+        parent::__construct(GenericToken::TK_NEWLINE);
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/TokenFilter/NoWhitespacesFilter.php b/src/PHPCR/Util/CND/Scanner/TokenFilter/NoWhitespacesFilter.php
new file mode 100644
index 00000000..90009032
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/TokenFilter/NoWhitespacesFilter.php
@@ -0,0 +1,19 @@
+
+ */
+class NoWhitespacesFilter extends TokenTypeFilter
+{
+    /**
+     * Set the filtered out token type to GenericToken::TK_WHITESPACE.
+     */
+    public function __construct()
+    {
+        parent::__construct(GenericToken::TK_WHITESPACE);
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/TokenFilter/TokenFilterChain.php b/src/PHPCR/Util/CND/Scanner/TokenFilter/TokenFilterChain.php
new file mode 100644
index 00000000..7b004dbf
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/TokenFilter/TokenFilterChain.php
@@ -0,0 +1,50 @@
+
+ */
+class TokenFilterChain implements TokenFilterInterface
+{
+    /**
+     * Filters applied by filter(), in the order they were added
+     *
+     * @var TokenFilterInterface[]
+     */
+    protected $filters = array();
+
+    /**
+     * Append a filter to the end of the chain.
+     *
+     * @param TokenFilterInterface $filter
+     */
+    public function addFilter(TokenFilterInterface $filter)
+    {
+        $this->filters[] = $filter;
+    }
+
+    /**
+     * Pass the token through every filter in turn, stopping at the first one that rejects it.
+     *
+     * A chain without any filter returns the token unchanged.
+     *
+     * @param Token $token
+     * @return Token | null The token returned by the last filter, or null once a filter rejected it
+     */
+    public function filter(Token $token)
+    {
+        foreach ($this->filters as $filter) {
+            $token = $filter->filter($token);
+
+            // A filter rejects a token by returning null (or any other falsy value)
+            if (!$token) {
+                return null;
+            }
+        }
+
+        return $token;
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/TokenFilter/TokenFilterInterface.php b/src/PHPCR/Util/CND/Scanner/TokenFilter/TokenFilterInterface.php
new file mode 100644
index 00000000..865e2a43
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/TokenFilter/TokenFilterInterface.php
@@ -0,0 +1,19 @@
+
+ */
+interface TokenFilterInterface
+{
+    /**
+     * Decide whether a token is kept.
+     *
+     * @param Token $token
+     * @return Token | null The token to keep, or null to discard it
+     */
+    public function filter(Token $token);
+}
diff --git a/src/PHPCR/Util/CND/Scanner/TokenFilter/TokenTypeFilter.php b/src/PHPCR/Util/CND/Scanner/TokenFilter/TokenTypeFilter.php
new file mode
100644
index 00000000..96589223
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/TokenFilter/TokenTypeFilter.php
@@ -0,0 +1,37 @@
+
+ */
+class TokenTypeFilter implements TokenFilterInterface
+{
+    /**
+     * The filtered out token type
+     *
+     * @var int
+     */
+    protected $type;
+
+    /**
+     * @param int $tokenType The token type to filter out (compared with ===)
+     */
+    public function __construct($tokenType)
+    {
+        $this->type = $tokenType;
+    }
+
+    /**
+     * Discard tokens of the configured type and let every other token through.
+     *
+     * @param Token $token
+     * @return Token | null Null when the token has the filtered out type, the token itself otherwise
+     */
+    public function filter(Token $token)
+    {
+        return $token->getType() === $this->type ? null : $token;
+    }
+}
diff --git a/src/PHPCR/Util/CND/Scanner/TokenQueue.php b/src/PHPCR/Util/CND/Scanner/TokenQueue.php
new file mode 100644
index 00000000..0c4b3fb1
--- /dev/null
+++ b/src/PHPCR/Util/CND/Scanner/TokenQueue.php
@@ -0,0 +1,114 @@
+
+ */
+class TokenQueue implements \IteratorAggregate
+{
+    /**
+     * The queued tokens; the array's internal pointer marks the current token
+     *
+     * @var array
+     */
+    protected $tokens;
+
+    /**
+     * @param array $tokens Initial content of the queue
+     */
+    public function __construct($tokens = array())
+    {
+        $this->tokens = $tokens;
+    }
+
+    /**
+     * Append a token at the end of the queue.
+     *
+     * @param Token $token
+     */
+    public function add(Token $token)
+    {
+        $this->tokens[] = $token;
+    }
+
+    /**
+     * Move back to the first token.
+     *
+     * @return Token|bool The first token, or false when the queue is empty
+     */
+    public function reset()
+    {
+        return reset($this->tokens);
+    }
+
+    /**
+     * @return bool True when there is no current token (empty queue or end reached)
+     */
+    public function isEof()
+    {
+        return current($this->tokens) === false;
+    }
+
+    /**
+     * Return a token without moving the current position.
+     *
+     * @param int $offset Distance from the current token, 0 being the current token itself
+     *
+     * @return Token|bool The requested token, or false when there is none at that position
+     */
+    public function peek($offset = 0)
+    {
+        if (!$offset) {
+            return current($this->tokens);
+        }
+
+        $position = key($this->tokens);
+
+        // key() yields null once the end of the queue has been passed; adding the offset
+        // to it would silently look up a token counted from the start of the queue.
+        if ($position === null) {
+            return false;
+        }
+
+        $lookup = $position + $offset;
+
+        return isset($this->tokens[$lookup]) ? $this->tokens[$lookup] : false;
+    }
+
+    /**
+     * Consume $count tokens and return the last one consumed.
+     *
+     * @param int $count Number of tokens to consume
+     *
+     * @return Token|bool|null The last consumed token, false if the end was already reached, null when $count is below 1
+     */
+    public function get($count = 1)
+    {
+        $item = null;
+        for ($i = 1; $i <= $count; $i++) {
+            $item = $this->peek();
+            $this->next();
+        }
+
+        return $item;
+    }
+
+    /**
+     * Advance to the next token.
+     *
+     * @return Token|bool The new current token, or false when the end is reached
+     */
+    public function next()
+    {
+        return next($this->tokens);
+    }
+
+    /**
+     * @return \ArrayIterator Iterator over all queued tokens, independent of the current position
+     */
+    public function getIterator()
+    {
+        return new \ArrayIterator($this->tokens);
+    }
+}
\ No newline at end of file
diff --git a/tests/PHPCR/Tests/Util/CND/Fixtures/files/TestFile.php b/tests/PHPCR/Tests/Util/CND/Fixtures/files/TestFile.php
new file mode 100644
index 00000000..d83cd493
--- /dev/null
+++
b/tests/PHPCR/Tests/Util/CND/Fixtures/files/TestFile.php @@ -0,0 +1,18 @@ +assertInstanceOf('\PHPCR\Util\CND\Reader\BufferReader', $reader); + $this->assertAttributeEquals($buffer . $reader->getEofMarker(), 'buffer', $reader); + $this->assertAttributeEquals(0, 'startPos', $reader); + $this->assertAttributeEquals(0, 'forwardPos', $reader); + + $this->assertEquals(1, $reader->getCurrentLine()); + $this->assertEquals(1, $reader->getCurrentColumn()); + + $this->assertEquals('', $reader->current()); + $this->assertEquals('S', $reader->forward()); + $this->assertEquals('So', $reader->forward()); + + $reader->rewind(); + + $this->assertEquals(1, $reader->getCurrentLine()); + $this->assertEquals(1, $reader->getCurrentColumn()); + + $this->assertEquals('', $reader->current()); + $this->assertEquals('S', $reader->forward()); + $this->assertEquals('So', $reader->forward()); + $this->assertEquals('Som', $reader->forward()); + $this->assertEquals('Some', $reader->forward()); + $this->assertEquals('Some', $reader->consume()); + + $this->assertEquals(5, $reader->getCurrentColumn()); + + $this->assertEquals(' ', $reader->forward()); + $this->assertEquals(' r', $reader->forward()); + $reader->rewind(); + $this->assertEquals(' ', $reader->forward()); + $this->assertEquals(' ', $reader->consume()); + + $this->assertEquals(6, $reader->getCurrentColumn()); + + $this->assertEquals('r', $reader->forward()); + $this->assertEquals('ra', $reader->forward()); + $this->assertEquals('ran', $reader->forward()); + $this->assertEquals('rand', $reader->forward()); + $this->assertEquals('rando', $reader->forward()); + $this->assertEquals('random', $reader->forward()); + $this->assertEquals('random', $reader->consume()); + + $this->assertEquals(12, $reader->getCurrentColumn()); + + $this->assertEquals(PHP_EOL, $reader->forward()); + $this->assertEquals(PHP_EOL, $reader->consume()); + + $this->assertEquals(2, $reader->getCurrentLine()); + $this->assertEquals(1, $reader->getCurrentColumn()); + + 
$this->assertEquals('s', $reader->forward()); + $this->assertEquals('st', $reader->forward()); + $this->assertEquals('str', $reader->forward()); + $this->assertEquals('stri', $reader->forward()); + $this->assertEquals('strin', $reader->forward()); + $this->assertEquals('string', $reader->forward()); + $this->assertEquals('string', $reader->consume()); + + $this->assertEquals(2, $reader->getCurrentLine()); + $this->assertEquals(7, $reader->getCurrentColumn()); + + $this->assertEquals($reader->getEofMarker(), $reader->forward()); + $this->assertEquals($reader->getEofMarker(), $reader->consume()); + $this->assertEquals($reader->getEofMarker(), $reader->forward()); + } + + public function test__constructEmptyString() + { + $reader = new BufferReader(''); + + $this->assertInstanceOf('\PHPCR\Util\CND\Reader\BufferReader', $reader); + $this->assertAttributeEquals($reader->getEofMarker(), 'buffer', $reader); + $this->assertAttributeEquals(0, 'startPos', $reader); + $this->assertAttributeEquals(0, 'forwardPos', $reader); + + $this->assertEquals(1, $reader->getCurrentLine()); + $this->assertEquals(1, $reader->getCurrentColumn()); + + $this->assertEquals('', $reader->current()); + $this->assertEquals($reader->getEofMarker(), $reader->forward()); + $this->assertEquals($reader->getEofMarker(), $reader->forward()); + $reader->rewind(); + $this->assertEquals($reader->getEofMarker(), $reader->forward()); + $this->assertEquals($reader->getEofMarker(), $reader->consume()); + } + +} diff --git a/tests/PHPCR/Tests/Util/CND/Reader/FileReaderTest.php b/tests/PHPCR/Tests/Util/CND/Reader/FileReaderTest.php new file mode 100644 index 00000000..ad2d1dab --- /dev/null +++ b/tests/PHPCR/Tests/Util/CND/Reader/FileReaderTest.php @@ -0,0 +1,91 @@ +filename = __DIR__ . 
'/../Fixtures/files/TestFile.txt'; + $this->reader = new FileReader($this->filename); + + $this->lines = array( + 'This is a test file...', + '', + '...containing dummy content.', + '' + ); + + $this->content = file_get_contents($this->filename); + $this->chars = array_merge( + preg_split('//', $this->lines[0], -1, PREG_SPLIT_NO_EMPTY), + array("\n", "\n"), + preg_split('//', $this->lines[2], -1, PREG_SPLIT_NO_EMPTY), + array("\n", "\n") + ); + } + + /** + * @expectedException \InvalidArgumentException + */ + public function test__construct_fileNotFound() + { + $reader = new FileReader('unexisting_file'); + } + + public function testGetFileName() + { + $this->assertEquals($this->filename, $this->reader->getFileName()); + } + + public function testGetNextChar() + { + $curLine = 1; + $curCol = 1; + + for ($i = 0; $i < count($this->chars); $i++) { + + $peek = $this->reader->currentChar(); + + if ($peek === $this->reader->getEofMarker()) { + $this->assertEquals(count($this->chars) - 1, $i); + break; + } + + //var_dump('Expected:' . $this->chars[$i] . ', found: ' . 
$peek); + + $this->assertEquals($curLine, $this->reader->getCurrentLine()); + $this->assertEquals($curCol, $this->reader->getCurrentColumn()); + + // Assert isEof is false before the end of the file + $this->assertFalse($this->reader->isEof()); + + // Assert isEol is true at end of the lines + if ($peek === "\n") { + $curLine++; + $curCol = 1; + } else { + $curCol++; + } + + // Assert the next character is the expected one + $this->assertEquals($peek, $this->chars[$i]); + $this->assertEquals( + $this->chars[$i], + $peek, + sprintf("Character mismatch at position %s, expected '%s', found '%s'", $i, $this->chars[$i], $peek) + ); + + $this->reader->forward(); + $this->reader->consume(); + } + + // Check it's the end of the file + $this->assertEquals($this->reader->getEofMarker(), $this->reader->currentChar()); + $this->assertTrue($this->reader->isEof()); + $this->assertEquals(false, $this->reader->forwardChar()); + } + +} diff --git a/tests/PHPCR/Tests/Util/CND/Scanner/GenericScannerTest.php b/tests/PHPCR/Tests/Util/CND/Scanner/GenericScannerTest.php new file mode 100644 index 00000000..af73faab --- /dev/null +++ b/tests/PHPCR/Tests/Util/CND/Scanner/GenericScannerTest.php @@ -0,0 +1,172 @@ +expectedTokensNoEmptyToken = array(); + foreach($this->expectedTokens as $token) { + if ($token[0] !== Token::TK_NEWLINE && $token[0] !== Token::TK_WHITESPACE) { + $this->expectedTokensNoEmptyToken[] = $token; + } + } + } + + public function testScan() + { + $reader = new FileReader(__DIR__ . '/../Fixtures/files/TestFile.php'); + + // Test the raw file with newlines and whitespaces + $scanner = new GenericScanner(new DefaultScannerContext()); + $queue = $scanner->scan($reader); + $this->assertTokens($this->expectedTokens, $queue); + } + + public function testFilteredScan() + { + $reader = new FileReader(__DIR__ . 
'/../Fixtures/files/TestFile.php'); + + // Test the raw file with newlines and whitespaces + $context = new DefaultScannerContext(); + $context->addTokenFilter(new TokenFilter\NoNewlinesFilter()); + $context->addTokenFilter(new TokenFilter\NoWhitespacesFilter()); + $scanner = new GenericScanner($context); + + $queue = $scanner->scan($reader); + $this->assertTokens($this->expectedTokensNoEmptyToken, $queue); + } + + protected function assertTokens($tokens, TokenQueue $queue) + { + $queue->reset(); + + $it = new \ArrayIterator($tokens); + + $token = $queue->peek(); + + while ($it->valid()) { + + $expectedToken = $it->current(); + + $this->assertFalse($queue->isEof(), 'There is no more tokens, expected = ' . $expectedToken[1]); + + //var_dump("Expected: {$expectedToken[1]}, Found: {$token->getData()}"); + + $this->assertToken($expectedToken[0], $expectedToken[1], $token); + + $token = $queue->next(); + $it->next(); + } + + $this->assertTrue($queue->isEof(), 'There are more unexpected tokens.'); + } + + protected function assertToken($type, $data, Token $token) + { + //var_dump($token); + $this->assertEquals($type, $token->getType(), + sprintf('Expected token [%s, %s], found [%s, %s]', Token::getTypeName($type), $data, Token::getTypeName($token->getType()), $token->getData())); + + $this->assertEquals($data, trim($token->getData()), + sprintf('Expected token [%s, %s], found [%s, %s]', Token::getTypeName($type), $data, Token::getTypeName($token->getType()), $token->getData())); + } +} diff --git a/tests/PHPCR/Tests/Util/CND/Scanner/TokenQueueTest.php b/tests/PHPCR/Tests/Util/CND/Scanner/TokenQueueTest.php new file mode 100644 index 00000000..087a8d0b --- /dev/null +++ b/tests/PHPCR/Tests/Util/CND/Scanner/TokenQueueTest.php @@ -0,0 +1,89 @@ +token0 = new Token(0, 'token 0'); + $this->token1 = new Token(1, 'token 1'); + $this->token2 = new Token(2, 'token 2'); + $this->token3 = new Token(3, 'token 3'); + + $this->queue = new TokenQueue(); + 
$this->queue->add($this->token0); + $this->queue->add($this->token1); + $this->queue->add($this->token2); + $this->queue->add($this->token3); + } + + public function testAdd() + { + $queue = new TokenQueue(); + $this->assertAttributeEquals(array(), 'tokens', $queue); + + $queue->add($this->token0); + $this->assertAttributeEquals(array($this->token0), 'tokens', $queue); + + $queue->add($this->token1); + $this->assertAttributeEquals(array($this->token0, $this->token1), 'tokens', $queue); + } + + public function testResetAndPeek() + { + $this->assertEquals($this->token0, $this->queue->reset()); + $this->assertEquals($this->token0, $this->queue->peek()); + } + + public function testIsEofAndNext() + { + // Token0 + $this->assertFalse($this->queue->isEof()); + + // Token1 + $this->queue->next(); + $this->assertFalse($this->queue->isEof()); + + // Token2 + $this->queue->next(); + $this->assertFalse($this->queue->isEof()); + + // Token3 + $this->queue->next(); + $this->assertFalse($this->queue->isEof()); + + // EOF + $this->queue->next(); + $this->assertTrue($this->queue->isEof()); + } + + public function testIsEofEmptyQueue() + { + $queue = new TokenQueue(); + $this->assertTrue($queue->isEof()); + $queue->add(new Token(0, 'token')); + $this->assertFalse($queue->isEof()); + } + + public function testGet() + { + $this->queue->reset(); + $this->assertEquals($this->token0, $this->queue->get()); + $this->assertEquals($this->token1, $this->queue->get()); + $this->assertEquals($this->token2, $this->queue->get()); + $this->assertEquals($this->token3, $this->queue->get()); + $this->assertEquals(false, $this->queue->get()); + } + + public function testGetIterator() + { + $this->assertEquals( + array($this->token0, $this->token1, $this->token2, $this->token3), + iterator_to_array($this->queue->getIterator()) + ); + } +} diff --git a/tests/PHPCR/Tests/Util/CND/Scanner/TokenTest.php b/tests/PHPCR/Tests/Util/CND/Scanner/TokenTest.php new file mode 100644 index 00000000..e110f5b9 --- 
/dev/null +++ b/tests/PHPCR/Tests/Util/CND/Scanner/TokenTest.php @@ -0,0 +1,35 @@ +token = new Token(123, 'foobar'); + } + + public function test__construct() + { + $this->assertAttributeEquals(123, 'type', $this->token); + $this->assertAttributeEquals('foobar', 'data', $this->token); + } + + public function testGetData() + { + $this->assertEquals('foobar', $this->token->getData()); + } + + public function testGetType() + { + $this->assertEquals(123, $this->token->getType()); + } + + public function test__toString() + { + $this->assertEquals('TOKEN(123, \'foobar\', 0, 0)', $this->token->__toString()); + } + +}