// Snapshot of revision 9a269cd94c: 984 lines (888 sloc), 15.8 KB.
/***************************************************************************
*
* $Id$
*
**************************************************************************/
/**
* @file $HeadURL$
* @author $Author$(hoping@baimashi.com)
* @date $Date$
* @version $Revision$
* @brief
*
**/
#include "Parser.h"

#include <climits>

#include "Selector.h"
#include "QueryUtil.h"
/**
 * Creates a parser over the given selector text with the read cursor at
 * offset 0.
 *
 * @param aInput selector expression to parse
 */
CParser::CParser(std::string aInput)
{
	mOffset = 0;
	// aInput is already a private copy (passed by value), so adopt its
	// contents instead of copying them a second time.
	mInput.swap(aInput);
}
/** Nothing to clean up explicitly: this destructor body is empty. */
CParser::~CParser() {}
/**
 * Convenience entry point: parses a complete selector group from aInput
 * using a temporary parser.
 *
 * @param aInput selector expression to parse
 * @return the selector produced by parseSelectorGroup()
 */
CSelector* CParser::create(std::string aInput)
{
	return CParser(aInput).parseSelectorGroup();
}
/**
 * Parses one or more selectors separated by ','. Each additional selector is
 * combined with the result so far through a CBinarySelector::EUnion node.
 *
 * Parsing stops (without error) at the first character that is not ',' after
 * a selector, leaving mOffset on that character.
 *
 * @return the parsed selector; the caller receives the reference created here
 * @throws std::string (built by error()) propagated from parseSelector()
 */
CSelector* CParser::parseSelectorGroup()
{
	CSelector* ret = parseSelector();
	while (mOffset < mInput.size() && mInput[mOffset] == ',')
	{
		mOffset++;

		CSelector* sel = NULL;
		try
		{
			sel = parseSelector();
		}
		catch (...)
		{
			// Drop the selector built so far before propagating, following the
			// release-before-throw convention used in parsePseudoclassSelector();
			// otherwise it would be leaked on a parse error after the comma.
			ret->release();
			throw;
		}

		// The union node is handed both operands and this function then gives
		// up its own references to them.
		CSelector* oldRet = ret;
		ret = new CBinarySelector(CBinarySelector::EUnion, oldRet, sel);
		sel->release();
		oldRet->release();
	}
	return ret;
}
/**
 * Parses a chain of simple selector sequences joined by combinators:
 * whitespace (descendant), '>' (child), '+' and '~' (sibling forms).
 *
 * Parsing stops at end of input, at ',' or ')', or at any character that is
 * not preceded by a combinator.
 *
 * @return the parsed selector; the caller receives the reference created here
 * @throws std::string (built by error()) propagated from
 *         parseSimpleSelectorSequence()
 */
CSelector* CParser::parseSelector()
{
	skipWhitespace();
	CSelector* ret = parseSimpleSelectorSequence();
	while (true)
	{
		// Whitespace alone is the descendant combinator unless an explicit
		// combinator character follows it.
		char combinator = 0;
		if (skipWhitespace())
		{
			combinator = ' ';
		}
		if (mOffset >= mInput.size())
		{
			return ret;
		}

		const char c = mInput[mOffset];
		if (c == '+' || c == '>' || c == '~')
		{
			combinator = c;
			mOffset++;
			skipWhitespace();
		}
		else if (c == ',' || c == ')')
		{
			return ret;
		}
		if (combinator == 0)
		{
			return ret;
		}

		CSelector* sel = NULL;
		try
		{
			sel = parseSimpleSelectorSequence();
		}
		catch (...)
		{
			// Do not leak the left-hand side when the right-hand side is invalid.
			ret->release();
			throw;
		}

		CSelector* oldRet = ret;
		bool isOk = true;
		switch (combinator)
		{
			case ' ':
				ret = new CBinarySelector(CBinarySelector::EDescendant, oldRet, sel);
				break;
			case '>':
				ret = new CBinarySelector(CBinarySelector::EChild, oldRet, sel);
				break;
			case '+':
				ret = new CBinarySelector(oldRet, sel, true);
				break;
			case '~':
				// Previously this passed `true` as well, which made '~' build
				// exactly the same node as '+'.
				// NOTE(review): the bool is assumed to be the "adjacent only"
				// flag of CBinarySelector(CSelector*, CSelector*, bool) --
				// confirm against Selector.h.
				ret = new CBinarySelector(oldRet, sel, false);
				break;
			default:
				isOk = false;
				break;
		}
		oldRet->release();
		sel->release();
		if (!isOk)
		{
			throw error("impossible");
		}
	}
}
/**
 * Parses one simple selector sequence: an optional type selector or '*',
 * followed by any number of '#id', '.class', '[attr]' and ':pseudo' parts.
 * Multiple parts are combined with CBinarySelector::EIntersection.
 *
 * When nothing but '*' was read, a default-constructed CSelector is returned.
 *
 * @return the parsed selector; the caller receives the reference created here
 * @throws std::string (built by error()) on end of input or when a component
 *         parser fails
 */
CSelector* CParser::parseSimpleSelectorSequence()
{
	if (mOffset >= mInput.size())
	{
		throw error("expected selector, found EOF instead");
	}

	CSelector* ret = NULL;
	const char first = mInput[mOffset];
	if (first == '*')
	{
		mOffset++;
	}
	else if (first != '#' && first != '.' && first != '[' && first != ':')
	{
		ret = parseTypeSelector();
	}

	while (mOffset < mInput.size())
	{
		const char c = mInput[mOffset];
		if (c != '#' && c != '.' && c != '[' && c != ':')
		{
			break;
		}

		CSelector* sel = NULL;
		try
		{
			switch (c)
			{
				case '#':
					sel = parseIDSelector();
					break;
				case '.':
					sel = parseClassSelector();
					break;
				case '[':
					sel = parseAttributeSelector();
					break;
				default: // ':'
					sel = parsePseudoclassSelector();
					break;
			}
		}
		catch (...)
		{
			// Release what has been built so far; otherwise a parse error in
			// a later component leaks the earlier ones.
			if (ret != NULL)
			{
				ret->release();
			}
			throw;
		}

		if (ret == NULL)
		{
			ret = sel;
		}
		else
		{
			CSelector* oldRet = ret;
			ret = new CBinarySelector(CBinarySelector::EIntersection, oldRet, sel);
			sel->release();
			oldRet->release();
		}
	}

	if (ret == NULL)
	{
		ret = new CSelector();
	}
	return ret;
}
void CParser::parseNth(int& aA, int& aB)
{
if (mOffset >= mInput.size())
{
goto eof;
}
{
char c = mInput[mOffset];
if (c == '-')
{
mOffset++;
goto negativeA;
}
else if (c == '+')
{
mOffset++;
goto positiveA;
}
else if (c >= '0' && c <= '9')
{
goto positiveA;
}
else if (c == 'n' || c == 'N')
{
goto readN;
}
else if (c == 'o' || c == 'O' || c == 'e' || c == 'E')
{
std::string id = parseName();
id = CQueryUtil::tolower(id);
if (id == "odd")
{
aA = 2;
aB = 1;
}
else if (id == "even")
{
aA = 2;
aB = 0;
}
else
{
throw error("expected 'odd' or 'even', invalid found");
}
return;
}
else
{
goto invalid;
}
}
positiveA:
{
if (mOffset >= mInput.size())
{
goto eof;
}
char c = mInput[mOffset];
if (c >= '0' && c <= '9')
{
aA = parseInteger();
goto readA;
}
else if (c == 'n' || c == 'N')
{
aA = 1;
mOffset++;
goto readN;
}
else
{
goto invalid;
}
}
negativeA:
{
if (mOffset >= mInput.size())
{
goto eof;
}
char c = mInput[mOffset];
if (c >= '0' && c <= '9')
{
aA = -parseInteger();
goto readA;
}
else if (c == 'n' || c == 'N')
{
aA = -1;
mOffset++;
goto readN;
}
else
{
goto invalid;
}
}
readA:
{
if (mOffset >= mInput.size())
{
goto eof;
}
char c = mInput[mOffset];
if (c == 'n' || c == 'N')
{
mOffset++;
goto readN;
}
else
{
aB = aA;
aA = 0;
return;
}
}
readN:
{
skipWhitespace();
if (mOffset >= mInput.size())
{
goto eof;
}
char c = mInput[mOffset];
if (c == '+')
{
mOffset++;
skipWhitespace();
aB = parseInteger();
return;
}
else if (c == '-')
{
mOffset--;
skipWhitespace();
aB = -parseInteger();
return;
}
else
{
aB = 0;
return;
}
}
eof:
{
throw error("unexpected EOF while attempting to parse expression of form an+b");
}
invalid:
{
throw error("unexpected character while attempting to parse expression of form an+b");
}
}
int CParser::parseInteger()
{
size_t offset = mOffset;
int i = 0;
for (; offset < mInput.size(); offset++)
{
char c = mInput[offset];
if (c < '0' || c > '9')
{
break;
}
i = i * 10 + c - '0';
}
if (offset == mOffset)
{
throw error("expected integer, but didn't find it.");
}
mOffset = offset;
return i;
}
/**
 * Parses a pseudo-class starting at ':' and builds the matching selector.
 * The name is compared case-insensitively (it is lower-cased first).
 *
 * Handled names:
 *   - not / has / haschild: take a parenthesised selector group and wrap it
 *     in a CUnarySelector;
 *   - contains / containsown: take a quoted string or identifier (lower-cased)
 *     and build a CTextSelector;
 *   - matches / matchesown: rejected ("unsupported regex");
 *   - nth-child / nth-last-child / nth-of-type / nth-last-of-type: take an
 *     an+b expression parsed by parseNth();
 *   - first-child, last-child, first-of-type, last-of-type, only-child,
 *     only-of-type, empty: take no argument.
 *
 * @return a newly created selector
 * @throws std::string (built by error()) on malformed input or an unknown
 *         pseudo-class name
 */
CSelector* CParser::parsePseudoclassSelector()
{
	const bool atColon = mOffset < mInput.size() && mInput[mOffset] == ':';
	if (!atColon)
	{
		throw error("expected pseudoclass selector (:pseudoclass), found invalid char");
	}
	mOffset++;

	std::string name = parseIdentifier();
	name = CQueryUtil::tolower(name);

	// Every branch below either returns or throws, so the branches are
	// written as independent guard blocks.

	if (name == "not" || name == "has" || name == "haschild")
	{
		if (!consumeParenthesis())
		{
			throw error("expected '(' but didn't find it");
		}
		CSelector* inner = parseSelectorGroup();
		if (!consumeClosingParenthesis())
		{
			inner->release();
			throw error("expected ')' but didn't find it");
		}

		CUnarySelector::TOperator op;
		if (name == "not")
		{
			op = CUnarySelector::ENot;
		}
		else if (name == "has")
		{
			op = CUnarySelector::EHasDescendant;
		}
		else if (name == "haschild")
		{
			op = CUnarySelector::EHasChild;
		}
		else
		{
			inner->release();
			throw error("impossbile");
		}

		CSelector* wrapped = new CUnarySelector(op, inner);
		inner->release();
		return wrapped;
	}

	if (name == "contains" || name == "containsown")
	{
		if (!consumeParenthesis() || mOffset >= mInput.size())
		{
			throw error("expected '(' but didn't find it");
		}

		// The argument is either a quoted string or a bare identifier.
		const char lead = mInput[mOffset];
		std::string value;
		if (lead == '\'' || lead == '"')
		{
			value = parseString();
		}
		else
		{
			value = parseIdentifier();
		}
		value = CQueryUtil::tolower(value);

		skipWhitespace();
		if (!consumeClosingParenthesis())
		{
			throw error("expected ')' but didn't find it");
		}

		CTextSelector::TOperator op;
		if (name == "contains")
		{
			op = CTextSelector::EContains;
		}
		else if (name == "containsown")
		{
			op = CTextSelector::EOwnContains;
		}
		else
		{
			throw error("impossibile");
		}
		return new CTextSelector(op, value);
	}

	if (name == "matches" || name == "matchesown")
	{
		//TODO
		throw error("unsupported regex");
	}

	if (name == "nth-child" || name == "nth-last-child" || name == "nth-of-type"
			|| name == "nth-last-of-type")
	{
		if (!consumeParenthesis())
		{
			throw error("expected '(' but didn't find it");
		}
		int a, b;
		parseNth(a, b);
		if (!consumeClosingParenthesis())
		{
			throw error("expected ')' but didn't find it");
		}
		// The two flags are true for the "-last-" and "-of-type" variants
		// respectively.
		const bool isLast = (name == "nth-last-child" || name == "nth-last-of-type");
		const bool isOfType = (name == "nth-of-type" || name == "nth-last-of-type");
		return new CSelector(a, b, isLast, isOfType);
	}

	if (name == "first-child")
	{
		return new CSelector(0, 1, false, false);
	}
	if (name == "last-child")
	{
		return new CSelector(0, 1, true, false);
	}
	if (name == "first-of-type")
	{
		return new CSelector(0, 1, false, true);
	}
	if (name == "last-of-type")
	{
		return new CSelector(0, 1, true, true);
	}
	if (name == "only-child")
	{
		return new CSelector(false);
	}
	if (name == "only-of-type")
	{
		return new CSelector(true);
	}
	if (name == "empty")
	{
		return new CSelector(CSelector::EEmpty);
	}

	throw error("unsupported op:" + name);
}
/**
 * Parses an attribute selector of the form [key] or [key OP value], where OP
 * is one of = ~= |= ^= $= *= and value is a quoted string or an identifier.
 * Whitespace is allowed after '[', around the operator and before ']'.
 * The operator "#=" is recognised but rejected ("unsupported regex").
 *
 * @return a newly created CAttributeSelector
 * @throws std::string (built by error()) on malformed input or an unsupported
 *         operator
 */
CSelector* CParser::parseAttributeSelector()
{
	static const char* const kEof = "unexpected EOF in attribute selector";

	if (!(mOffset < mInput.size() && mInput[mOffset] == '['))
	{
		throw error("expected attribute selector ([attribute]), found invalid char");
	}
	++mOffset;

	skipWhitespace();
	std::string key = parseIdentifier();
	skipWhitespace();

	if (mOffset >= mInput.size())
	{
		throw error(kEof);
	}
	if (mInput[mOffset] == ']')
	{
		// Bare [key]: presence test only.
		++mOffset;
		return new CAttributeSelector(CAttributeSelector::EExists, key);
	}

	// The operator is either "=" or a two-character token ending in '='.
	if (mOffset + 2 > mInput.size())
	{
		throw error(kEof);
	}
	std::string op(mInput, mOffset, 2);
	if (op[0] == '=')
	{
		op.erase(1);
	}
	else if (op[1] != '=')
	{
		throw error("expected equality operator, found invalid char");
	}
	mOffset += op.size();

	skipWhitespace();
	if (mOffset >= mInput.size())
	{
		throw error(kEof);
	}

	if (op == "#=")
	{
		//TODO
		throw error("unsupported regex");
	}

	std::string value;
	const char lead = mInput[mOffset];
	if (lead == '\'' || lead == '"')
	{
		value = parseString();
	}
	else
	{
		value = parseIdentifier();
	}

	skipWhitespace();
	if (mOffset >= mInput.size() || mInput[mOffset] != ']')
	{
		throw error("expected attribute selector ([attribute]), found invalid char");
	}
	++mOffset;

	// Operator token -> selector operator.
	struct TOpEntry
	{
		const char* mText;
		CAttributeSelector::TOperator mOp;
	};
	static const TOpEntry kOps[] =
	{
		{ "=", CAttributeSelector::EEquals },
		{ "~=", CAttributeSelector::EIncludes },
		{ "|=", CAttributeSelector::EDashMatch },
		{ "^=", CAttributeSelector::EPrefix },
		{ "$=", CAttributeSelector::ESuffix },
		{ "*=", CAttributeSelector::ESubString },
	};
	for (size_t k = 0; k < sizeof(kOps) / sizeof(kOps[0]); ++k)
	{
		if (op == kOps[k].mText)
		{
			return new CAttributeSelector(kOps[k].mOp, key, value);
		}
	}

	if (op == "#=")
	{
		//TODO
		throw error("unsupported regex");
	}
	throw error("unsupported op:" + op);
}
/**
 * Parses ".name" and turns it into an EIncludes test on the "class"
 * attribute.
 *
 * @return a newly created CAttributeSelector
 * @throws std::string (built by error()) when the cursor is not on '.' or the
 *         identifier is invalid
 */
CSelector* CParser::parseClassSelector()
{
	const bool atDot = mOffset < mInput.size() && mInput[mOffset] == '.';
	if (!atDot)
	{
		throw error("expected class selector (.class), found invalid char");
	}
	++mOffset;

	std::string className = parseIdentifier();
	return new CAttributeSelector(CAttributeSelector::EIncludes, "class", className);
}
/**
 * Parses "#name" and turns it into an EEquals test on the "id" attribute.
 * The name is read with parseName(), so it may start with a digit or '-'.
 *
 * @return a newly created CAttributeSelector
 * @throws std::string (built by error()) when the cursor is not on '#' or no
 *         name follows
 */
CSelector* CParser::parseIDSelector()
{
	const bool atHash = mOffset < mInput.size() && mInput[mOffset] == '#';
	if (!atHash)
	{
		throw error("expected id selector (#id), found invalid char");
	}
	++mOffset;

	std::string idValue = parseName();
	return new CAttributeSelector(CAttributeSelector::EEquals, "id", idValue);
}
/**
 * Parses an element name and builds a selector from the tag value that
 * gumbo_tag_enum() returns for it.
 *
 * @return a newly created CSelector
 * @throws std::string (built by error()) when no identifier is present
 */
CSelector* CParser::parseTypeSelector()
{
	const std::string tagName = parseIdentifier();
	return new CSelector(gumbo_tag_enum(tagName.c_str()));
}
bool CParser::consumeClosingParenthesis()
{
size_t offset = mOffset;
skipWhitespace();
if (mOffset < mInput.size() && mInput[mOffset] == ')')
{
mOffset++;
return true;
}
mOffset = offset;
return false;
}
/**
 * Consumes a '(' at the cursor together with any whitespace that follows it.
 *
 * @return true when '(' was consumed; false (cursor unchanged) otherwise
 */
bool CParser::consumeParenthesis()
{
	if (mOffset >= mInput.size() || mInput[mOffset] != '(')
	{
		return false;
	}
	++mOffset;
	skipWhitespace();
	return true;
}
bool CParser::skipWhitespace()
{
size_t offset = mOffset;
while (offset < mInput.size())
{
char c = mInput[offset];
if (c == ' ' || c == '\r' || c == '\t' || c == '\n' || c == '\f')
{
offset++;
continue;
}
else if (c == '/')
{
if (mInput.size() > offset + 1 && mInput[offset + 1] == '*')
{
size_t pos = mInput.find("*/", offset + 2);
if (pos != std::string::npos)
{
offset = pos + 2;
continue;
}
}
}
break;
}
if (offset > mOffset)
{
mOffset = offset;
return true;
}
return false;
}
std::string CParser::parseString()
{
size_t offset = mOffset;
if (mInput.size() < offset + 2)
{
throw error("expected string, found EOF instead");
}
char quote = mInput[offset];
offset++;
std::string ret;
while (offset < mInput.size())
{
char c = mInput[offset];
if (c == '\\')
{
if (mInput.size() > offset + 1)
{
char c = mInput[offset + 1];
if (c == '\r')
{
if (mInput.size() > offset + 2 && mInput[offset + 2] == '\n')
{
offset += 3;
continue;
}
}
if (c == '\r' || c == '\n' || c == '\f')
{
offset += 2;
continue;
}
}
mOffset = offset;
ret += parseEscape();
offset = mOffset;
}
else if (c == quote)
{
break;
}
else if (c == '\r' || c == '\n' || c == '\f')
{
throw error("unexpected end of line in string");
}
else
{
size_t start = offset;
while (offset < mInput.size())
{
char c = mInput[offset];
if (c == quote || c == '\\' || c == '\r' || c == '\n' || c == '\f')
{
break;
}
offset++;
}
ret += mInput.substr(start, offset - start);
}
}
if (offset >= mInput.size())
{
throw error("EOF in string");
}
offset++;
mOffset = offset;
return ret;
}
std::string CParser::parseName()
{
size_t offset = mOffset;
std::string ret;
while (offset < mInput.size())
{
char c = mInput[offset];
if (nameChar(c))
{
size_t start = offset;
while (offset < mInput.size() && nameChar(mInput[offset]))
{
offset++;
}
ret += mInput.substr(start, offset - start);
}
else if (c == '\\')
{
mOffset = offset;
ret += parseEscape();
offset = mOffset;
}
else
{
break;
}
}
if (ret == "")
{
throw error("expected name, found EOF instead");
}
mOffset = offset;
return ret;
}
/**
 * Parses an identifier: an optional leading '-', then a name-start character
 * (see nameStart()) or a backslash escape, then the rest of the name as read
 * by parseName().
 *
 * Note that a leading '-' is consumed before validation, so mOffset has
 * already moved past it when an error is raised.
 *
 * @return the identifier, including the leading '-' if one was present
 * @throws std::string (built by error()) on end of input or an invalid first
 *         character
 */
std::string CParser::parseIdentifier()
{
	const bool hasDash = mOffset < mInput.size() && mInput[mOffset] == '-';
	if (hasDash)
	{
		++mOffset;
	}

	if (mOffset >= mInput.size())
	{
		throw error("expected identifier, found EOF instead");
	}

	const char first = mInput[mOffset];
	if (first != '\\' && !nameStart(first))
	{
		throw error("expected identifier, found invalid char");
	}

	const std::string body = parseName();
	return hasDash ? "-" + body : body;
}
/**
 * Tells whether c may appear inside a name: a decimal digit, '-', or any
 * character accepted by nameStart().
 */
bool CParser::nameChar(char c)
{
	if (c == '-')
	{
		return true;
	}
	if (c >= '0' && c <= '9')
	{
		return true;
	}
	return nameStart(c);
}
/**
 * Tells whether c may start a name: an ASCII letter, '_', or any non-ASCII
 * byte (value above 127).
 *
 * The non-ASCII test is done on the unsigned value of the byte. With the
 * previous `c > 127` comparison the test could never succeed on platforms
 * where plain char is signed, so identifiers containing non-ASCII (e.g.
 * UTF-8 encoded) characters were rejected there.
 */
bool CParser::nameStart(char c)
{
	const unsigned char uc = static_cast<unsigned char>(c);
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_') || (uc > 127);
}
/** Tells whether c is one of 0-9, a-f or A-F. */
bool CParser::hexDigit(char c)
{
	if (c >= '0' && c <= '9')
	{
		return true;
	}
	if (c >= 'a' && c <= 'f')
	{
		return true;
	}
	return c >= 'A' && c <= 'F';
}
/**
 * Decodes one backslash escape starting at mOffset and advances mOffset past
 * it.
 *
 *   - backslash + non-hex character: yields that single character;
 *   - backslash + hex digits: digits are read in pairs and each pair yields
 *     one byte, so the number of digits must be even. One whitespace
 *     character (or a CR LF pair) directly after the digits is consumed as
 *     the terminator of the escape.
 *
 * Fixes over the previous version:
 *   - up to six hex digits are read; the old bound was measured from the
 *     backslash rather than from the first digit and stopped after five;
 *   - an odd number of digits is always rejected; the old check looked at the
 *     pending half-byte value and therefore silently dropped a trailing '0'.
 *
 * mOffset is left unchanged when an error is raised.
 *
 * @return the decoded byte(s)
 * @throws std::string (built by error()) when the cursor is not on a
 *         complete escape, the escaped character is a line break, or the hex
 *         digit count is odd
 */
std::string CParser::parseEscape()
{
	if (mInput.size() < mOffset + 2 || mInput[mOffset] != '\\')
	{
		throw error("invalid escape sequence");
	}

	const size_t start = mOffset + 1;
	const char first = mInput[start];
	if (first == '\r' || first == '\n' || first == '\f')
	{
		throw error("escaped line ending outside string");
	}

	if (!hexDigit(first))
	{
		// Literal escape: the character stands for itself.
		mOffset += 2;
		return mInput.substr(start, 1);
	}

	static const size_t kMaxHexDigits = 6;

	std::string ret;
	unsigned int highNibble = 0;
	size_t i = start;
	for (; i < start + kMaxHexDigits && i < mInput.size() && hexDigit(mInput[i]); i++)
	{
		const char ch = mInput[i];
		unsigned int d = 0;
		if (ch >= '0' && ch <= '9')
		{
			d = ch - '0';
		}
		else if (ch >= 'a' && ch <= 'f')
		{
			d = ch - 'a' + 10;
		}
		else
		{
			// hexDigit() guarantees this is 'A'..'F'.
			d = ch - 'A' + 10;
		}

		if ((i - start) % 2)
		{
			ret.push_back(static_cast<char>((highNibble << 4) | d));
			highNibble = 0;
		}
		else
		{
			highNibble = d;
		}
	}

	// At least one digit was read (first is a hex digit), so an even count
	// also guarantees a non-empty result.
	if ((i - start) % 2 != 0)
	{
		throw error("invalid hex digit");
	}

	// Swallow the single whitespace terminator, treating CR LF as one.
	if (i < mInput.size())
	{
		switch (mInput[i])
		{
			case '\r':
				i++;
				if (i < mInput.size() && mInput[i] == '\n')
				{
					i++;
				}
				break;
			case ' ':
			case '\t':
			case '\n':
			case '\f':
				i++;
				break;
		}
	}

	mOffset = i;
	return ret;
}
/**
 * Builds the text of a parse error: the message followed by " at:" and the
 * current value of mOffset in decimal. The parser throws the returned
 * std::string.
 *
 * @param message description of what went wrong
 * @return message + " at:" + offset
 */
std::string CParser::error(std::string message)
{
	// Produce the decimal digits least-significant first, prepending each so
	// they end up in reading order. The do/while emits "0" for offset zero.
	std::string digits;
	size_t rest = mOffset;
	do
	{
		digits.insert(digits.begin(), static_cast<char>(rest % 10 + '0'));
		rest /= 10;
	} while (rest != 0);

	return message + " at:" + digits;
}
/* vim: set ts=4 sw=4 sts=4 tw=100 noet: */