Skip to content

Commit

Permalink
parser: use standard functions to parse identifiers
Browse files Browse the repository at this point in the history
modified the get identifier function to use standard library functions
to parse identifiers; the input string is temporarily put into a
standard input string stream and read from it

the scan word function was renamed to get word and it too was modified
to use a temporary standard input string stream, to return its values in
a new word structure containing string, data type (if present) and
parentheses flag (the latter two instead of reference arguments), and
was given a new word type argument (first or second) where a data type
and parentheses are not allowed for the second word

modified two word table search to take standard strings

corrected a problem with defined functions: only names that have a
letter after the 'fn' prefix are valid defined function names, all other
names starting with 'fn' are regular identifiers; added new identifiers
to parser test #2 (identifier tests) to test invalid 'fn' names (which
are valid identifier names)
  • Loading branch information
thunder422 committed Nov 7, 2014
1 parent 27f06e8 commit dbbd9fe
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 64 deletions.
141 changes: 87 additions & 54 deletions parser.cpp
Expand Up @@ -83,132 +83,165 @@ TokenUniquePtr Parser::operator()(Number number)

// function to get an identifier token at the current input position
//
//   - returns an empty token pointer if there is no word at the position
//   - a word beginning with the remark command name produces a remark
//     token holding the rest of the line
//   - a word beginning with "FN" followed by a letter is a defined function
//   - a word not found in the table produces a generic token (with or
//     without parentheses)
//   - a word found in the table may be combined with a following second
//     word into a two word command
//   - NOTE(review): this listing appears to interleave statements of the
//     replaced implementation (scanWord and QString based) with the new
//     one (getWord and std::string based) -- confirm which lines apply
TokenUniquePtr Parser::getIdentifier()
{
DataType dataType; // data type of word
bool paren; // word has opening parenthesis flag
// TODO temporary to simulate m_input as input string stream
std::string tmp {m_input.mid(m_pos).toStdString()};
std::istringstream m_input {tmp};

// check to see if this is the start of a remark
if (m_input.midRef(m_pos).startsWith(m_table.name(Rem_Code),
Qt::CaseInsensitive))
// remember the start position; getWord moves m_pos past a valid word
int pos {m_pos};
Word word = getWord(WordType::First);
if (word.string.empty())
{
// remark string is to end-of-line
int pos {m_pos};
int len = m_table.name(Rem_Code).length();
m_pos = m_input.length(); // set to end-of-line
return m_table.newToken(pos, len, Rem_Code,
m_input.mid(pos + len).toStdString());
return TokenUniquePtr{}; // not an identifier
}
// TODO simulate what getWord will do to input stream
// (m_pos - pos is the length of the word that getWord just read)
m_input.seekg(m_pos - pos);

int pos {scanWord(m_pos, dataType, paren)};
if (pos == -1)
// check to see if this is the start of a remark
// (need to check separately since a space not required after 'REM')
std::string name {m_table.name(Rem_Code).toStdString()};
// case-insensitive prefix test: the word only has to begin with the name
if (word.string.length() >= name.length() && std::equal(name.begin(),
name.end(), word.string.begin(), noCaseCompare))
{
return TokenUniquePtr{}; // not an identifier
// move to first char after 'REM'
m_input.seekg(name.length());
// read remark string to end-of-line
std::getline(m_input, word.string);
m_pos = pos + name.length() + word.string.length(); // to end-of-line
return m_table.newToken(pos, name.length(), Rem_Code,
std::move(word.string));
}

int len {pos - m_pos};
// a value-initialized type means "word was found in the table" below
Token::Type type {};
Code code;
// defined function?
if (m_input.midRef(m_pos).startsWith("FN", Qt::CaseInsensitive))
// defined function? (must also have a letter after "FN")
if (word.string.length() >= 3 && toupper(word.string[0]) == 'F'
&& toupper(word.string[1]) == 'N' && isalpha(word.string[2]))
{
type = paren ? Token::Type::DefFuncP : Token::Type::DefFuncN;
type = word.paren ? Token::Type::DefFuncP : Token::Type::DefFuncN;
}
else
{
SearchType search= paren ? ParenWord_SearchType : PlainWord_SearchType;
code = m_table.search(search, m_input.mid(m_pos, len).toStdString());
code = m_table.search(word.paren
? ParenWord_SearchType : PlainWord_SearchType, word.string);
if (code == Invalid_Code)
{
// word not found in table, therefore
// must be variable, array, generic function, or subroutine
// but that can't be determined here, so just generic token
type = paren ? Token::Type::Paren : Token::Type::NoParen;
type = word.paren ? Token::Type::Paren : Token::Type::NoParen;
}
}
std::swap(m_pos, pos); // swap begin and end positions

// type was set above only for defined functions and generic identifiers
if (type != Token::Type{})
{
if (paren)
if (word.paren)
{
--len; // don't store parentheses in token string
word.string.pop_back(); // don't store parentheses in token string
}
return TokenUniquePtr{new Token {pos, len, type, dataType, m_input}};
int len = word.string.length();
return TokenUniquePtr{new Token {pos, len, type, word.dataType,
word.string}};
}

// found word in table (command, internal function, or operator)
if (m_table.multiple(code) != Multiple::OneWord)
{
// command could be a two word command
skipWhitespace();
int pos2 {scanWord(m_pos, dataType, paren)};
if (dataType == DataType::None && !paren) // possible second word?
Word word2 = getWord(WordType::Second);
// check for possible second word (no data type and no paren words only)
if (!word2.string.empty())
{
Code code2;
if ((code2 = m_table.search(m_input.midRef(pos, len),
m_input.midRef(m_pos, pos2 - m_pos))) != Invalid_Code)
if ((code2 = m_table.search(word.string, word2.string))
!= Invalid_Code)
{
// double word command found
code = code2;
len = pos2 - pos;
m_pos = pos2; // move position past second word
}
else // reset position back to begin of second word
{
m_pos -= word2.string.length();
}
}
}
// token length runs from the start of the first word to the current position
// NOTE(review): skipWhitespace() above presumably advanced m_pos; if so,
// when no second word is accepted this length also counts the skipped
// whitespace -- confirm that is intended
int len = m_pos - pos;
return m_table.newToken(pos, len, code);
}


// function to get a word at the position specified
// function to get a word at current position in input stream
//
// - returns -1 if there is not an identifier at point
// - returns index to character after identifier
// - returns the string of the word along with data type and parentheses
// flag in a Word structure
// - returns data type found or None if none was found
// - returns flag if opening parenthesis at end of identifier
// - returns empty string if there is not an identifier at position
// - input position moved to end of word for valid identifier
// - word type argument identifies first or second word
// - second word for command only, so no data type or parentheses allowed
// - the input position is not moved if second word not valid

// reads one word from the input at m_pos: a leading letter, then letters,
// digits and '_', then an optional data type character and an optional '('
int Parser::scanWord(int pos, DataType &dataType, bool &paren)
Parser::Word Parser::getWord(WordType wordType)
{
if (!m_input[pos].isLetter())
// TODO temporary to simulate m_input as input string stream
std::string tmp {m_input.mid(m_pos).toStdString()};
std::istringstream m_input {tmp};

Word word;

// a word must start with a letter (peek() does not consume the character)
if (!isalpha(m_input.peek()))
{
return -1; // not an identifier
return word; // not an identifier, return empty word
}

// consume the leading letter and every following alphanumeric or '_'
do
{
pos++;
word.string.push_back(m_input.get()); // get character
}
while (m_input[pos].isLetterOrNumber() || m_input[pos] == '_');
// pos now points to a character that is neither alphanumeric nor '_'
while (isalnum(m_input.peek()) || m_input.peek() == '_');
// next character is neither alphanumeric nor '_'

// see if there is a data type symbol next
switch (m_input[pos].unicode())
switch (m_input.peek())
{
case '%':
dataType = DataType::Integer;
pos++;
word.dataType = DataType::Integer;
word.string.push_back(m_input.get()); // get data type character
break;
case '$':
dataType = DataType::String;
pos++;
word.dataType = DataType::String;
word.string.push_back(m_input.get()); // get data type character
break;
case '#':
dataType = DataType::Double;
pos++;
word.dataType = DataType::Double;
word.string.push_back(m_input.get()); // get data type character
break;
default:
dataType = DataType::None;
word.dataType = DataType::None;
}

// see if there is an opening parenthesis
if (m_input[pos] == '(')
if (m_input.peek() == '(')
{
paren = true;
pos++;
word.paren = true;
word.string.push_back(m_input.get()); // get '(' character
}
else
{
paren = false;
word.paren = false;
}

// p now points to next character after identifier
return pos;
// a second word must be a plain word: one with a data type character or
// an opening parenthesis is rejected by clearing the string, and m_pos is
// left where it was
if (wordType == WordType::Second && (word.dataType != DataType::None
|| word.paren))
{
word.string.clear(); // not a valid second command word
}
else
{
// advance past everything stored in the word string, which includes
// any data type character and '(' pushed above
m_pos += word.string.length();
}
return word;
}


Expand Down
14 changes: 13 additions & 1 deletion parser.h
Expand Up @@ -54,7 +54,19 @@ class Parser

// support functions
void skipWhitespace();
int scanWord(int pos, DataType &datatype, bool &paren);
// result of reading one word from the input (returned by getWord)
//
//   - string is empty when no valid word was read
//   - members carry defaults so that a default constructed Word (as
//     returned when there is no identifier at the position) never holds
//     indeterminate values
struct Word
{
    std::string string;                  // characters of word as read
    DataType dataType {DataType::None};  // data type of word (None if absent)
    bool paren {false};                  // word has an opening parentheses
};
// tells getWord which word of a command is being read
enum class WordType
{
    // first word: may have a data type and an opening parentheses
    First,
    // second word of a command: no data type and no parentheses permitted
    Second
};

Word getWord(WordType wordType);

Table &m_table; // pointer to the table object
QString m_input; // input line being parsed
Expand Down
13 changes: 9 additions & 4 deletions table.cpp
Expand Up @@ -2064,14 +2064,19 @@ Code Table::search(SearchType type, const std::string &string) const
// - returns the index of the entry that is found
// - returns -1 if the string was not found in the table

Code Table::search(const QStringRef &word1, const QStringRef &word2) const
Code Table::search(const std::string &word1, const std::string &word2) const
{
for (Code i {m_range[PlainWord_SearchType].beg};
i < m_range[PlainWord_SearchType].end; i++)
{
if (!m_entry[i].name2.isNull()
&& word1.compare(m_entry[i].name, Qt::CaseInsensitive) == 0
&& word2.compare(m_entry[i].name2, Qt::CaseInsensitive) == 0)
std::string name {m_entry[i].name.toStdString()};
std::string name2 {m_entry[i].name2.toStdString()};
if (!name2.empty() && name.size() == word1.size()
&& name2.size() == word2.size()
&& std::equal(word1.begin(), word1.end(), name.begin(),
noCaseCompare)
&& std::equal(word2.begin(), word2.end(), name2.begin(),
noCaseCompare))
{
return i;
}
Expand Down
2 changes: 1 addition & 1 deletion table.h
Expand Up @@ -149,7 +149,7 @@ class Table

// TABLE SPECIFIC FUNCTIONS
Code search(SearchType type, const std::string &string) const;
Code search(const QStringRef &word1, const QStringRef &word2) const;
Code search(const std::string &word1, const std::string &word2) const;
Code search(Code code, int argumentCount) const;
Code search(Code code, DataType *dataType) const;
bool match(Code code, DataType *dataType) const;
Expand Down
3 changes: 3 additions & 0 deletions test/parser2.dat
Expand Up @@ -15,3 +15,6 @@ rem this should be a comment
rem:this should be a comment

rem-this should be a comment

# more 'FN' tests (these should be plain identifiers)
fn fn( fn1 fn1( fn_ fn_(
9 changes: 9 additions & 0 deletions test/parser2.txt
Expand Up @@ -62,3 +62,12 @@ Input: rem:this should be a comment
Input: rem-this should be a comment
0: Command Op Rem |-this should be a comment|
28: Operator Op None EOL

Input: fn fn( fn1 fn1( fn_ fn_(
0: NoParen None |fn|
3: Paren () None |fn(|
7: NoParen None |fn1|
11: Paren () None |fn1(|
16: NoParen None |fn_|
20: Paren () None |fn_(|
24: Operator Op None EOL
7 changes: 3 additions & 4 deletions token.h
Expand Up @@ -69,10 +69,9 @@ class Token

// constructor for identifiers
//
//   - column, length, type and data type are stored as given
//   - the token string is initialized from the string argument
//   - code is set to Invalid_Code and sub-code to None_SubCode; the
//     reference member is value-initialized
Token(int column, int length, Type type, DataType dataType,
const QString &inputString) : m_column{column}, m_length{length},
m_type{type}, m_dataType{dataType}, m_string{inputString.mid(column,
length)}, m_code{Invalid_Code}, m_reference{}, m_subCode{None_SubCode}
{}
const std::string &string) : m_column{column}, m_length{length},
m_type{type}, m_dataType{dataType}, m_string{string.c_str()},
m_code{Invalid_Code}, m_reference{}, m_subCode{None_SubCode} {}

// constructor for integer constants
Token(int column, int length, const std::string string, int value) :
Expand Down

0 comments on commit dbbd9fe

Please sign in to comment.