Skip to content

Commit

Permalink
Lexer almost finished
Browse files Browse the repository at this point in the history
  • Loading branch information
unknown committed Oct 14, 2012
1 parent 756a644 commit 8128d25
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 22 deletions.
Binary file modified .sconsign.dblite
Binary file not shown.
46 changes: 32 additions & 14 deletions src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ lexer_exception::lexer_exception(const std::string &msg, str_iter position)
token_expectation_exception::token_expectation_exception(
token_type found,
token_type expected)
:std::runtime_error(""),
:std::runtime_error("Unexpected token encountered."),
found(found),
expected(expected)
{
Expand Down Expand Up @@ -49,6 +49,7 @@ bool lexer::match_single_token(token &t, token_type type)
{
t = token(type, match, str_iter::dist(beginning, current));
current = match.get_end_iter();
transform_token(t);
return true;
}
else
Expand All @@ -57,6 +58,20 @@ bool lexer::match_single_token(token &t, token_type type)
}
}

// Tries to match one token of the requested type at the current position.
// When match_skip is set, a `skip` token (whitespace, per the original note)
// is matched first and thrown away. Returns false when the type fails
// valid_token_type(), otherwise the result of the single-token match.
bool lexer::match_expected_token(token &t, token_type type, bool match_skip)
{
    if(valid_token_type(type))
    {
        if(match_skip)
        {
            // result deliberately ignored: absence of skippable text is fine
            token discarded;
            match_single_token(discarded, skip);
        }
        return match_single_token(t, type);
    }
    return false;
}

bool lexer::match_any_token(token &t)
{
//for(int i = 1; i < TOKEN_TYPE_COUNT; ++i)
Expand Down Expand Up @@ -101,18 +116,8 @@ token lexer::look_ahead(int distance)
return tokens_ahead.front();
}

bool lexer::match_expected_token(token &t, token_type type, bool match_skip)
void lexer::transform_token(token &t)
{
if(!valid_token_type(type))
return false;

//skipping whitespace
if(match_skip)
{
token dummy;
match_single_token(dummy, skip);
}
return match_single_token(t, type);
}

void lexer::rollback()
Expand Down Expand Up @@ -148,9 +153,12 @@ token lexer::consume(token_type type, bool match_skip)
}
}

void lexer::emit(const token &t)
void lexer::emit(token_type type)
{
tokens_ahead.push_front(t);
tokens_ahead.push_front(token(
type,
string(current, current),
str_iter::dist(beginning, current)));
}

// token lexer::consume(token_type type)
Expand Down Expand Up @@ -233,4 +241,14 @@ void lexer::register_token(
always_match_list.push_back(type);
}

// Warning hook for the lexer.
// Currently a stub: the message and the position are both ignored and
// nothing is reported anywhere.
void lexer::report_warning(const char *, int position)
{

}

// Returns the distance between the beginning of the text and the
// lexer's current iterator, as computed by str_iter::dist.
int lexer::get_position()
{
    const int offset = str_iter::dist(beginning, current);
    return offset;
}

}
9 changes: 6 additions & 3 deletions src/parser/lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class token_expectation_exception : public std::runtime_error

class lexer
{
protected:
token_matcher_map token_matchers;
token_name_map token_names;
token_type_list always_match_list;
Expand All @@ -39,12 +40,11 @@ class lexer

str_iter beginning, current;

protected:
void register_token(token_type, token_match_func, const std::string &, bool);
bool match_single_token(token &, token_type);
bool match_any_token(token &);
bool match_expected_token(token &t, token_type type, bool match_skip);

virtual void transform_token(token &t);
public:
lexer();
lexer(const str_iter &);
Expand All @@ -62,7 +62,10 @@ class lexer
token consume();
token consume(token_type, bool match_skip = true);
void rollback();
void emit(const token &t);
void emit(token_type);

void report_warning(const char *, int position);
int get_position();
};

}
43 changes: 40 additions & 3 deletions src/parser/squid_lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@ namespace squid
{

squid_lexer::squid_lexer()
:lexer()
:lexer(),
space_indent(" "),
indent_level(0)
{
init();
}

squid_lexer::squid_lexer(const str_iter &text)
:lexer(text)
:lexer(text),
space_indent(" "),
indent_level(0)
{
init();
}
Expand All @@ -23,7 +27,6 @@ void squid_lexer::init()
{
register_token(INVALID, match_invalid, "INVALID TOKEN", false);
register_token(END_OF_TEXT, match_end_of_text, "END_OF_TEXT", true);
register_token(INDENT_FRAG, match_indent_frag, "IDENT FRAGMENT", true);
register_token(WHITESPACE, match_whitespace, "WHITESPACE", true);
register_token(PLUS, match_plus, "'+'", true);
register_token(MINUS, match_minus, "'-'", true);
Expand All @@ -35,9 +38,43 @@ void squid_lexer::init()
* INDENT and DEDENT are never matched in text, only
* generated by the lexer when matching INDENT FRAGMENT
*/
register_token(INDENT_FRAG, match_indent_frag, "INDENT FRAGMENT", false);
register_token(INDENT, match_always_fail, "INDENT", false);
register_token(DEDENT, match_always_fail, "DEDENT", false);
}

/**
 * Counts the indentation units found in the range [iter, end).
 *
 * One unit is either one occurrence of `space_indent` or a single tab.
 * A leading newline is skipped first, because indent fragments start
 * with "\n" before the actual indentation.
 * If something that is not an indentation unit is met before `end`,
 * a warning is reported and counting stops.
 *
 * Returns the number of units counted.
 */
int squid_lexer::compute_indent_level(str_iter iter, str_iter end)
{
    // Step over the newline that opens the fragment, if there is one
    const str_iter after_newline = iter.match("\n");
    if(after_newline.valid())
        iter = after_newline;

    int level = 0;
    // `end` is where lexing resumes after the fragment, so it is exclusive:
    // matching at `end` would inspect text that is not part of the fragment
    while(iter.valid() && iter < end)
    {
        iter = iter.match(space_indent) | iter.match("\t");
        if(iter.valid())
            level++;
        else
            report_warning("Malformed indentation detected", get_position());
    }
    return level;
}

// Post-processing hook for matched tokens.
// Only INDENT_FRAG tokens are looked at: their indentation level is
// computed and compared with the stored indent_level. The difference is
// not used for anything yet.
void squid_lexer::transform_token(token &tok)
{
    if(!(tok.type == INDENT_FRAG))
        return;

    const int measured = compute_indent_level(
        tok.text.get_start_iter(),
        tok.text.get_end_iter());
    const int level_change = measured - indent_level;
    (void)level_change; // computed but intentionally unused for now
}

// Unfinished placeholder: throws when the token at the front of
// tokens_ahead has the requested type, and does nothing otherwise.
void squid_lexer::consume_indent_token(token_type type)
{
    const bool front_has_type =
        !tokens_ahead.empty() && tokens_ahead.front().type == type;
    if(front_has_type)
        throw "FIXME";
}


}
9 changes: 8 additions & 1 deletion src/parser/squid_lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,23 @@
namespace squid
{

int compute_indent_level(str_iter iter, str_iter end);

// Lexer specialization that adds indentation tracking on top of `lexer`.
class squid_lexer : public lexer
{
// String that counts as one indentation unit when measuring indentation
// (a tab counts as one unit as well).
str_iter::str_ptr space_indent;
// Indentation level that newly measured levels are compared against.
int indent_level;
protected:
// Registers the token types this lexer works with.
void init();
// Hook invoked on matched tokens; inspects INDENT_FRAG tokens.
virtual void transform_token(token &t);
// Counts indentation units between the two iterators.
int compute_indent_level(str_iter iter, str_iter end);

public:
squid_lexer();
squid_lexer(const str_iter &);
virtual ~squid_lexer();


// NOTE(review): implementation is still a placeholder — confirm intended behavior.
void consume_indent_token(token_type);
};

}
20 changes: 20 additions & 0 deletions src/parser/string_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,26 @@ class string_iterator
return ptr != rhs.ptr;
}

// True when this iterator's pointer orders before rhs's pointer.
bool operator<(const self_type &rhs) const
{
    return rhs.ptr > ptr;
}

// True when this iterator's pointer orders after rhs's pointer.
bool operator>(const self_type &rhs) const
{
    return rhs.ptr < ptr;
}

// True when this iterator's pointer does not order after rhs's pointer.
bool operator<=(const self_type &rhs) const
{
    return rhs.ptr >= ptr;
}

// True when this iterator's pointer does not order before rhs's pointer.
bool operator>=(const self_type &rhs) const
{
    return rhs.ptr <= ptr;
}

T operator*() const
{
return *ptr;
Expand Down
2 changes: 1 addition & 1 deletion src/parser/tokens.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ string match_match(const str_iter &iter)
string match_indent_frag(const str_iter &iter)
{
if(!iter.valid()) return string();
return string(iter, iter.match("\n").zero_or_more(match, "\t"));
return string(iter, iter.match("\n").zero_or_more(any, " \t"));
}

string match_whitespace(const str_iter &iter)
Expand Down
Binary file removed src/parser/tokens.obj
Binary file not shown.
21 changes: 21 additions & 0 deletions test/squid_lexer.t.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <cxxtest/TestSuite.h>
#include <parser/squid_lexer.hpp>
#include <cstring>
#include <stdexcept>
#include <sstream>
#include <iostream>
Expand Down Expand Up @@ -50,6 +51,14 @@ class SquidLexerTestSuite : public CxxTest::TestSuite
TS_ASSERT_EQUALS(tok.type, END_OF_TEXT);
}

// Smoke test: consuming an INDENT_FRAG token (without skipping whitespace)
// from a newline followed by indentation must not throw.
// Only the absence of exceptions is checked, not the computed level.
void test_compute_indent_level()
{
TS_ASSERT_THROWS_NOTHING(squid_lexer("\n ").consume(INDENT_FRAG, false));
TS_ASSERT_THROWS_NOTHING(squid_lexer("\n ").consume(INDENT_FRAG, false));
TS_ASSERT_THROWS_NOTHING(squid_lexer("\n ").consume(INDENT_FRAG, false));
TS_ASSERT_THROWS_NOTHING(squid_lexer("\n ").consume(INDENT_FRAG, false));
}

void test_lookahead()
{
squid_lexer lex("foo + b * c");
Expand All @@ -61,6 +70,18 @@ class SquidLexerTestSuite : public CxxTest::TestSuite
TS_ASSERT_EQUALS(lex.consume().type, IDENTIFIER);
}

void test_emit()
{
squid_lexer lex("+ b * c");
lex.emit(IDENTIFIER);
lex.look_ahead(5);
TS_ASSERT_EQUALS(lex.consume().type, IDENTIFIER);
TS_ASSERT_EQUALS(lex.consume().type, PLUS);
TS_ASSERT_EQUALS(lex.consume().type, IDENTIFIER);
TS_ASSERT_EQUALS(lex.consume().type, MULTIPLY);
TS_ASSERT_EQUALS(lex.consume().type, IDENTIFIER);
}

void test_rollback()
{
squid_lexer lex("foo + b * c");
Expand Down
3 changes: 3 additions & 0 deletions todo
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
- popravi consume_indent_token
- sredi generisanje indent tokena
- pocni parser!

0 comments on commit 8128d25

Please sign in to comment.