update, refactor: remove operations with err
luckasRanarison committed Aug 19, 2023
1 parent 3bd198b commit dd2f179
Showing 3 changed files with 25 additions and 205 deletions.
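In effect, the commit removes the error-collecting entry points (tokenize_with_err, parse_with_err) and keeps only the fail-fast Result variants. A rough before/after sketch from the caller's side — the signatures are taken from the diff below, but the call site itself is illustrative:

// Before: errors were accumulated and returned alongside the partial output.
// let (tokens, errors) = Lexer::new(source).tokenize_with_err();

// After: the first lexical error aborts the pass and is propagated to the caller.
fn lex(source: &str) -> Result<Vec<Token>, LexicalError> {
    Lexer::new(source).tokenize()
}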
168 changes: 20 additions & 148 deletions lexer/src/lib.rs
@@ -35,14 +35,6 @@ impl<'a> Lexer<'a> {
self.current_pos.line_start = self.current_pos.line_end;
self.current_pos.col_end = self.current_pos.col_start;

if ch == '\n' {
tokens.push(Token::new(
TokenType::LineBreak,
String::from("\n"),
self.current_pos,
));
}

if self.is_skipable(ch) {
continue;
}
@@ -51,88 +43,24 @@ impl<'a> Lexer<'a> {

tokens.push(self.create_token(ch)?);

self.current_lexeme = String::new();
self.current_pos.col_start = self.current_pos.col_end + 1;
}

if self.current_pos.col_start > 0 {
self.current_pos.col_start -= 1; // offset of the last loop
}
self.current_pos.col_end = self.current_pos.col_start;

tokens.push(Token::new(TokenType::Eof, String::new(), self.current_pos));

Ok(tokens)
}

pub fn tokenize_with_err(&mut self) -> (Vec<Token>, Vec<LexicalError>) {
let mut tokens: Vec<Token> = Vec::new();
let mut errors: Vec<LexicalError> = Vec::new();

while let Some(ch) = self.chars.next() {
self.current_lexeme.clear();
self.current_pos.line_start = self.current_pos.line_end;
self.current_pos.col_end = self.current_pos.col_start;

if ch == '\n' {
tokens.push(Token::new(
TokenType::LineBreak,
String::from("\n"),
self.current_pos,
));
}

if self.is_skipable(ch) {
continue;
}

self.current_lexeme += &ch.to_string();

match self.create_token(ch) {
Ok(token) => tokens.push(token),
Err(error) => {
while let Some(ch) = self.chars.next() {
self.current_pos.col_end += 1;
self.current_pos.col_start = self.current_pos.col_end;
if ch == '\n' {
match error.kind {
LexicalErrorKind::TrailingQuote(..) => {
self.current_pos.line_end += 1;
self.current_pos.col_end = 0;
self.current_pos.col_start = 0;
}
_ => {
tokens.push(Token::new(
TokenType::LineBreak,
String::from("\n"),
self.current_pos,
));
self.current_pos.line_end += 1;
self.current_pos.col_end = 0;
self.current_pos.col_start = 0;
break;
}
}
}
}
errors.push(error);
}
}

// skip if line break
if self.current_pos.col_end > 0 {
if !is_linebreak(ch) {
self.current_pos.col_start = self.current_pos.col_end + 1;
}
}

if self.current_pos.col_start > 0 {
self.current_pos.col_start -= 1; // offset of the last loop
}

self.current_pos.col_end = self.current_pos.col_start;

tokens.push(Token::new(TokenType::Eof, String::new(), self.current_pos));
let eof_token = Token::new(TokenType::Eof, String::new(), self.current_pos);

(tokens, errors)
tokens.push(eof_token);

Ok(tokens)
}

fn advance(&mut self) {
@@ -150,14 +78,8 @@ impl<'a> Lexer<'a> {
}
}

if ch.is_whitespace() {
if ch == '\n' {
self.current_pos.line_end += 1;
self.current_pos.col_start = 0;
} else {
self.current_pos.col_start = self.current_pos.col_end + 1;
}

if ch.is_whitespace() && !is_linebreak(ch) {
self.current_pos.col_start = self.current_pos.col_end + 1;
return true;
}

@@ -176,6 +98,7 @@ impl<'a> Lexer<'a> {

fn create_token(&mut self, ch: char) -> Result<Token, LexicalError> {
match ch {
ch if is_linebreak(ch) => self.create_linebreak(),
ch if is_standard_symbol(ch) => self.create_symbol_token(),
ch if is_quote(ch) => self.create_string_token(ch),
ch if is_alphabetic(ch) => self.create_keyword_or_identifer_token(),
@@ -189,6 +112,15 @@ impl<'a> Lexer<'a> {
}
}

fn create_linebreak(&mut self) -> Result<Token, LexicalError> {
let token = Token::new(TokenType::LineBreak, String::from("\n"), self.current_pos);

self.current_pos.line_end += 1;
self.current_pos.col_start = 0;

Ok(token)
}

fn create_symbol_token(&mut self) -> Result<Token, LexicalError> {
if let Some(next_char) = self.chars.peek() {
if *next_char == '=' {
@@ -245,6 +177,7 @@ impl<'a> Lexer<'a> {
while let Some(next_char) = self.chars.peek() {
if *next_char == '\\' {
self.advance();

if let Some(next_next_char) = self.chars.peek() {
let current_escape_char: String = format!("\\{}", next_next_char);
let escape_char = match *next_next_char {
Expand All @@ -261,6 +194,7 @@ impl<'a> Lexer<'a> {
))
}
};

self.current_lexeme += escape_char;
self.advance();
} else {
@@ -476,66 +410,4 @@ mod tests {
]
)
}

#[test]
fn test_tokenize_with_err() {
let s = "12 + 1..2
^ unreachable
idk
'hi";
let (tokens, errors) = Lexer::new(s).tokenize_with_err();
assert_eq!(
tokens,
vec![
Token::new(
TokenType::Number(12.0),
String::from("12"),
Position::new(0, 0, 0, 1)
),
Token::new(
TokenType::Plus,
String::from("+"),
Position::new(0, 3, 0, 3)
),
Token::new(
TokenType::LineBreak,
String::from("\n"),
Position::new(0, 9, 0, 9)
),
Token::new(
TokenType::LineBreak,
String::from("\n"),
Position::new(1, 13, 1, 13)
),
Token::new(
TokenType::Identifier(String::from("idk")),
String::from("idk"),
Position::new(2, 0, 2, 2)
),
Token::new(
TokenType::LineBreak,
String::from("\n"),
Position::new(2, 3, 2, 3)
),
Token::new(TokenType::Eof, String::new(), Position::new(3, 2, 3, 2))
]
);
assert_eq!(
errors,
vec![
LexicalError::new(
LexicalErrorKind::InvalidFloat(String::from("1..2")),
Position::new(0, 5, 0, 8)
),
LexicalError::new(
LexicalErrorKind::UnexpectedCharacter(String::from("^")),
Position::new(1, 0, 1, 0)
),
LexicalError::new(
LexicalErrorKind::TrailingQuote('\''),
Position::new(3, 0, 3, 2)
)
]
)
}
}
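Line-break handling is now funneled through create_token via the new create_linebreak, instead of being special-cased at the top of both tokenize loops. A minimal sketch of the resulting stream — positions elided, and the Token/TokenType shapes are as used in the tests above:

let tokens = Lexer::new("a\nb").tokenize().expect("lexes cleanly");
// Expected stream: Identifier("a"), LineBreak, Identifier("b"), Eof
assert!(matches!(tokens[1].value, TokenType::LineBreak));
assert!(matches!(tokens[3].value, TokenType::Eof));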
4 changes: 4 additions & 0 deletions lexer/src/utils.rs
@@ -51,3 +51,7 @@ pub fn is_alphabetic(ch: char) -> bool {
pub fn is_alphanumeric(ch: char) -> bool {
ch.is_ascii_alphanumeric() || ch == '_'
}

pub fn is_linebreak(ch: char) -> bool {
ch == '\n'
}
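Note that the new predicate only matches a bare '\n'; a carriage return is still ordinary whitespace and gets skipped by is_skipable. A quick illustration:

assert!(is_linebreak('\n'));
assert!(!is_linebreak('\r')); // '\r' stays whitespace, so CRLF input yields a single LineBreak token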
58 changes: 1 addition & 57 deletions parser/src/lib.rs
@@ -22,6 +22,7 @@ impl<'a> Parser<'a> {

pub fn parse(&mut self) -> Result<Vec<Statement>, ParsingError> {
self.advance();

let mut nodes: Vec<Statement> = Vec::new();

while !self.current_token.value.is_eof() {
@@ -35,26 +36,6 @@ impl<'a> Parser<'a> {
Ok(nodes)
}

pub fn parse_with_err(&mut self) -> (Vec<Statement>, Vec<ParsingError>) {
self.advance();
let mut nodes: Vec<Statement> = Vec::new();
let mut errors: Vec<ParsingError> = Vec::new();

while !self.current_token.value.is_eof() {
match self.parse_statement() {
Ok(statement) => match &statement {
Statement::FunctionDeclaration(_) => nodes.insert(0, statement),
_ => nodes.push(statement),
},
Err(error) => {
self.handle_error(error, &mut errors);
}
}
}

(nodes, errors)
}

fn clone_token(&self) -> Token {
self.current_token.clone()
}
@@ -1005,43 +986,6 @@ impl<'a> Parser<'a> {
self.advance();
Ok(parameter)
}

fn handle_error(&mut self, error: ParsingError, errors: &mut Vec<ParsingError>) {
match error.kind {
ParsingErrorKind::ExpectedColon(_) => self.advance_find(","),
ParsingErrorKind::ExpectedComma(_) => self.advance_find(","),
ParsingErrorKind::ExpectedLeftBrace(_) => self.advance_find("}"),
ParsingErrorKind::ExpectedLeftParenthesis(_) => self.advance_find(")"),
ParsingErrorKind::MissingClosingParenthesis => self.advance_find(")"),
ParsingErrorKind::MissingClosingBracket => self.advance_find("]"),
ParsingErrorKind::MissingClosingBrace => self.advance_find("}"),
_ => {
if !self.current_token.value.is_eof() {
self.advance();
}
}
}

match error.kind {
ParsingErrorKind::UnexpectedToken(_) => match errors.last() {
Some(last_err) => match last_err.kind {
ParsingErrorKind::UnexpectedToken(_) => {}
_ => errors.push(error),
},
None => errors.push(error),
},
_ => errors.push(error),
}
}

fn advance_find(&mut self, lexeme: &str) {
while self.current_token.lexeme != lexeme.to_string() {
if self.current_token.value.is_eof() {
break;
}
self.advance();
}
}
}

#[cfg(test)]
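With parse_with_err, handle_error, and advance_find gone, the parser no longer attempts error recovery; the first ParsingError simply bubbles up. A hedged sketch of the combined pipeline — the Parser constructor is not visible in this diff, so Parser::new(&tokens) is an assumption, and the error formatting assumes both error types derive Debug (as the lexer tests suggest):

fn compile(source: &str) -> Result<Vec<Statement>, String> {
    // Fail-fast lexing: Result<Vec<Token>, LexicalError>
    let tokens = Lexer::new(source).tokenize().map_err(|e| format!("{e:?}"))?;
    let mut parser = Parser::new(&tokens); // hypothetical constructor, not shown in this diff
    // Fail-fast parsing: Result<Vec<Statement>, ParsingError>
    parser.parse().map_err(|e| format!("{e:?}"))
}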
