From dd2f179939b57bb909063ee06c0e759632941408 Mon Sep 17 00:00:00 2001 From: Luckas Date: Sat, 19 Aug 2023 18:08:04 +0300 Subject: [PATCH] update, refactor: remove operations with err --- lexer/src/lib.rs | 168 ++++++--------------------------------------- lexer/src/utils.rs | 4 ++ parser/src/lib.rs | 58 +--------------- 3 files changed, 25 insertions(+), 205 deletions(-) diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index b7a042e..106cc4b 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -35,14 +35,6 @@ impl<'a> Lexer<'a> { self.current_pos.line_start = self.current_pos.line_end; self.current_pos.col_end = self.current_pos.col_start; - if ch == '\n' { - tokens.push(Token::new( - TokenType::LineBreak, - String::from("\n"), - self.current_pos, - )); - } - if self.is_skipable(ch) { continue; } @@ -51,76 +43,9 @@ impl<'a> Lexer<'a> { tokens.push(self.create_token(ch)?); - self.current_lexeme = String::new(); - self.current_pos.col_start = self.current_pos.col_end + 1; - } - - if self.current_pos.col_start > 0 { - self.current_pos.col_start -= 1; // offstet of the last loop - } - self.current_pos.col_end = self.current_pos.col_start; - - tokens.push(Token::new(TokenType::Eof, String::new(), self.current_pos)); - - Ok(tokens) - } - - pub fn tokenize_with_err(&mut self) -> (Vec, Vec) { - let mut tokens: Vec = Vec::new(); - let mut errors: Vec = Vec::new(); - - while let Some(ch) = self.chars.next() { self.current_lexeme.clear(); - self.current_pos.line_start = self.current_pos.line_end; - self.current_pos.col_end = self.current_pos.col_start; - if ch == '\n' { - tokens.push(Token::new( - TokenType::LineBreak, - String::from("\n"), - self.current_pos, - )); - } - - if self.is_skipable(ch) { - continue; - } - - self.current_lexeme += &ch.to_string(); - - match self.create_token(ch) { - Ok(token) => tokens.push(token), - Err(error) => { - while let Some(ch) = self.chars.next() { - self.current_pos.col_end += 1; - self.current_pos.col_start = self.current_pos.col_end; - if ch == '\n' { - match error.kind { - LexicalErrorKind::TrailingQuote(..) => { - self.current_pos.line_end += 1; - self.current_pos.col_end = 0; - self.current_pos.col_start = 0; - } - _ => { - tokens.push(Token::new( - TokenType::LineBreak, - String::from("\n"), - self.current_pos, - )); - self.current_pos.line_end += 1; - self.current_pos.col_end = 0; - self.current_pos.col_start = 0; - break; - } - } - } - } - errors.push(error); - } - } - - // skip if line break - if self.current_pos.col_end > 0 { + if !is_linebreak(ch) { self.current_pos.col_start = self.current_pos.col_end + 1; } } @@ -128,11 +53,14 @@ impl<'a> Lexer<'a> { if self.current_pos.col_start > 0 { self.current_pos.col_start -= 1; // offstet of the last loop } + self.current_pos.col_end = self.current_pos.col_start; - tokens.push(Token::new(TokenType::Eof, String::new(), self.current_pos)); + let eof_token = Token::new(TokenType::Eof, String::new(), self.current_pos); - (tokens, errors) + tokens.push(eof_token); + + Ok(tokens) } fn advance(&mut self) { @@ -150,14 +78,8 @@ impl<'a> Lexer<'a> { } } - if ch.is_whitespace() { - if ch == '\n' { - self.current_pos.line_end += 1; - self.current_pos.col_start = 0; - } else { - self.current_pos.col_start = self.current_pos.col_end + 1; - } - + if ch.is_whitespace() && !is_linebreak(ch) { + self.current_pos.col_start = self.current_pos.col_end + 1; return true; } @@ -176,6 +98,7 @@ impl<'a> Lexer<'a> { fn create_token(&mut self, ch: char) -> Result { match ch { + ch if is_linebreak(ch) => self.create_linebreak(), ch if is_standard_symbol(ch) => self.create_symbol_token(), ch if is_quote(ch) => self.create_string_token(ch), ch if is_alphabetic(ch) => self.create_keyword_or_identifer_token(), @@ -189,6 +112,15 @@ impl<'a> Lexer<'a> { } } + fn create_linebreak(&mut self) -> Result { + let token = Token::new(TokenType::LineBreak, String::from("\n"), self.current_pos); + + self.current_pos.line_end += 1; + self.current_pos.col_start = 0; + + Ok(token) + } + fn create_symbol_token(&mut self) -> Result { if let Some(next_char) = self.chars.peek() { if *next_char == '=' { @@ -245,6 +177,7 @@ impl<'a> Lexer<'a> { while let Some(next_char) = self.chars.peek() { if *next_char == '\\' { self.advance(); + if let Some(next_next_char) = self.chars.peek() { let current_escape_char: String = format!("\\{}", next_next_char); let escape_char = match *next_next_char { @@ -261,6 +194,7 @@ impl<'a> Lexer<'a> { )) } }; + self.current_lexeme += escape_char; self.advance(); } else { @@ -476,66 +410,4 @@ mod tests { ] ) } - - #[test] - fn test_tokenize_with_err() { - let s = "12 + 1..2 -^ unreachable -idk -'hi"; - let (tokens, errors) = Lexer::new(s).tokenize_with_err(); - assert_eq!( - tokens, - vec![ - Token::new( - TokenType::Number(12.0), - String::from("12"), - Position::new(0, 0, 0, 1) - ), - Token::new( - TokenType::Plus, - String::from("+"), - Position::new(0, 3, 0, 3) - ), - Token::new( - TokenType::LineBreak, - String::from("\n"), - Position::new(0, 9, 0, 9) - ), - Token::new( - TokenType::LineBreak, - String::from("\n"), - Position::new(1, 13, 1, 13) - ), - Token::new( - TokenType::Identifier(String::from("idk")), - String::from("idk"), - Position::new(2, 0, 2, 2) - ), - Token::new( - TokenType::LineBreak, - String::from("\n"), - Position::new(2, 3, 2, 3) - ), - Token::new(TokenType::Eof, String::new(), Position::new(3, 2, 3, 2)) - ] - ); - assert_eq!( - errors, - vec![ - LexicalError::new( - LexicalErrorKind::InvalidFloat(String::from("1..2")), - Position::new(0, 5, 0, 8) - ), - LexicalError::new( - LexicalErrorKind::UnexpectedCharacter(String::from("^")), - Position::new(1, 0, 1, 0) - ), - LexicalError::new( - LexicalErrorKind::TrailingQuote('\''), - Position::new(3, 0, 3, 2) - ) - ] - ) - } } diff --git a/lexer/src/utils.rs b/lexer/src/utils.rs index 9e18864..8c0abcb 100644 --- a/lexer/src/utils.rs +++ b/lexer/src/utils.rs @@ -51,3 +51,7 @@ pub fn is_alphabetic(ch: char) -> bool { pub fn is_alphanumeric(ch: char) -> bool { ch.is_ascii_alphanumeric() || ch == '_' } + +pub fn is_linebreak(ch: char) -> bool { + ch == '\n' +} diff --git a/parser/src/lib.rs b/parser/src/lib.rs index cb481a4..4e0ff07 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -22,6 +22,7 @@ impl<'a> Parser<'a> { pub fn parse(&mut self) -> Result, ParsingError> { self.advance(); + let mut nodes: Vec = Vec::new(); while !self.current_token.value.is_eof() { @@ -35,26 +36,6 @@ impl<'a> Parser<'a> { Ok(nodes) } - pub fn parse_with_err(&mut self) -> (Vec, Vec) { - self.advance(); - let mut nodes: Vec = Vec::new(); - let mut errors: Vec = Vec::new(); - - while !self.current_token.value.is_eof() { - match self.parse_statement() { - Ok(statement) => match &statement { - Statement::FunctionDeclaration(_) => nodes.insert(0, statement), - _ => nodes.push(statement), - }, - Err(error) => { - self.handle_error(error, &mut errors); - } - } - } - - (nodes, errors) - } - fn clone_token(&self) -> Token { self.current_token.clone() } @@ -1005,43 +986,6 @@ impl<'a> Parser<'a> { self.advance(); Ok(parameter) } - - fn handle_error(&mut self, error: ParsingError, errors: &mut Vec) { - match error.kind { - ParsingErrorKind::ExpectedColon(_) => self.advance_find(","), - ParsingErrorKind::ExpectedComma(_) => self.advance_find(","), - ParsingErrorKind::ExpectedLeftBrace(_) => self.advance_find("}"), - ParsingErrorKind::ExpectedLeftParenthesis(_) => self.advance_find(")"), - ParsingErrorKind::MissingClosingParenthesis => self.advance_find(")"), - ParsingErrorKind::MissingClosingBracket => self.advance_find("]"), - ParsingErrorKind::MissingClosingBrace => self.advance_find("}"), - _ => { - if !self.current_token.value.is_eof() { - self.advance(); - } - } - } - - match error.kind { - ParsingErrorKind::UnexpectedToken(_) => match errors.last() { - Some(last_err) => match last_err.kind { - ParsingErrorKind::UnexpectedToken(_) => {} - _ => errors.push(error), - }, - None => errors.push(error), - }, - _ => errors.push(error), - } - } - - fn advance_find(&mut self, lexeme: &str) { - while self.current_token.lexeme != lexeme.to_string() { - if self.current_token.value.is_eof() { - break; - } - self.advance(); - } - } } #[cfg(test)]