From 2c06a64e502bac6839375c5636d39a172a609a5f Mon Sep 17 00:00:00 2001 From: Hayley Kwan Date: Sat, 9 Dec 2023 20:01:58 +0000 Subject: [PATCH] feat: allow underscores in integer literals (#3746) # Description ## Problem\* Resolves #3283 ## Summary\* This PR adds support for underscores in numeric literals. This is restricted to not allow multiple underscores in a row and a literal cannot start or end with an underscore. ## Additional Context ## Documentation\* Check one: - [ ] No documentation needed. - [ ] Documentation included in this PR. - [ ] **[Exceptional Case]** Documentation to be submitted in a separate PR. # PR Checklist\* - [x] I have tested the changes locally. - [x] I have formatted the changes with [Prettier](https://prettier.io/) and/or `cargo fmt` on default settings. --------- Co-authored-by: Tom French <15848336+TomAFrench@users.noreply.github.com> Co-authored-by: kevaundray --- compiler/noirc_frontend/src/lexer/lexer.rs | 52 ++++++++++++++++++---- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index 9d2c1a6bca..fd8168e36c 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -323,11 +323,29 @@ impl<'a> Lexer<'a> { let start = self.position; let integer_str = self.eat_while(Some(initial_char), |ch| { - ch.is_ascii_digit() | ch.is_ascii_hexdigit() | (ch == 'x') + ch.is_ascii_digit() | ch.is_ascii_hexdigit() | (ch == 'x') | (ch == '_') }); let end = self.position; + // We want to enforce some simple rules about usage of underscores: + // 1. Underscores cannot appear at the end of a integer literal. e.g. 0x123_. + // 2. There cannot be more than one underscore consecutively, e.g. 0x5__5, 5__5. + // + // We're not concerned with an underscore at the beginning of a decimal literal + // such as `_5` as this would be lexed into an ident rather than an integer literal. + let invalid_underscore_location = integer_str.ends_with('_'); + let consecutive_underscores = integer_str.contains("__"); + if invalid_underscore_location || consecutive_underscores { + return Err(LexerErrorKind::InvalidIntegerLiteral { + span: Span::inclusive(start, end), + found: integer_str, + }); + } + + // Underscores needs to be stripped out before the literal can be converted to a `FieldElement. + let integer_str = integer_str.replace('_', ""); + let integer = match FieldElement::try_from_str(&integer_str) { None => { return Err(LexerErrorKind::InvalidIntegerLiteral { @@ -930,15 +948,33 @@ mod tests { } #[test] - fn test_eat_hex_int() { - let input = "0x05"; - - let expected = vec![Token::Int(5_i128.into())]; - let mut lexer = Lexer::new(input); + fn test_eat_integer_literals() { + let test_cases: Vec<(&str, Token)> = vec![ + ("0x05", Token::Int(5_i128.into())), + ("5", Token::Int(5_i128.into())), + ("0x1234_5678", Token::Int(0x1234_5678_u128.into())), + ("0x_01", Token::Int(0x1_u128.into())), + ("1_000_000", Token::Int(1_000_000_u128.into())), + ]; - for token in expected.into_iter() { + for (input, expected_token) in test_cases { + let mut lexer = Lexer::new(input); let got = lexer.next_token().unwrap(); - assert_eq!(got, token); + assert_eq!(got.token(), &expected_token); + } + } + + #[test] + fn test_reject_invalid_underscores_in_integer_literal() { + let test_cases: Vec<&str> = vec!["0x05_", "5_", "5__5", "0x5__5"]; + + for input in test_cases { + let mut lexer = Lexer::new(input); + let token = lexer.next_token(); + assert!( + matches!(token, Err(LexerErrorKind::InvalidIntegerLiteral { .. })), + "expected {input} to throw error" + ); } }