From ce2898395fdf1d01e6e2873f3c5831452a060aff Mon Sep 17 00:00:00 2001
From: Matthias Thoemmes
Date: Sun, 16 Oct 2016 12:23:41 +0200
Subject: [PATCH 1/3] Matching numbers case insensitively, adding initial
 tokenizer tests, correcting an issue with OperatorType::BitShiftRight,
 improvements to error handling, updating roadmap

---
 ROADMAP.md              |  13 +++-
 core/src/error.rs       |   2 +-
 core/src/tokenizer.rs   |  21 ++++-
 core/tests/codegen.rs   |   7 +-
 core/tests/tokenizer.rs | 165 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 198 insertions(+), 10 deletions(-)
 create mode 100644 core/tests/tokenizer.rs

diff --git a/ROADMAP.md b/ROADMAP.md
index af27560..bdc4983 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -1,11 +1,16 @@
-- move ffi (Node.js bridge using Neon) to core project
 - remove rust-cli, benchmarks
 - use Node buffers instead of strings
 - pass transpilation options from cli to core
 - additional tests for core library
-  - Test which ES2015 features are not yet covered
+  - Test and list which ES2015 features are not yet covered
+    - [ ] Function default parameters
+    - [ ] Regular expressions
+    - [ ] Generator functions
+    - [ ] one-line for loops
+    - [ ] Template strings
+    - [ ] Destructuring
+    - ...
 - implement tests in Rust
 - implement tests in JS
- -> for now, we just return *null* in case of errors
-- webpack loader in separate repo
+- webpack loader in separate repository
 - transform ratel AST to JavaScript AST

diff --git a/core/src/error.rs b/core/src/error.rs
index 2dc6ddc..30bb210 100644
--- a/core/src/error.rs
+++ b/core/src/error.rs
@@ -46,7 +46,7 @@ impl fmt::Display for ParseError {
             .lines()
             .enumerate()
             .last()
-            .expect("Must always have at least one line.");
+            .unwrap_or((0, ""));
 
         let colno = line.chars().count();
         let token_len = source[start..end].chars().count();

diff --git a/core/src/tokenizer.rs b/core/src/tokenizer.rs
index c6b289a..9a61fe1 100644
--- a/core/src/tokenizer.rs
+++ b/core/src/tokenizer.rs
@@ -234,7 +234,9 @@ define_handlers! {
                 }
             },
 
-            _ => BSRAssign
+            b'=' => BSRAssign,
+
+            _ => BitShiftRight
         }
     },
 
@@ -663,6 +665,11 @@ define_handlers! {
             return Ok(Literal(tok.read_binary()));
         },
 
+        b'B' => {
+            tok.bump();
+
+            return Ok(Literal(tok.read_binary()));
+        },
+
         b'o' => {
             tok.bump();
 
             return Ok(Literal(tok.read_octal()));
         },
 
+        b'O' => {
+            tok.bump();
+
+            return Ok(Literal(tok.read_octal()));
+        },
+
         b'x' => {
             tok.bump();
 
             return Ok(Literal(tok.read_hexadec()));
         },
 
+        b'X' => {
+            tok.bump();
+
+            return Ok(Literal(tok.read_hexadec()));
+        },
+
         _ => {}

diff --git a/core/tests/codegen.rs b/core/tests/codegen.rs
index aca33cc..9f0f15b 100644
--- a/core/tests/codegen.rs
+++ b/core/tests/codegen.rs
@@ -13,24 +13,23 @@ fn output_program(input_program: &str) -> String {
 
 macro_rules! assert_compile {
     ($string:expr, $expect:expr) => {
-        println!("{:?}", output_program($string));
         assert_eq!(output_program($string), $expect.to_string());
     }
 }
 
 #[test]
 fn convert_const_to_var_in_global_scope() {
-    assert_compile!("const pi = 314;", "var pi=314;");
+    assert_compile!("const pi = 3.14;", "var pi=3.14;");
 }
 
 #[test]
 fn convert_let_to_var_in_global_scope() {
-    assert_compile!("let pi = 314;", "var pi=314;");
+    assert_compile!("let pi = 3.14;", "var pi=3.14;");
 }
 
 #[test]
 fn dont_touch_var_in_global_scope() {
-    assert_compile!("var pi = 314;", "var pi=314;");
+    assert_compile!("var pi = 3.14;", "var pi=3.14;");
 }
 
 #[test]

diff --git a/core/tests/tokenizer.rs b/core/tests/tokenizer.rs
new file mode 100644
index 0000000..4d5e167
--- /dev/null
+++ b/core/tests/tokenizer.rs
@@ -0,0 +1,165 @@
+extern crate ratel;
+
+pub use ratel::*;
+pub use ratel::grammar::*;
+pub use ratel::tokenizer::*;
+pub use ratel::lexicon::Token;
+pub use ratel::lexicon::ReservedKind;
+pub use ratel::lexicon::Token::*;
+pub use ratel::owned_slice::OwnedSlice;
+
+fn test_token(input: &str, expected: Token) -> bool {
+    let mut tokenizer = Tokenizer::new(&input);
+    let tok = tokenizer.get_token().unwrap();
+    if tok != expected {
+        println!("\n{:?}\n", tok);
+    }
+    tok == expected
+}
+
+macro_rules! assert_token {
+    ($string:expr, $token:expr, $descr:expr) => {
+        assert_eq!(test_token($string, $token), true, $descr);
+    }
+}
+
+#[test]
+fn test_tokenizer_chars() {
+    assert_token!(";", Token::Semicolon, "read a Token::Semicolon");
+    assert_token!(":", Token::Colon, "read a Token::Colon");
+    assert_token!(",", Token::Comma, "read a Token::Comma");
+    assert_token!("(", Token::ParenOpen, "read a Token::ParenOpen");
+    assert_token!(")", Token::ParenClose, "read a Token::ParenClose");
+    assert_token!("[", Token::BracketOpen, "read a Token::BracketOpen");
+    assert_token!("]", Token::BracketClose, "read a Token::BracketClose");
+    assert_token!("{", Token::BraceOpen, "read a Token::BraceOpen");
+    assert_token!("}", Token::BraceClose, "read a Token::BraceClose");
+}
+
+#[test]
+fn test_tokenizer_control_statements() {
+    assert_token!("break", Token::Break, "read a Token::Break");
+    assert_token!("do", Token::Do, "read a Token::Do");
+    assert_token!("case", Token::Case, "read a Token::Case");
+    assert_token!("else", Token::Else, "read a Token::Else");
+    assert_token!("catch", Token::Catch, "read a Token::Catch");
+    assert_token!("export", Token::Export, "read a Token::Export");
+    assert_token!("class", Token::Class, "read a Token::Class");
+    assert_token!("extends", Token::Extends, "read a Token::Extends");
+    assert_token!("return", Token::Return, "read a Token::Return");
+    assert_token!("while", Token::While, "read a Token::While");
+    assert_token!("finally", Token::Finally, "read a Token::Finally");
+    assert_token!("super", Token::Super, "read a Token::Super");
+    assert_token!("with", Token::With, "read a Token::With");
+    assert_token!("continue", Token::Continue, "read a Token::Continue");
+    assert_token!("for", Token::For, "read a Token::For");
+    assert_token!("switch", Token::Switch, "read a Token::Switch");
+    assert_token!("yield", Token::Yield, "read a Token::Yield");
+    assert_token!("debugger", Token::Debugger, "read a Token::Debugger");
+    assert_token!("function", Token::Function, "read a Token::Function");
+    assert_token!("this", Token::This, "read a Token::This");
+    assert_token!("default", Token::Default, "read a Token::Default");
+    assert_token!("if", Token::If, "read a Token::If");
+    assert_token!("throw", Token::Throw, "read a Token::Throw");
+    assert_token!("import", Token::Import, "read a Token::Import");
+    assert_token!("try", Token::Try, "read a Token::Try");
+}
+
+#[test]
+fn test_tokenizer_operators() {
+    assert_token!("=>", Token::Operator(OperatorType::FatArrow), "OperatorType::FatArrow");
+    assert_token!(".", Token::Operator(OperatorType::Accessor), "OperatorType::Accessor");
+    assert_token!("new", Token::Operator(OperatorType::New), "OperatorType::New");
+    assert_token!("++", Token::Operator(OperatorType::Increment), "OperatorType::Increment");
+    assert_token!("--", Token::Operator(OperatorType::Decrement), "OperatorType::Decrement");
+    assert_token!("!", Token::Operator(OperatorType::LogicalNot), "OperatorType::LogicalNot");
+    assert_token!("~", Token::Operator(OperatorType::BitwiseNot), "OperatorType::BitwiseNot");
+    assert_token!("typeof", Token::Operator(OperatorType::Typeof), "OperatorType::Typeof");
+    assert_token!("void", Token::Operator(OperatorType::Void), "OperatorType::Void");
+    assert_token!("delete", Token::Operator(OperatorType::Delete), "OperatorType::Delete");
+    assert_token!("*", Token::Operator(OperatorType::Multiplication), "OperatorType::Multiplication");
+    assert_token!("/", Token::Operator(OperatorType::Division), "OperatorType::Division");
+    assert_token!("%", Token::Operator(OperatorType::Remainder), "OperatorType::Remainder");
+    assert_token!("**", Token::Operator(OperatorType::Exponent), "OperatorType::Exponent");
+    assert_token!("+", Token::Operator(OperatorType::Addition), "OperatorType::Addition");
+    assert_token!("-", Token::Operator(OperatorType::Substraction), "OperatorType::Substraction");
+    assert_token!("<<", Token::Operator(OperatorType::BitShiftLeft), "OperatorType::BitShiftLeft");
+    assert_token!(">>", Token::Operator(OperatorType::BitShiftRight), "OperatorType::BitShiftRight");
+    assert_token!(">>>", Token::Operator(OperatorType::UBitShiftRight), "OperatorType::UBitShiftRight");
+    assert_token!("<", Token::Operator(OperatorType::Lesser), "OperatorType::Lesser");
+    assert_token!("<=", Token::Operator(OperatorType::LesserEquals), "OperatorType::LesserEquals");
+    assert_token!(">", Token::Operator(OperatorType::Greater), "OperatorType::Greater");
+    assert_token!(">=", Token::Operator(OperatorType::GreaterEquals), "OperatorType::GreaterEquals");
+    assert_token!("instanceof", Token::Operator(OperatorType::Instanceof), "OperatorType::Instanceof");
+    assert_token!("in", Token::Operator(OperatorType::In), "OperatorType::In");
+    assert_token!("===", Token::Operator(OperatorType::StrictEquality), "OperatorType::StrictEquality");
+    assert_token!("!==", Token::Operator(OperatorType::StrictInequality), "OperatorType::StrictInequality");
+    assert_token!("==", Token::Operator(OperatorType::Equality), "OperatorType::Equality");
+    assert_token!("!=", Token::Operator(OperatorType::Inequality), "OperatorType::Inequality");
+    assert_token!("&", Token::Operator(OperatorType::BitwiseAnd), "OperatorType::BitwiseAnd");
+    assert_token!("^", Token::Operator(OperatorType::BitwiseXor), "OperatorType::BitwiseXor");
+    assert_token!("|", Token::Operator(OperatorType::BitwiseOr), "OperatorType::BitwiseOr");
+    assert_token!("&&", Token::Operator(OperatorType::LogicalAnd), "OperatorType::LogicalAnd");
+    assert_token!("||", Token::Operator(OperatorType::LogicalOr), "OperatorType::LogicalOr");
+    assert_token!("?", Token::Operator(OperatorType::Conditional), "OperatorType::Conditional");
+    assert_token!("=", Token::Operator(OperatorType::Assign), "OperatorType::Assign");
assert_token!("+=", Token::Operator(OperatorType::AddAssign), "OperatorType::AddAssign"); + assert_token!("-=", Token::Operator(OperatorType::SubstractAssign), "OperatorType::SubstractAssign"); + assert_token!("**=", Token::Operator(OperatorType::ExponentAssign), "OperatorType::ExponentAssign"); + assert_token!("*=", Token::Operator(OperatorType::MultiplyAssign), "OperatorType::MultiplyAssign"); + assert_token!("/=", Token::Operator(OperatorType::DivideAssign), "OperatorType::DivideAssign"); + assert_token!("%=", Token::Operator(OperatorType::RemainderAssign), "OperatorType::RemainderAssign"); + assert_token!("<<=", Token::Operator(OperatorType::BSLAssign), "OperatorType::BSLAssign"); + assert_token!(">>=", Token::Operator(OperatorType::BSRAssign), "OperatorType::BSRAssign"); + assert_token!(">>>=", Token::Operator(OperatorType::UBSRAssign), "OperatorType::UBSRAssign"); + assert_token!("&=", Token::Operator(OperatorType::BitAndAssign), "OperatorType::BitAndAssign"); + assert_token!("^=", Token::Operator(OperatorType::BitXorAssign), "OperatorType::BitXorAssign"); + assert_token!("|=", Token::Operator(OperatorType::BitOrAssign), "OperatorType::BitOrAssign"); + assert_token!("...", Token::Operator(OperatorType::Spread), "OperatorType::Spread"); +} + +#[test] +fn test_tokenizer_literals() { + assert_token!("undefined", Token::Literal(LiteralValue::LiteralUndefined), "Token::LiteralUndefined"); + assert_token!("null", Token::Literal(LiteralValue::LiteralNull), "Token::LiteralNull"); + assert_token!("true", Token::Literal(LiteralValue::LiteralTrue), "Token::LiteralTrue"); + assert_token!("false", Token::Literal(LiteralValue::LiteralFalse), "Token::LiteralFalse"); + + assert_token!("'foo'", Token::Literal(LiteralString(OwnedSlice::from_static("'foo'"))), "Token::LiteralString"); + assert_token!("\"foo\"", Token::Literal(LiteralString(OwnedSlice::from_static("\"foo\""))), "Token::LiteralString"); + + // assert_token!("2.2", Token::Literal(LiteralNumber(OwnedSlice::from_static("2.2"))), "Token::LiteralNumber"); + assert_token!("2.2", Token::Literal(LiteralFloat(OwnedSlice::from_static("2.2"))), "Token::LiteralFloat"); + + // will be deprecated in favor of LiteralNumber + assert_token!("2", Token::Literal(LiteralFloat(OwnedSlice::from_static("2"))), "Token::LiteralFloat"); + // assert_token!("2", Token::Literal(LiteralInteger(2)), "Token::LiteralInteger"); + + assert_token!("0xff", Token::Literal(LiteralInteger(255)), "Token::LiteralInteger"); + assert_token!("0XFF", Token::Literal(LiteralInteger(255)), "Token::LiteralInteger"); + assert_token!("0b01001011", Token::Literal(LiteralInteger(75)), "Token::LiteralInteger"); + assert_token!("0B01001011", Token::Literal(LiteralInteger(75)), "Token::LiteralInteger"); + assert_token!("0o113", Token::Literal(LiteralInteger(75)), "Token::LiteralInteger"); + assert_token!("0O113", Token::Literal(LiteralInteger(75)), "Token::LiteralInteger"); +} + +#[test] +fn test_tokenizer_reserved() { + assert_token!("enum", Token::Reserved(ReservedKind::Enum), "ReservedKind::Enum"); + assert_token!("implements", Token::Reserved(ReservedKind::Implements), "ReservedKind::Implements"); + assert_token!("package", Token::Reserved(ReservedKind::Package), "ReservedKind::Package"); + assert_token!("protected", Token::Reserved(ReservedKind::Protected), "ReservedKind::Protected"); + assert_token!("interface", Token::Reserved(ReservedKind::Interface), "ReservedKind::Interface"); + assert_token!("private", Token::Reserved(ReservedKind::Private), "ReservedKind::Private"); + 
assert_token!("public", Token::Reserved(ReservedKind::Public), "ReservedKind::Public"); +} + + +#[test] +fn test_tokenizer_whitespace() { + assert_token!("", Token::EndOfProgram, "empty string"); + assert_token!(" ", Token::EndOfProgram, "whitespaces"); + assert_token!("\n\n\n ", Token::EndOfProgram, "newlines"); + assert_token!("//Comment\n//Comment", Token::EndOfProgram, "single-line comment"); + assert_token!("/**\n * Comment\n */", Token::EndOfProgram, "multi-line comment"); +} From ac2d08c24ff1ded3967f99f2e026b9df8a558798 Mon Sep 17 00:00:00 2001 From: Matthias Thoemmes Date: Sun, 16 Oct 2016 13:28:09 +0200 Subject: [PATCH 2/3] updated ROADMAP --- ROADMAP.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ROADMAP.md b/ROADMAP.md index bdc4983..e930c04 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -3,6 +3,7 @@ - pass transpilation options from cli to core - additional tests for core library - Test and list which ES2015 features are not yet covered + - [ ] Scientific notation for numbers - [ ] Function default parameters - [ ] Regular expressions - [ ] Generator functions From 13a9cf1223c293f5826ee97840a76bb476bf010f Mon Sep 17 00:00:00 2001 From: Matthias Thoemmes Date: Sun, 16 Oct 2016 13:59:42 +0200 Subject: [PATCH 3/3] Updating match statement --- core/src/tokenizer.rs | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/core/src/tokenizer.rs b/core/src/tokenizer.rs index 9a61fe1..c9f53cf 100644 --- a/core/src/tokenizer.rs +++ b/core/src/tokenizer.rs @@ -660,36 +660,19 @@ define_handlers! { tok.bump(); match tok.peek_byte() { - b'b' => { + b'b' | b'B' => { tok.bump(); return Ok(Literal(tok.read_binary())); }, - b'B' => { - tok.bump(); - - return Ok(Literal(tok.read_binary())); - }, - - b'o' => { - tok.bump(); - - return Ok(Literal(tok.read_octal())); - }, - b'O' => { + b'o' | b'O' => { tok.bump(); return Ok(Literal(tok.read_octal())); }, - b'x' => { - tok.bump(); - - return Ok(Literal(tok.read_hexadec())); - }, - - b'X' => { + b'x' | b'X' => { tok.bump(); return Ok(Literal(tok.read_hexadec()));