Skip to content

Commit

Permalink
shorten the unsafe
Browse files Browse the repository at this point in the history
  • Loading branch information
Boshen committed Jan 16, 2024
1 parent 2d2fae1 commit 0daa3ef
Showing 1 changed file with 75 additions and 72 deletions.
147 changes: 75 additions & 72 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,15 @@ impl<'a> Lexer<'a> {

/// Consume the current char when it's known to be ASCII.
/// This compiles down to a single instruction, just incrementing `chars` iterator's pointer.
- /// SAFETY: Caller must ensure not at EOF and current char is ASCII.
+ /// NOTE: Caller must ensure not at EOF and current char is ASCII.
#[inline]
- unsafe fn consume_ascii_char(&mut self) -> char {
+ fn consume_ascii_char(&mut self) -> char {
let s = self.current.chars.as_str();
- assert_unchecked!(!s.is_empty());
- assert_unchecked!(s.as_bytes()[0] < 128);
+ // SAFETY: Caller must ensure not at EOF and current char is ASCII.
+ unsafe {
+ assert_unchecked!(!s.is_empty());
+ assert_unchecked!(s.as_bytes()[0] < 128);
+ }
self.current.chars.next().unwrap()
}

Expand Down Expand Up @@ -1329,31 +1332,31 @@ static BYTE_HANDLERS: [ByteHandler; 128] = [

// `\0` `\1` etc
const ERR: ByteHandler = |lexer| {
- // SAFETY: Next char is an ASCII char e.g. `\0`
- let c = unsafe { lexer.consume_ascii_char() };
+ // Next char is an ASCII char e.g. `\0`
+ let c = lexer.consume_ascii_char();
lexer.error(diagnostics::InvalidCharacter(c, lexer.unterminated_range()));
Kind::Undetermined
};

// <SPACE> <TAB> <VT> <FF>
const SPS: ByteHandler = |lexer| {
- // SAFETY: Next char is an ASCII space character
- unsafe { lexer.consume_ascii_char() };
+ // Next char is an ASCII space character
+ lexer.consume_ascii_char();
Kind::WhiteSpace
};

// '\r' '\n'
const LIN: ByteHandler = |lexer| {
- // SAFETY: Next char is `\r` or `\n`, which are both ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `\r` or `\n`, which are both ASCII
+ lexer.consume_ascii_char();
lexer.current.token.is_on_new_line = true;
Kind::NewLine
};

// !
const EXL: ByteHandler = |lexer| {
- // SAFETY: Next char is `!`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `!`, which is ASCII
+ lexer.consume_ascii_char();
if lexer.next_eq('=') {
if lexer.next_eq('=') {
Kind::Neq2
Expand All @@ -1367,8 +1370,8 @@ const EXL: ByteHandler = |lexer| {

// ' "
const QOT: ByteHandler = |lexer| {
- // SAFETY: Next char is `'` or `"`, which are both ASCII
- let c = unsafe { lexer.consume_ascii_char() };
+ // Next char is `'` or `"`, which are both ASCII
+ let c = lexer.consume_ascii_char();
if lexer.context == LexerContext::JsxAttributeValue {
lexer.read_jsx_string_literal(c)
} else {
Expand All @@ -1378,8 +1381,8 @@ const QOT: ByteHandler = |lexer| {

// #
const HAS: ByteHandler = |lexer| {
- // SAFETY: Next char is `#`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `#`, which is ASCII
+ lexer.consume_ascii_char();
// HashbangComment ::
// `#!` SingleLineCommentChars?
if lexer.current.token.start == 0 && lexer.next_eq('!') {
Expand All @@ -1396,8 +1399,8 @@ const IDT: ByteHandler = |lexer| {

// %
const PRC: ByteHandler = |lexer| {
- // SAFETY: Next char is `%`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `%`, which is ASCII
+ lexer.consume_ascii_char();
if lexer.next_eq('=') {
Kind::PercentEq
} else {
Expand All @@ -1407,8 +1410,8 @@ const PRC: ByteHandler = |lexer| {

// &
const AMP: ByteHandler = |lexer| {
- // SAFETY: Next char is `&`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `&`, which is ASCII
+ lexer.consume_ascii_char();
if lexer.next_eq('&') {
if lexer.next_eq('=') {
Kind::Amp2Eq
Expand All @@ -1424,22 +1427,22 @@ const AMP: ByteHandler = |lexer| {

// (
const PNO: ByteHandler = |lexer| {
- // SAFETY: Next char is `(`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `(`, which is ASCII
+ lexer.consume_ascii_char();
Kind::LParen
};

// )
const PNC: ByteHandler = |lexer| {
- // SAFETY: Next char is `)`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `)`, which is ASCII
+ lexer.consume_ascii_char();
Kind::RParen
};

// *
const ATR: ByteHandler = |lexer| {
- // SAFETY: Next char is `*`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `*`, which is ASCII
+ lexer.consume_ascii_char();
if lexer.next_eq('*') {
if lexer.next_eq('=') {
Kind::Star2Eq
Expand All @@ -1455,8 +1458,8 @@ const ATR: ByteHandler = |lexer| {

// +
const PLS: ByteHandler = |lexer| {
- // SAFETY: Next char is `+`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `+`, which is ASCII
+ lexer.consume_ascii_char();
if lexer.next_eq('+') {
Kind::Plus2
} else if lexer.next_eq('=') {
Expand All @@ -1468,29 +1471,29 @@ const PLS: ByteHandler = |lexer| {

// ,
const COM: ByteHandler = |lexer| {
- // SAFETY: Next char is `,`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `,`, which is ASCII
+ lexer.consume_ascii_char();
Kind::Comma
};

// -
const MIN: ByteHandler = |lexer| {
- // SAFETY: Next char is `-`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `-`, which is ASCII
+ lexer.consume_ascii_char();
lexer.read_minus().unwrap_or_else(|| lexer.skip_single_line_comment())
};

// .
const PRD: ByteHandler = |lexer| {
- // SAFETY: Next char is `.`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `.`, which is ASCII
+ lexer.consume_ascii_char();
lexer.read_dot()
};

// /
const SLH: ByteHandler = |lexer| {
- // SAFETY: Next char is `/`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `/`, which is ASCII
+ lexer.consume_ascii_char();
match lexer.peek() {
Some('/') => {
lexer.current.chars.next();
Expand All @@ -1513,43 +1516,43 @@ const SLH: ByteHandler = |lexer| {

// 0
const ZER: ByteHandler = |lexer| {
- // SAFETY: Next char is `0`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `0`, which is ASCII
+ lexer.consume_ascii_char();
lexer.read_zero()
};

// 1 to 9
const DIG: ByteHandler = |lexer| {
- // SAFETY: Next char is an ASCII digit
- unsafe { lexer.consume_ascii_char() };
+ // Next char is an ASCII digit
+ lexer.consume_ascii_char();
lexer.decimal_literal_after_first_digit()
};

// :
const COL: ByteHandler = |lexer| {
- // SAFETY: Next char is `:`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `:`, which is ASCII
+ lexer.consume_ascii_char();
Kind::Colon
};

// ;
const SEM: ByteHandler = |lexer| {
- // SAFETY: Next char is `;`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `;`, which is ASCII
+ lexer.consume_ascii_char();
Kind::Semicolon
};

// <
const LSS: ByteHandler = |lexer| {
- // SAFETY: Next char is `<`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `<`, which is ASCII
+ lexer.consume_ascii_char();
lexer.read_left_angle().unwrap_or_else(|| lexer.skip_single_line_comment())
};

// =
const EQL: ByteHandler = |lexer| {
- // SAFETY: Next char is `=`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `=`, which is ASCII
+ lexer.consume_ascii_char();
if lexer.next_eq('=') {
if lexer.next_eq('=') {
Kind::Eq3
Expand All @@ -1565,16 +1568,16 @@ const EQL: ByteHandler = |lexer| {

// >
const GTR: ByteHandler = |lexer| {
- // SAFETY: Next char is `>`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `>`, which is ASCII
+ lexer.consume_ascii_char();
// `>=` is re-lexed with [Lexer::next_jsx_child]
Kind::RAngle
};

// ?
const QST: ByteHandler = |lexer| {
- // SAFETY: Next char is `?`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `?`, which is ASCII
+ lexer.consume_ascii_char();
if lexer.next_eq('?') {
if lexer.next_eq('=') {
Kind::Question2Eq
Expand All @@ -1596,26 +1599,26 @@ const QST: ByteHandler = |lexer| {

// @
const AT_: ByteHandler = |lexer| {
- // SAFETY: Next char is `@`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `@`, which is ASCII
+ lexer.consume_ascii_char();
Kind::At
};

// [
const BTO: ByteHandler = |lexer| {
- // SAFETY: Next char is `[`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `[`, which is ASCII
+ lexer.consume_ascii_char();
Kind::LBrack
};

// \
const ESC: ByteHandler = |lexer| {
let lexer_ref = lexer as &Lexer<'_>;
let mut builder = AutoCow::new(lexer_ref);
- // SAFETY: Next char at start of this function was `\`, which is ASCII.
+ // Next char at start of this function was `\`, which is ASCII.
// `AutoCow::new` cannot have changed the state of `lexer.current.chars` iterator,
// as we explicitly passed it only an immutable reference.
- unsafe { lexer.consume_ascii_char() };
+ lexer.consume_ascii_char();
builder.force_allocation_without_current_ascii_char(lexer);
lexer.identifier_unicode_escape_sequence(&mut builder, true);
let text = lexer.identifier_name(builder);
Expand All @@ -1624,15 +1627,15 @@ const ESC: ByteHandler = |lexer| {

// ]
const BTC: ByteHandler = |lexer| {
- // SAFETY: Next char is `]`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `]`, which is ASCII
+ lexer.consume_ascii_char();
Kind::RBrack
};

// ^
const CRT: ByteHandler = |lexer| {
- // SAFETY: Next char is `^`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `^`, which is ASCII
+ lexer.consume_ascii_char();
if lexer.next_eq('=') {
Kind::CaretEq
} else {
Expand All @@ -1642,22 +1645,22 @@ const CRT: ByteHandler = |lexer| {

// `
const TPL: ByteHandler = |lexer| {
- // SAFETY: Next char is '`', which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is '`', which is ASCII
+ lexer.consume_ascii_char();
lexer.read_template_literal(Kind::TemplateHead, Kind::NoSubstitutionTemplate)
};

// {
const BEO: ByteHandler = |lexer| {
- // SAFETY: Next char is `{`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `{`, which is ASCII
+ lexer.consume_ascii_char();
Kind::LCurly
};

// |
const PIP: ByteHandler = |lexer| {
- // SAFETY: Next char is `|`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `|`, which is ASCII
+ lexer.consume_ascii_char();
if lexer.next_eq('|') {
if lexer.next_eq('=') {
Kind::Pipe2Eq
Expand All @@ -1673,15 +1676,15 @@ const PIP: ByteHandler = |lexer| {

// }
const BEC: ByteHandler = |lexer| {
- // SAFETY: Next char is `}`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `}`, which is ASCII
+ lexer.consume_ascii_char();
Kind::RCurly
};

// ~
const TLD: ByteHandler = |lexer| {
- // SAFETY: Next char is `~`, which is ASCII
- unsafe { lexer.consume_ascii_char() };
+ // Next char is `~`, which is ASCII
+ lexer.consume_ascii_char();
Kind::Tilde
};

Expand Down

0 comments on commit 0daa3ef

Please sign in to comment.