-
-
Notifications
You must be signed in to change notification settings - Fork 339
/
comment.rs
184 lines (173 loc) · 8.97 KB
/
comment.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
use super::{
cold_branch,
search::{byte_search, safe_byte_match_table, SafeByteMatchTable},
source::SourcePosition,
Kind, Lexer,
};
use crate::diagnostics;
use oxc_syntax::identifier::is_line_terminator;
use memchr::memmem::Finder;
// Irregular line breaks - '\u{2028}' (LS) and '\u{2029}' (PS).
// In UTF-8, LS is encoded as the 3 bytes `E2 80 A8` and PS as `E2 80 A9`.
// They share the same first byte, so a byte-level search can only detect "possibly LS/PS"
// from that first byte and must then inspect the following 2 bytes.

// First byte of the UTF-8 encoding of both LS and PS.
// Note: `0xE2` is also the first byte of many other 3-byte UTF-8 characters.
const LS_OR_PS_FIRST: u8 = 0xE2;
// 2nd and 3rd bytes of the UTF-8 encoding of LS (U+2028).
const LS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xA8];
// 2nd and 3rd bytes of the UTF-8 encoding of PS (U+2029).
const PS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xA9];
// Byte table used when skipping a single-line comment.
// Matches `\r`, `\n`, and the first byte of LS/PS - i.e. every byte which may begin a line break.
static LINE_BREAK_TABLE: SafeByteMatchTable =
safe_byte_match_table!(|b| matches!(b, b'\r' | b'\n' | LS_OR_PS_FIRST));
// Byte table used for the first phase of skipping a multi-line comment.
// Matches `*` (possible start of the closing `*/`) as well as every byte which may begin
// a line break, so the search can both find the end of the comment and notice line breaks.
static MULTILINE_COMMENT_START_TABLE: SafeByteMatchTable =
safe_byte_match_table!(|b| matches!(b, b'*' | b'\r' | b'\n' | LS_OR_PS_FIRST));
impl<'a> Lexer<'a> {
/// Section 12.4 Single Line Comment
///
/// Skips the remainder of a single-line comment and records it via
/// `trivia_builder.add_single_line_comment`, spanning from `self.token.start` to the offset
/// at which the comment ends. Always returns `Kind::Skip`.
///
/// The comment ends at `\r`, `\n`, LS (U+2028), PS (U+2029), or end of file.
/// When it ends at a line break, `pos` is advanced past that line break (1 byte for `\r` / `\n`,
/// 3 bytes for LS / PS) and `self.token.is_on_new_line` is set to `true`.
/// When it ends at end of file, `is_on_new_line` is left unchanged.
pub(super) fn skip_single_line_comment(&mut self) -> Kind {
// SAFETY: Requirement not to alter `pos` if return `true` from `continue_if` is satisfied
unsafe {
byte_search! {
lexer: self,
table: LINE_BREAK_TABLE,
continue_if: |next_byte, pos| {
// Match found. Decide whether to continue searching.
// If this is end of comment, create trivia, and advance `pos` to after line break.
// Do that here rather than in `handle_match`, to avoid branching twice on value of
// the matched byte.
// The common `\r` / `\n` case is deliberately written as the first branch,
// hence the negated condition and the lint suppression.
#[allow(clippy::if_not_else)]
if next_byte != LS_OR_PS_FIRST {
// `\r` or `\n`
// Comment spans from start of the token up to (not including) the line break.
self.trivia_builder
.add_single_line_comment(self.token.start, self.source.offset_of(pos));
// SAFETY: Safe to consume `\r` or `\n` as both are ASCII
pos = pos.add(1);
// We've found the end. Do not continue searching.
false
} else {
// `0xE2`. Could be first byte of LS/PS, or could be some other Unicode char.
// Either way, Unicode is uncommon, so make this a cold branch.
cold_branch(|| {
// SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
// So safe to advance `pos` by 1 and read 2 bytes.
let next2 = pos.add(1).read2();
if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
// Irregular line break
// Comment spans from start of the token up to (not including) the LS/PS char.
self.trivia_builder
.add_single_line_comment(self.token.start, self.source.offset_of(pos));
// Advance `pos` to after this char.
// SAFETY: `0xE2` is always 1st byte of a 3-byte UTF-8 char,
// so consuming 3 bytes will place `pos` on next UTF-8 char boundary.
pos = pos.add(3);
// We've found the end. Do not continue searching.
false
} else {
// Some other Unicode char beginning with `0xE2`. Continue searching.
// `pos` is left unaltered here, as the SAFETY requirement above demands.
true
}
})
}
},
// Reached once `continue_if` has returned `false`, i.e. a line break ended the comment.
// Trivia was already recorded in `continue_if`, so only the new-line flag remains to be set.
handle_match: |_next_byte, _start| {
self.token.is_on_new_line = true;
Kind::Skip
},
// No line break before end of file: the comment runs to the current offset.
handle_eof: |_start| {
self.trivia_builder.add_single_line_comment(self.token.start, self.offset());
Kind::Skip
},
};
}
}
/// Section 12.4 Multi Line Comment
///
/// Skips the remainder of a multi-line comment up to and including the closing `*/`.
///
/// While `self.token.is_on_new_line` is not yet set, the search looks for both `*` and
/// line breaks, so that a line break inside the comment sets `is_on_new_line`.
/// Once the flag is set by a regular line break (`\r` / `\n`), or if it was already set on entry,
/// the rest of the work is delegated to `skip_multi_line_comment_after_line_break`,
/// which only searches for `*/`.
///
/// Returns `Kind::Skip` after recording the comment via `trivia_builder.add_multi_line_comment`.
/// If end of file is reached without finding `*/`, reports
/// `diagnostics::UnterminatedMultiLineComment` and returns `Kind::Eof`.
pub(super) fn skip_multi_line_comment(&mut self) -> Kind {
// If `is_on_new_line` is already set, go directly to faster search which only looks for `*/`
if self.token.is_on_new_line {
return self.skip_multi_line_comment_after_line_break(self.source.position());
}
// SAFETY: Requirement not to alter `pos` if return `true` from `continue_if` is satisfied
unsafe {
byte_search! {
lexer: self,
table: MULTILINE_COMMENT_START_TABLE,
continue_if: |next_byte, pos| {
// Match found. Decide whether to continue searching.
if next_byte == b'*' {
// Only read the byte after `*` if `*` is not the very last byte of the source.
if pos.addr() < self.source.end_addr() - 1 {
// If next byte isn't `/`, continue
// SAFETY: Have checked there's at least 1 further byte to read
if pos.add(1).read() == b'/' {
// Consume `*/`
// SAFETY: Consuming `*/` leaves `pos` on a UTF-8 char boundary
pos = pos.add(2);
// End of comment found. Do not continue searching.
false
} else {
// Lone `*` inside the comment. `pos` is left unaltered.
true
}
} else {
// This is last byte in file. Continue to `handle_eof`.
// This is illegal in valid JS, so mark this branch cold.
cold_branch(|| true)
}
} else if next_byte == LS_OR_PS_FIRST {
// `0xE2`. Could be first byte of LS/PS, or could be some other Unicode char.
// Either way, Unicode is uncommon, so make this a cold branch.
cold_branch(|| {
// SAFETY: Next byte is `0xE2` which is always 1st byte of a 3-byte UTF-8 char.
// So safe to advance `pos` by 1 and read 2 bytes.
let next2 = pos.add(1).read2();
if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
// Irregular line break
self.token.is_on_new_line = true;
// Ideally we'd go on to `skip_multi_line_comment_after_line_break` here
// but can't do that easily because can't use `return` in a closure.
// But irregular line breaks are rare anyway.
}
// Either way, continue searching
true
})
} else {
// Regular line break.
// No need to look for more line breaks, so switch to faster search just for `*/`.
self.token.is_on_new_line = true;
// Resume the search from the byte after the line break (`\r` and `\n` are 1 byte each).
return self.skip_multi_line_comment_after_line_break(pos.add(1));
}
},
// Reached once `continue_if` has returned `false`, i.e. `*/` was found and consumed.
// The comment spans from start of the token to the current offset.
handle_match: |_next_byte, _start| {
self.trivia_builder.add_multi_line_comment(self.token.start, self.offset());
Kind::Skip
},
// End of file reached without finding `*/`: the comment is unterminated.
handle_eof: |_start| {
self.error(diagnostics::UnterminatedMultiLineComment(self.unterminated_range()));
Kind::Eof
},
};
}
}
/// Finish skipping a multi-line comment by searching only for the closing `*/`.
///
/// Used once `self.token.is_on_new_line` is already set, so line breaks inside the comment
/// no longer need to be detected and a plain substring search is sufficient.
///
/// * `pos` - Position in the source from which to start searching for `*/`.
///
/// If `*/` is found, moves the source position to just after it, records the comment via
/// `trivia_builder.add_multi_line_comment`, and returns `Kind::Skip`.
/// Otherwise advances the source to its end, reports
/// `diagnostics::UnterminatedMultiLineComment`, and returns `Kind::Eof`.
fn skip_multi_line_comment_after_line_break(&mut self, pos: SourcePosition) -> Kind {
    // Can use `memchr` here as only searching for 1 pattern.
    // Cache `Finder` instance on `Lexer` as there's a significant cost to creating it.
    // `Finder::new` isn't a const function, so can't make it a `static`, and `lazy_static!`
    // has a cost each time it's deref-ed. Creating `Finder` unconditionally in `Lexer::new`
    // would be efficient for files containing multi-line comments, but would impose pointless
    // cost on files which don't. So this is the fastest solution.
    //
    // `get_or_insert_with` creates the `Finder` on first use and hands back a reference to
    // the cached instance, without a separate `is_none` check or an `unwrap`.
    let finder = self.multi_line_comment_end_finder.get_or_insert_with(|| Finder::new("*/"));
    let remaining = self.source.str_from_pos_to_end(pos).as_bytes();
    if let Some(index) = finder.find(remaining) {
        // `index` is the offset of `*` relative to `pos`; `+ 2` steps over `*/`.
        // SAFETY: `pos + index + 2` is end of `*/`, so a valid `SourcePosition`
        self.source.set_position(unsafe { pos.add(index + 2) });
        self.trivia_builder.add_multi_line_comment(self.token.start, self.offset());
        Kind::Skip
    } else {
        // No `*/` anywhere in the rest of the source: the comment is unterminated.
        self.source.advance_to_end();
        self.error(diagnostics::UnterminatedMultiLineComment(self.unterminated_range()));
        Kind::Eof
    }
}
/// Section 12.5 Hashbang Comments
///
/// Pulls characters from `next_char` until it yields a line terminator or `None`
/// (the terminating character, if any, has itself been taken from `next_char`).
/// Then marks the token as being on a new line and returns `Kind::HashbangComment`.
pub(super) fn read_hashbang_comment(&mut self) -> Kind {
    loop {
        match self.next_char() {
            // Ordinary character inside the hashbang line: keep reading.
            Some(ch) if !is_line_terminator(ch) => {}
            // Line terminator, or no more characters: the hashbang comment ends here.
            _ => break,
        }
    }
    self.token.is_on_new_line = true;
    Kind::HashbangComment
}
}