Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delay literal unescaping #118699

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3428,6 +3428,7 @@ dependencies = [
"rustc_index",
"rustc_macros",
"rustc_middle",
"rustc_parse",
"rustc_session",
"rustc_span",
"rustc_target",
Expand Down
276 changes: 193 additions & 83 deletions compiler/rustc_ast/src/util/literal.rs

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions compiler/rustc_ast_lowering/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ rustc_hir = { path = "../rustc_hir" }
rustc_index = { path = "../rustc_index" }
rustc_macros = { path = "../rustc_macros" }
rustc_middle = { path = "../rustc_middle" }
rustc_parse = { path = "../rustc_parse" }
rustc_session = { path = "../rustc_session" }
rustc_span = { path = "../rustc_span" }
rustc_target = { path = "../rustc_target" }
Expand Down
15 changes: 7 additions & 8 deletions compiler/rustc_ast_lowering/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_hir as hir;
use rustc_hir::def::{DefKind, Res};
use rustc_middle::span_bug;
use rustc_session::errors::report_lit_error;
use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs;
use rustc_span::source_map::{respan, Spanned};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::DUMMY_SP;
Expand Down Expand Up @@ -119,13 +119,12 @@ impl<'hir> LoweringContext<'_, 'hir> {
hir::ExprKind::Unary(op, ohs)
}
ExprKind::Lit(token_lit) => {
let lit_kind = match LitKind::from_token_lit(*token_lit) {
Ok(lit_kind) => lit_kind,
Err(err) => {
report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span);
LitKind::Err
}
};
let lit_kind = token_lit_to_lit_kind_and_report_errs(
&self.tcx.sess.parse_sess,
*token_lit,
e.span,
)
.unwrap_or(LitKind::Err);
let lit = self.arena.alloc(respan(self.lower_span(e.span), lit_kind));
hir::ExprKind::Lit(lit)
}
Expand Down
63 changes: 31 additions & 32 deletions compiler/rustc_builtin_macros/src/concat.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use rustc_ast as ast;
use rustc_ast::tokenstream::TokenStream;
use rustc_expand::base::{self, DummyResult};
use rustc_session::errors::report_lit_error;
use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs;
use rustc_span::symbol::Symbol;

use crate::errors;
Expand All @@ -19,44 +19,43 @@ pub fn expand_concat(
let mut has_errors = false;
for e in es {
match e.kind {
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => {
accumulator.push_str(s.as_str());
}
Ok(ast::LitKind::Char(c)) => {
accumulator.push(c);
}
Ok(ast::LitKind::Int(i, _)) => {
accumulator.push_str(&i.to_string());
}
Ok(ast::LitKind::Bool(b)) => {
accumulator.push_str(&b.to_string());
}
Ok(ast::LitKind::CStr(..)) => {
cx.emit_err(errors::ConcatCStrLit { span: e.span });
has_errors = true;
}
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
cx.emit_err(errors::ConcatBytestr { span: e.span });
has_errors = true;
}
Ok(ast::LitKind::Err) => {
has_errors = true;
}
Err(err) => {
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
has_errors = true;
ast::ExprKind::Lit(token_lit) => {
match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, e.span)
{
Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => {
accumulator.push_str(s.as_str());
}
Ok(ast::LitKind::Char(c)) => {
accumulator.push(c);
}
Ok(ast::LitKind::Int(i, _)) => {
accumulator.push_str(&i.to_string());
}
Ok(ast::LitKind::Bool(b)) => {
accumulator.push_str(&b.to_string());
}
Ok(ast::LitKind::CStr(..)) => {
cx.emit_err(errors::ConcatCStrLit { span: e.span });
has_errors = true;
}
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
cx.emit_err(errors::ConcatBytestr { span: e.span });
has_errors = true;
}
Ok(ast::LitKind::Err) | Err(()) => {
has_errors = true;
}
}
},
}
// We also want to allow negative numeric literals.
ast::ExprKind::Unary(ast::UnOp::Neg, ref expr)
if let ast::ExprKind::Lit(token_lit) = expr.kind =>
{
match ast::LitKind::from_token_lit(token_lit) {
match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, e.span)
{
Ok(ast::LitKind::Int(i, _)) => accumulator.push_str(&format!("-{i}")),
Ok(ast::LitKind::Float(f, _)) => accumulator.push_str(&format!("-{f}")),
Err(err) => {
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
Err(()) => {
has_errors = true;
}
_ => missing_literal.push(e.span),
Expand Down
8 changes: 3 additions & 5 deletions compiler/rustc_builtin_macros/src/concat_bytes.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use rustc_ast as ast;
use rustc_ast::{ptr::P, tokenstream::TokenStream};
use rustc_expand::base::{self, DummyResult};
use rustc_session::errors::report_lit_error;
use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs;
use rustc_span::Span;

use crate::errors;
Expand All @@ -17,7 +17,7 @@ fn invalid_type_err(
ConcatBytesInvalid, ConcatBytesInvalidSuggestion, ConcatBytesNonU8, ConcatBytesOob,
};
let snippet = cx.sess.source_map().span_to_snippet(span).ok();
match ast::LitKind::from_token_lit(token_lit) {
match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, span) {
Ok(ast::LitKind::CStr(_, _)) => {
// Avoid ambiguity in handling of terminal `NUL` by refusing to
// concatenate C string literals as bytes.
Expand Down Expand Up @@ -60,9 +60,7 @@ fn invalid_type_err(
cx.emit_err(ConcatBytesNonU8 { span });
}
Ok(ast::LitKind::ByteStr(..) | ast::LitKind::Byte(_)) => unreachable!(),
Err(err) => {
report_lit_error(&cx.sess.parse_sess, err, token_lit, span);
}
Err(()) => {}
}
}

Expand Down
47 changes: 25 additions & 22 deletions compiler/rustc_expand/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ use rustc_errors::{
use rustc_feature::Features;
use rustc_lint_defs::builtin::PROC_MACRO_BACK_COMPAT;
use rustc_lint_defs::{BufferedEarlyLint, BuiltinLintDiagnostics, RegisteredTools};
use rustc_parse::{parser, MACRO_ARGUMENTS};
use rustc_session::errors::report_lit_error;
use rustc_parse::{self, parser, MACRO_ARGUMENTS};
use rustc_session::{parse::ParseSess, Limit, Session};
use rustc_span::def_id::{CrateNum, DefId, LocalDefId};
use rustc_span::edition::Edition;
Expand Down Expand Up @@ -1236,26 +1235,30 @@ pub fn expr_to_spanned_string<'a>(
let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr();

Err(match expr.kind {
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)),
Ok(ast::LitKind::ByteStr(..)) => {
let mut err = cx.struct_span_err(expr.span, err_msg);
let span = expr.span.shrink_to_lo();
err.span_suggestion(
span.with_hi(span.lo() + BytePos(1)),
"consider removing the leading `b`",
"",
Applicability::MaybeIncorrect,
);
Some((err, true))
}
Ok(ast::LitKind::Err) => None,
Err(err) => {
report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span);
None
}
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
},
ast::ExprKind::Lit(token_lit) => {
let res = match parser::token_lit_to_lit_kind_and_report_errs(
&cx.sess.parse_sess,
token_lit,
expr.span,
) {
Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)),
Ok(ast::LitKind::ByteStr(..)) => {
let mut err = cx.struct_span_err(expr.span, err_msg);
let span = expr.span.shrink_to_lo();
err.span_suggestion(
span.with_hi(span.lo() + BytePos(1)),
"consider removing the leading `b`",
"",
Applicability::MaybeIncorrect,
);
Some((err, true))
}
Ok(ast::LitKind::Err) => None,
Err(()) => None,
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
};
res
}
ast::ExprKind::Err => None,
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
})
Expand Down
19 changes: 16 additions & 3 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ pub enum EscapeError {
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
NonAsciiCharInByte,

/// `\0` in a C string literal.
NulInCStr,

/// After a line ending with '\', the next line contains whitespace
/// characters that are not skipped.
UnskippedWhitespaceWarning,
Expand Down Expand Up @@ -122,10 +125,20 @@ where
{
match mode {
CStr => {
unescape_non_raw_common(src, mode, callback);
unescape_non_raw_common(src, mode, &mut |r, mut result| {
if let Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) = result {
result = Err(EscapeError::NulInCStr);
}
callback(r, result)
});
}
RawCStr => {
check_raw_common(src, mode, &mut |r, result| callback(r, result.map(CStrUnit::Char)));
check_raw_common(src, mode, &mut |r, mut result| {
if let Ok('\0') = result {
result = Err(EscapeError::NulInCStr);
}
callback(r, result.map(CStrUnit::Char))
});
}
Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable!(),
}
Expand Down Expand Up @@ -335,7 +348,7 @@ where
// them in the range computation.
while let Some(c) = chars.next() {
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
let res: Result<T, EscapeError> = match c {
'\\' => {
match chars.clone().next() {
Some('\n') => {
Expand Down
31 changes: 31 additions & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ parse_bare_cr = {$double_quotes ->

parse_bare_cr_in_raw_string = bare CR not allowed in raw string

parse_binary_float_literal_not_supported = binary float literal is not supported

parse_bounds_not_allowed_on_trait_aliases = bounds are not allowed on trait aliases

parse_box_not_pat = expected pattern, found {$descr}
Expand Down Expand Up @@ -292,7 +294,11 @@ parse_generic_parameters_without_angle_brackets = generic parameters without sur
parse_generics_in_path = unexpected generic arguments in path

parse_help_set_edition_cargo = set `edition = "{$edition}"` in `Cargo.toml`

parse_help_set_edition_standalone = pass `--edition {$edition}` to `rustc`

parse_hexadecimal_float_literal_not_supported = hexadecimal float literal is not supported

parse_if_expression_missing_condition = missing condition for `if` expression
.condition_label = expected condition here
.block_label = if this block is the condition of the `if` expression, then it must be followed by another block
Expand Down Expand Up @@ -364,6 +370,9 @@ parse_inner_doc_comment_not_permitted = expected outer doc comment
.label_does_not_annotate_this = the inner doc comment doesn't annotate this {$item}
.sugg_change_inner_to_outer = to annotate the {$item}, change the doc comment from inner to outer style

parse_int_literal_too_large = integer literal is too large
.note = value exceeds limit of `{$limit}`

parse_invalid_block_macro_segment = cannot use a `block` macro fragment here
.label = the `block` fragment is within this context
.suggestion = wrap this in another block
Expand All @@ -388,8 +397,18 @@ parse_invalid_dyn_keyword = invalid `dyn` keyword
.suggestion = remove this keyword

parse_invalid_expression_in_let_else = a `{$operator}` expression cannot be directly assigned in `let...else`
parse_invalid_float_literal_suffix = invalid suffix `{$suffix}` for float literal
.label = invalid suffix `{$suffix}`
.help = valid suffixes are `f32` and `f64`

parse_invalid_float_literal_width = invalid width `{$width}` for float literal
.help = valid widths are 32 and 64

parse_invalid_identifier_with_leading_number = identifiers cannot start with a number

parse_invalid_int_literal_width = invalid width `{$width}` for integer literal
.help = valid widths are 8, 16, 32, 64 and 128

parse_invalid_interpolated_expression = invalid interpolated expression

parse_invalid_literal_suffix = suffixes on {$kind} literals are invalid
Expand All @@ -408,6 +427,14 @@ parse_invalid_logical_operator = `{$incorrect}` is not a logical operator

parse_invalid_meta_item = expected unsuffixed literal or identifier, found `{$token}`

parse_invalid_num_literal_base_prefix = invalid base prefix for number literal
.note = base prefixes (`0xff`, `0b1010`, `0o755`) are lowercase
.suggestion = try making the prefix lowercase

parse_invalid_num_literal_suffix = invalid suffix `{$suffix}` for number literal
.label = invalid suffix `{$suffix}`
.help = the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.)

parse_invalid_unicode_escape = invalid unicode character escape
.label = invalid escape
.help = unicode escape must {$surrogate ->
Expand Down Expand Up @@ -612,6 +639,10 @@ parse_note_mut_pattern_usage = `mut` may be followed by `variable` and `variable

parse_note_pattern_alternatives_use_single_vert = alternatives in or-patterns are separated with `|`, not `||`

parse_nul_in_c_str = null characters in C string literals are not supported

parse_octal_float_literal_not_supported = octal float literal is not supported

parse_or_pattern_not_allowed_in_fn_parameters = top-level or-patterns are not allowed in function parameters
parse_or_pattern_not_allowed_in_let_binding = top-level or-patterns are not allowed in `let` bindings
parse_out_of_range_hex_escape = out of range hex escape
Expand Down
Loading
Loading