diff --git a/Cargo.lock b/Cargo.lock index dbe5b2ec6b75c..2c08e9a8df096 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3428,6 +3428,7 @@ dependencies = [ "rustc_index", "rustc_macros", "rustc_middle", + "rustc_parse", "rustc_session", "rustc_span", "rustc_target", diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 92b9adf1db751..663f8bdbf644d 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -3,8 +3,7 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; use rustc_lexer::unescape::{ - byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, - Mode, + byte_from_char, unescape_c_string, unescape_literal, CStrUnit, EscapeError, Mode, }; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; @@ -34,169 +33,268 @@ pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol { #[derive(Debug)] pub enum LitError { LexerError, + EscapeError { + mode: Mode, + // Length before the string content, e.g. 1 for "a", 5 for br##"a"## + prefix_len: u32, + // The range is the byte range of the bad character, using a zero index. + range: Range, + err: EscapeError, + }, InvalidSuffix, InvalidIntSuffix, InvalidFloatSuffix, NonDecimalFloat(u32), IntTooLarge(u32), - NulInCStr(Range), } impl LitKind { - /// Converts literal token into a semantic literal. - pub fn from_token_lit(lit: token::Lit) -> Result { + /// Converts literal token into a semantic literal. The return value has + /// two parts: + /// - The `Result` indicates success or failure. + /// - The `Vec` contains all found errors and warnings. + /// + /// If we only had to deal with errors, we could use the more obvious + /// `Result>`; on failure the caller would just + /// (optionally) print errors and take the error path and stop early. But + /// it's possible to succeed with zero errors and one or more warnings, and + /// in that case the caller should (optionally) print the warnings, but + /// also proceed with a valid `LitKind`. This return type facilitates that. + pub fn from_token_lit_with_errs(lit: token::Lit) -> (Result, Vec) { let token::Lit { kind, symbol, suffix } = lit; if suffix.is_some() && !kind.may_have_suffix() { - return Err(LitError::InvalidSuffix); + // Note: we return a single error here. We could instead continue + // processing, possibly returning multiple errors. + return (Err(()), vec![LitError::InvalidSuffix]); } - Ok(match kind { + let mut errs = vec![]; + let mut has_fatal = false; + + let res = match kind { token::Bool => { assert!(symbol.is_bool_lit()); - LitKind::Bool(symbol == kw::True) + Ok(LitKind::Bool(symbol == kw::True)) } token::Byte => { - return unescape_byte(symbol.as_str()) - .map(LitKind::Byte) - .map_err(|_| LitError::LexerError); + let mode = Mode::Byte; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // b' + range, + err, + }); + } + } + }); + if !has_fatal { Ok(LitKind::Byte(byte_from_char(res.unwrap()))) } else { Err(()) } } token::Char => { - return unescape_char(symbol.as_str()) - .map(LitKind::Char) - .map_err(|_| LitError::LexerError); + let mode = Mode::Char; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 1, // ' + range, + err, + }); + } + } + }); + if !has_fatal { Ok(LitKind::Char(res.unwrap())) } else { Err(()) } } // There are some valid suffixes for integer and float literals, // so all the handling is done internally. - token::Integer => return integer_lit(symbol, suffix), - token::Float => return float_lit(symbol, suffix), + token::Integer => { + return match integer_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } + token::Float => { + return match float_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } token::Str => { // If there are no characters requiring special treatment we can // reuse the symbol from the token. Otherwise, we must generate a // new symbol because the string in the LitKind is different to the // string in the token. + let mode = Mode::Str; let s = symbol.as_str(); // Vanilla strings are so common we optimize for the common case where no chars // requiring special behaviour are present. - let symbol = if s.contains(['\\', '\r']) { + if s.contains(['\\', '\r']) { let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); // Force-inlining here is aggressive but the closure is // called on every char in the string, so it can be // hot in programs with many long strings. unescape_literal( s, - Mode::Str, + mode, &mut #[inline(always)] - |_, unescaped_char| match unescaped_char { + |range, unescaped_char| match unescaped_char { Ok(c) => buf.push(c), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 1, // " + range, + err, + }); } }, ); - error?; - Symbol::intern(&buf) + if !has_fatal { + Ok(LitKind::Str(Symbol::intern(&buf), ast::StrStyle::Cooked)) + } else { + Err(()) + } } else { - symbol - }; - LitKind::Str(symbol, ast::StrStyle::Cooked) + Ok(LitKind::Str(symbol, ast::StrStyle::Cooked)) + } } token::StrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can reuse the symbol on success. - let mut error = Ok(()); - unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| { - match unescaped_char { - Ok(_) => {} - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } - } + let mode = Mode::RawStr; + let s = symbol.as_str(); + unescape_literal(s, mode, &mut |range, unescaped_char| match unescaped_char { + Ok(_) => {} + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2 + n as u32, // r", r#", r##", etc. + range, + err, + }); } }); - error?; - LitKind::Str(symbol, ast::StrStyle::Raw(n)) + if !has_fatal { Ok(LitKind::Str(symbol, ast::StrStyle::Raw(n))) } else { Err(()) } } token::ByteStr => { + let mode = Mode::ByteStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_literal(s, Mode::ByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(c) => buf.push(byte_from_char(c)), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // b" + range, + err, + }); } }); - error?; - LitKind::ByteStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + Ok(LitKind::ByteStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::ByteStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can convert the symbol directly to a `Lrc` on success. + let mode = Mode::RawByteStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, // br", br#", br##", etc. + range, + err, + }); } }); - LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n)) + if !has_fatal { + Ok(LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))) + } else { + Err(()) + } } token::CStr => { + let mode = Mode::CStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_c_string(s, Mode::CStr, &mut |span, c| match c { - Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { - error = Err(LitError::NulInCStr(span)); - } + unescape_c_string(s, mode, &mut |range, c| match c { Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) } Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // c" + range, + err, + }); } }); - error?; - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::CStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we - // can convert the symbol directly to a `Lrc` on success. + // can convert the symbol directly to a `Lrc` (after appending a nul char) on + // success. + let mode = Mode::RawCStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c { - Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { - error = Err(LitError::NulInCStr(span)); - } + unescape_c_string(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, // cr", cr#", cr##", etc. + range, + err, + }); } }); - error?; - let mut buf = s.to_owned().into_bytes(); - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Raw(n)) + if !has_fatal { + let mut buf = s.to_owned().into_bytes(); + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Raw(n))) + } else { + Err(()) + } } - token::Err => LitKind::Err, - }) + token::Err => Ok(LitKind::Err), + }; + (res, errs) + } + + // Use this one for call sites where we don't need to print error messages + // about invalid literals. + pub fn from_token_lit(lit: token::Lit) -> Result { + LitKind::from_token_lit_with_errs(lit).0 } } @@ -264,14 +362,26 @@ impl fmt::Display for LitKind { } impl MetaItemLit { - /// Converts a token literal into a meta item literal. - pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result { - Ok(MetaItemLit { + /// Converts a token literal into a meta item literal. See + /// `LitKind::from_token_lit` for an explanation of the return type. + pub fn from_token_lit_with_errs( + token_lit: token::Lit, + span: Span, + ) -> (Result, Vec) { + let (lit, errs) = LitKind::from_token_lit_with_errs(token_lit); + let lit = lit.map(|kind| MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, - kind: LitKind::from_token_lit(token_lit)?, + kind, span, - }) + }); + (lit, errs) + } + + // Use this one for call sites where we don't need to print error messages + // about invalid literals. + pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result { + MetaItemLit::from_token_lit_with_errs(token_lit, span).0 } /// Cheaply converts a meta item literal into a token literal. diff --git a/compiler/rustc_ast_lowering/Cargo.toml b/compiler/rustc_ast_lowering/Cargo.toml index 8cc4521e0a78d..285468bae1f84 100644 --- a/compiler/rustc_ast_lowering/Cargo.toml +++ b/compiler/rustc_ast_lowering/Cargo.toml @@ -17,6 +17,7 @@ rustc_hir = { path = "../rustc_hir" } rustc_index = { path = "../rustc_index" } rustc_macros = { path = "../rustc_macros" } rustc_middle = { path = "../rustc_middle" } +rustc_parse = { path = "../rustc_parse" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } rustc_target = { path = "../rustc_target" } diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index 11b5131b8d788..7b87a2cf676ba 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -14,7 +14,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_hir as hir; use rustc_hir::def::{DefKind, Res}; use rustc_middle::span_bug; -use rustc_session::errors::report_lit_error; +use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs; use rustc_span::source_map::{respan, Spanned}; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::DUMMY_SP; @@ -119,13 +119,12 @@ impl<'hir> LoweringContext<'_, 'hir> { hir::ExprKind::Unary(op, ohs) } ExprKind::Lit(token_lit) => { - let lit_kind = match LitKind::from_token_lit(*token_lit) { - Ok(lit_kind) => lit_kind, - Err(err) => { - report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span); - LitKind::Err - } - }; + let lit_kind = token_lit_to_lit_kind_and_report_errs( + &self.tcx.sess.parse_sess, + *token_lit, + e.span, + ) + .unwrap_or(LitKind::Err); let lit = self.arena.alloc(respan(self.lower_span(e.span), lit_kind)); hir::ExprKind::Lit(lit) } diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index 6b8330bfdaf92..e926418b7d065 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::tokenstream::TokenStream; use rustc_expand::base::{self, DummyResult}; -use rustc_session::errors::report_lit_error; +use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs; use rustc_span::symbol::Symbol; use crate::errors; @@ -19,44 +19,43 @@ pub fn expand_concat( let mut has_errors = false; for e in es { match e.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { - accumulator.push_str(s.as_str()); - } - Ok(ast::LitKind::Char(c)) => { - accumulator.push(c); - } - Ok(ast::LitKind::Int(i, _)) => { - accumulator.push_str(&i.to_string()); - } - Ok(ast::LitKind::Bool(b)) => { - accumulator.push_str(&b.to_string()); - } - Ok(ast::LitKind::CStr(..)) => { - cx.emit_err(errors::ConcatCStrLit { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { - cx.emit_err(errors::ConcatBytestr { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Err) => { - has_errors = true; - } - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); - has_errors = true; + ast::ExprKind::Lit(token_lit) => { + match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, e.span) + { + Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { + accumulator.push_str(s.as_str()); + } + Ok(ast::LitKind::Char(c)) => { + accumulator.push(c); + } + Ok(ast::LitKind::Int(i, _)) => { + accumulator.push_str(&i.to_string()); + } + Ok(ast::LitKind::Bool(b)) => { + accumulator.push_str(&b.to_string()); + } + Ok(ast::LitKind::CStr(..)) => { + cx.emit_err(errors::ConcatCStrLit { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { + cx.emit_err(errors::ConcatBytestr { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Err) | Err(()) => { + has_errors = true; + } } - }, + } // We also want to allow negative numeric literals. ast::ExprKind::Unary(ast::UnOp::Neg, ref expr) if let ast::ExprKind::Lit(token_lit) = expr.kind => { - match ast::LitKind::from_token_lit(token_lit) { + match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, e.span) + { Ok(ast::LitKind::Int(i, _)) => accumulator.push_str(&format!("-{i}")), Ok(ast::LitKind::Float(f, _)) => accumulator.push_str(&format!("-{f}")), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); + Err(()) => { has_errors = true; } _ => missing_literal.push(e.span), diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 96e9584c20955..f4d3bd458fc19 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::{ptr::P, tokenstream::TokenStream}; use rustc_expand::base::{self, DummyResult}; -use rustc_session::errors::report_lit_error; +use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs; use rustc_span::Span; use crate::errors; @@ -17,7 +17,7 @@ fn invalid_type_err( ConcatBytesInvalid, ConcatBytesInvalidSuggestion, ConcatBytesNonU8, ConcatBytesOob, }; let snippet = cx.sess.source_map().span_to_snippet(span).ok(); - match ast::LitKind::from_token_lit(token_lit) { + match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, span) { Ok(ast::LitKind::CStr(_, _)) => { // Avoid ambiguity in handling of terminal `NUL` by refusing to // concatenate C string literals as bytes. @@ -60,9 +60,7 @@ fn invalid_type_err( cx.emit_err(ConcatBytesNonU8 { span }); } Ok(ast::LitKind::ByteStr(..) | ast::LitKind::Byte(_)) => unreachable!(), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, span); - } + Err(()) => {} } } diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs index b63609c48e912..054908eed3a49 100644 --- a/compiler/rustc_expand/src/base.rs +++ b/compiler/rustc_expand/src/base.rs @@ -21,8 +21,7 @@ use rustc_errors::{ use rustc_feature::Features; use rustc_lint_defs::builtin::PROC_MACRO_BACK_COMPAT; use rustc_lint_defs::{BufferedEarlyLint, BuiltinLintDiagnostics, RegisteredTools}; -use rustc_parse::{parser, MACRO_ARGUMENTS}; -use rustc_session::errors::report_lit_error; +use rustc_parse::{self, parser, MACRO_ARGUMENTS}; use rustc_session::{parse::ParseSess, Limit, Session}; use rustc_span::def_id::{CrateNum, DefId, LocalDefId}; use rustc_span::edition::Edition; @@ -1236,26 +1235,30 @@ pub fn expr_to_spanned_string<'a>( let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr(); Err(match expr.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), - Ok(ast::LitKind::ByteStr(..)) => { - let mut err = cx.struct_span_err(expr.span, err_msg); - let span = expr.span.shrink_to_lo(); - err.span_suggestion( - span.with_hi(span.lo() + BytePos(1)), - "consider removing the leading `b`", - "", - Applicability::MaybeIncorrect, - ); - Some((err, true)) - } - Ok(ast::LitKind::Err) => None, - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span); - None - } - _ => Some((cx.struct_span_err(expr.span, err_msg), false)), - }, + ast::ExprKind::Lit(token_lit) => { + let res = match parser::token_lit_to_lit_kind_and_report_errs( + &cx.sess.parse_sess, + token_lit, + expr.span, + ) { + Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), + Ok(ast::LitKind::ByteStr(..)) => { + let mut err = cx.struct_span_err(expr.span, err_msg); + let span = expr.span.shrink_to_lo(); + err.span_suggestion( + span.with_hi(span.lo() + BytePos(1)), + "consider removing the leading `b`", + "", + Applicability::MaybeIncorrect, + ); + Some((err, true)) + } + Ok(ast::LitKind::Err) => None, + Err(()) => None, + _ => Some((cx.struct_span_err(expr.span, err_msg), false)), + }; + res + } ast::ExprKind::Err => None, _ => Some((cx.struct_span_err(expr.span, err_msg), false)), }) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index abec12f52a6e6..06999dae447b4 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -59,6 +59,9 @@ pub enum EscapeError { /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. NonAsciiCharInByte, + // `\0` in a C string literal. + NulInCStr, + /// After a line ending with '\', the next line contains whitespace /// characters that are not skipped. UnskippedWhitespaceWarning, @@ -122,10 +125,20 @@ where { match mode { CStr => { - unescape_non_raw_common(src, mode, callback); + unescape_non_raw_common(src, mode, &mut |r, mut result| { + if let Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result) + }); } RawCStr => { - check_raw_common(src, mode, &mut |r, result| callback(r, result.map(CStrUnit::Char))); + check_raw_common(src, mode, &mut |r, mut result| { + if let Ok('\0') = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result.map(CStrUnit::Char)) + }); } Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable!(), } @@ -335,7 +348,7 @@ where // them in the range computation. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { + let res: Result = match c { '\\' => { match chars.clone().next() { Some('\n') => { diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index 363b8f4bfb9cc..b6b8f108bbb41 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -59,6 +59,8 @@ parse_bare_cr = {$double_quotes -> parse_bare_cr_in_raw_string = bare CR not allowed in raw string +parse_binary_float_literal_not_supported = binary float literal is not supported + parse_bounds_not_allowed_on_trait_aliases = bounds are not allowed on trait aliases parse_box_not_pat = expected pattern, found {$descr} @@ -292,7 +294,11 @@ parse_generic_parameters_without_angle_brackets = generic parameters without sur parse_generics_in_path = unexpected generic arguments in path parse_help_set_edition_cargo = set `edition = "{$edition}"` in `Cargo.toml` + parse_help_set_edition_standalone = pass `--edition {$edition}` to `rustc` + +parse_hexadecimal_float_literal_not_supported = hexadecimal float literal is not supported + parse_if_expression_missing_condition = missing condition for `if` expression .condition_label = expected condition here .block_label = if this block is the condition of the `if` expression, then it must be followed by another block @@ -364,6 +370,9 @@ parse_inner_doc_comment_not_permitted = expected outer doc comment .label_does_not_annotate_this = the inner doc comment doesn't annotate this {$item} .sugg_change_inner_to_outer = to annotate the {$item}, change the doc comment from inner to outer style +parse_int_literal_too_large = integer literal is too large + .note = value exceeds limit of `{$limit}` + parse_invalid_block_macro_segment = cannot use a `block` macro fragment here .label = the `block` fragment is within this context .suggestion = wrap this in another block @@ -388,8 +397,18 @@ parse_invalid_dyn_keyword = invalid `dyn` keyword .suggestion = remove this keyword parse_invalid_expression_in_let_else = a `{$operator}` expression cannot be directly assigned in `let...else` +parse_invalid_float_literal_suffix = invalid suffix `{$suffix}` for float literal + .label = invalid suffix `{$suffix}` + .help = valid suffixes are `f32` and `f64` + +parse_invalid_float_literal_width = invalid width `{$width}` for float literal + .help = valid widths are 32 and 64 + parse_invalid_identifier_with_leading_number = identifiers cannot start with a number +parse_invalid_int_literal_width = invalid width `{$width}` for integer literal + .help = valid widths are 8, 16, 32, 64 and 128 + parse_invalid_interpolated_expression = invalid interpolated expression parse_invalid_literal_suffix = suffixes on {$kind} literals are invalid @@ -408,6 +427,14 @@ parse_invalid_logical_operator = `{$incorrect}` is not a logical operator parse_invalid_meta_item = expected unsuffixed literal or identifier, found `{$token}` +parse_invalid_num_literal_base_prefix = invalid base prefix for number literal + .note = base prefixes (`0xff`, `0b1010`, `0o755`) are lowercase + .suggestion = try making the prefix lowercase + +parse_invalid_num_literal_suffix = invalid suffix `{$suffix}` for number literal + .label = invalid suffix `{$suffix}` + .help = the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.) + parse_invalid_unicode_escape = invalid unicode character escape .label = invalid escape .help = unicode escape must {$surrogate -> @@ -612,6 +639,10 @@ parse_note_mut_pattern_usage = `mut` may be followed by `variable` and `variable parse_note_pattern_alternatives_use_single_vert = alternatives in or-patterns are separated with `|`, not `||` +parse_nul_in_c_str = null characters in C string literals are not supported + +parse_octal_float_literal_not_supported = octal float literal is not supported + parse_or_pattern_not_allowed_in_fn_parameters = top-level or-patterns are not allowed in function parameters parse_or_pattern_not_allowed_in_let_binding = top-level or-patterns are not allowed in `let` bindings parse_out_of_range_hex_escape = out of range hex escape diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 008adcc83d0ea..96cd5eee67ea3 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -2138,6 +2138,11 @@ pub enum UnescapeError { #[subdiagnostic] suggestion: MoreThanOneCharSugg, }, + #[diag(parse_nul_in_c_str)] + NulInCStr { + #[primary_span] + span: Span, + }, } #[derive(Subdiagnostic)] @@ -2897,3 +2902,95 @@ pub(crate) struct TransposeDynOrImplSugg<'a> { pub insertion_span: Span, pub kw: &'a str, } + +#[derive(Diagnostic)] +#[diag(parse_invalid_literal_suffix)] +pub(crate) struct InvalidLiteralSuffix<'a> { + #[primary_span] + #[label] + pub span: Span, + // FIXME(#100717) + pub kind: &'a str, + pub suffix: Symbol, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_int_literal_width)] +#[help] +pub(crate) struct InvalidIntLiteralWidth { + #[primary_span] + pub span: Span, + pub width: String, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_num_literal_base_prefix)] +#[note] +pub(crate) struct InvalidNumLiteralBasePrefix { + #[primary_span] + #[suggestion(applicability = "maybe-incorrect", code = "{fixed}")] + pub span: Span, + pub fixed: String, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_num_literal_suffix)] +#[help] +pub(crate) struct InvalidNumLiteralSuffix { + #[primary_span] + #[label] + pub span: Span, + pub suffix: String, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_float_literal_width)] +#[help] +pub(crate) struct InvalidFloatLiteralWidth { + #[primary_span] + pub span: Span, + pub width: String, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_float_literal_suffix)] +#[help] +pub(crate) struct InvalidFloatLiteralSuffix { + #[primary_span] + #[label] + pub span: Span, + pub suffix: String, +} + +#[derive(Diagnostic)] +#[diag(parse_hexadecimal_float_literal_not_supported)] +pub(crate) struct HexadecimalFloatLiteralNotSupported { + #[primary_span] + #[label(parse_not_supported)] + pub span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_octal_float_literal_not_supported)] +pub(crate) struct OctalFloatLiteralNotSupported { + #[primary_span] + #[label(parse_not_supported)] + pub span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_binary_float_literal_not_supported)] +pub(crate) struct BinaryFloatLiteralNotSupported { + #[primary_span] + #[label(parse_not_supported)] + pub span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_int_literal_too_large)] +#[note] +pub(crate) struct IntLiteralTooLarge { + #[primary_span] + pub span: Span, + pub limit: String, +} diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index c158edaac2554..96fae62e5d439 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,5 +1,3 @@ -use std::ops::Range; - use crate::errors; use crate::lexer::unicode_chars::UNICODE_ARRAY; use crate::make_unclosed_delims_error; @@ -8,7 +6,6 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; -use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_lexer::{Cursor, LiteralKind}; use rustc_session::lint::builtin::{ @@ -21,10 +18,10 @@ use rustc_span::{edition::Edition, BytePos, Pos, Span}; mod diagnostics; mod tokentrees; -mod unescape_error_reporting; +pub(crate) mod unescape_error_reporting; mod unicode_chars; -use unescape_error_reporting::{emit_unescape_error, escaped_char}; +use unescape_error_reporting::escaped_char; // This type is used a lot. Make sure it doesn't unintentionally get bigger. // @@ -409,7 +406,7 @@ impl<'a> StringReader<'a> { error_code!(E0762), ) } - self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' + self.cook_quoted(token::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { @@ -419,7 +416,7 @@ impl<'a> StringReader<'a> { error_code!(E0763), ) } - self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' + self.cook_quoted(token::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { @@ -429,7 +426,7 @@ impl<'a> StringReader<'a> { error_code!(E0765), ) } - self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " + self.cook_quoted(token::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { @@ -439,7 +436,7 @@ impl<'a> StringReader<'a> { error_code!(E0766), ) } - self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " + self.cook_quoted(token::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::CStr { terminated } => { if !terminated { @@ -449,13 +446,13 @@ impl<'a> StringReader<'a> { error_code!(E0767), ) } - self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + self.cook_quoted(token::CStr, start, end, 2, 1) // c" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::StrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## + self.cook_quoted(kind, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -464,7 +461,7 @@ impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::ByteStrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } @@ -473,7 +470,7 @@ impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::CStrRaw(n_hashes); - self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // cr##" "## } else { self.report_raw_str_error(start, 2); } @@ -693,82 +690,18 @@ impl<'a> StringReader<'a> { self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num }); } - fn cook_common( + fn cook_quoted( &self, kind: token::LitKind, - mode: Mode, start: BytePos, end: BytePos, prefix_len: u32, postfix_len: u32, - unescape: fn(&str, Mode, &mut dyn FnMut(Range, Result<(), EscapeError>)), ) -> (token::LitKind, Symbol) { - let mut has_fatal_err = false; let content_start = start + BytePos(prefix_len); let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); - unescape(lit_content, mode, &mut |range, result| { - // Here we only check for errors. The actual unescaping is done later. - if let Err(err) = result { - let span_with_quotes = self.mk_sp(start, end); - let (start, end) = (range.start as u32, range.end as u32); - let lo = content_start + BytePos(start); - let hi = lo + BytePos(end - start); - let span = self.mk_sp(lo, hi); - if err.is_fatal() { - has_fatal_err = true; - } - emit_unescape_error( - &self.sess.dcx, - lit_content, - span_with_quotes, - span, - mode, - range, - err, - ); - } - }); - - // We normally exclude the quotes for the symbol, but for errors we - // include it because it results in clearer error messages. - if !has_fatal_err { - (kind, Symbol::intern(lit_content)) - } else { - (token::Err, self.symbol_from_to(start, end)) - } - } - - fn cook_quoted( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_literal(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) - } - - fn cook_c_string( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_c_string(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) + (kind, Symbol::intern(lit_content)) } } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 775082adbe81e..3425fa16cfeb0 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -15,22 +15,28 @@ pub(crate) fn emit_unescape_error( lit: &str, // full span of the literal, including quotes and any prefix full_lit_span: Span, - // span of the error part of the literal - err_span: Span, mode: Mode, + prefix_len: u32, // range of the error inside `lit` range: Range, error: EscapeError, ) { + let (start, end) = (range.start as u32, range.end as u32); + let lo = full_lit_span.lo() + BytePos(prefix_len) + BytePos(start); + let hi = lo + BytePos(end - start); + let err_span = full_lit_span.with_lo(lo).with_hi(hi); + debug!( - "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", - lit, full_lit_span, mode, range, error + "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}, {:?}", + lit, full_lit_span, err_span, mode, range, error ); + let last_char = || { let c = lit[range.clone()].chars().next_back().unwrap(); let span = err_span.with_lo(err_span.hi() - BytePos(c.len_utf8() as u32)); (c, span) }; + match error { EscapeError::LoneSurrogateUnicodeEscape => { dcx.emit_err(UnescapeError::InvalidUnicodeEscape { span: err_span, surrogate: true }); @@ -262,6 +268,9 @@ pub(crate) fn emit_unescape_error( EscapeError::LoneSlash => { dcx.emit_err(UnescapeError::LoneSlash(err_span)); } + EscapeError::NulInCStr => { + dcx.emit_err(UnescapeError::NulInCStr { span: err_span }); + } EscapeError::UnskippedWhitespaceWarning => { let (c, char_span) = last_char(); dcx.emit_warning(UnescapeError::UnskippedWhitespace { diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index cd3e8b92f2f9e..f8cd207e85a25 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -8,6 +8,7 @@ use super::{ }; use crate::errors; +use crate::lexer::unescape_error_reporting::emit_unescape_error; use crate::maybe_recover_from_interpolated_ty_qpath; use ast::mut_visit::{noop_visit_expr, MutVisitor}; use ast::{CoroutineKind, GenBlockKind, Pat, Path, PathSegment}; @@ -17,6 +18,7 @@ use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::Spacing; use rustc_ast::util::case::Case; use rustc_ast::util::classify; +use rustc_ast::util::literal::LitError; use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity}; use rustc_ast::visit::Visitor; use rustc_ast::{self as ast, AttrStyle, AttrVec, CaptureBy, ExprField, UnOp, DUMMY_NODE_ID}; @@ -30,9 +32,10 @@ use rustc_errors::{ PResult, StashKey, }; use rustc_macros::Subdiagnostic; -use rustc_session::errors::{report_lit_error, ExprParenthesesNeeded}; +use rustc_session::errors::ExprParenthesesNeeded; use rustc_session::lint::builtin::BREAK_WITH_LABEL_AND_LOOP; use rustc_session::lint::BuiltinLintDiagnostics; +use rustc_session::parse::ParseSess; use rustc_span::source_map::{self, Spanned}; use rustc_span::symbol::kw::PathRoot; use rustc_span::symbol::{kw, sym, Ident, Symbol}; @@ -2046,27 +2049,30 @@ impl<'a> Parser<'a> { let recovered = self.recover_after_dot(); let token = recovered.as_ref().unwrap_or(&self.token); match token::Lit::from_token(token) { - Some(lit) => { - match MetaItemLit::from_token_lit(lit, token.span) { + Some(token_lit) => { + let err_span = token.uninterpolated_span(); + let lit = token_lit_to_meta_item_lit_and_report_errs( + self.sess, token_lit, token.span, err_span, + ); + + let res = match lit { Ok(lit) => { self.bump(); - Some(lit) + lit } - Err(err) => { - let span = token.uninterpolated_span(); + Err(()) => { self.bump(); - report_lit_error(self.sess, err, lit, span); // Pack possible quotes and prefixes from the original literal into // the error literal's symbol so they can be pretty-printed faithfully. - let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None); + let suffixless_lit = + token::Lit::new(token_lit.kind, token_lit.symbol, None); let symbol = Symbol::intern(&suffixless_lit.to_string()); - let lit = token::Lit::new(token::Err, symbol, lit.suffix); - Some( - MetaItemLit::from_token_lit(lit, span) - .unwrap_or_else(|_| unreachable!()), - ) + let lit = token::Lit::new(token::Err, symbol, token_lit.suffix); + MetaItemLit::from_token_lit(lit, err_span) + .unwrap_or_else(|_| unreachable!()) } - } + }; + Some(res) } None => None, } @@ -3672,6 +3678,120 @@ impl<'a> Parser<'a> { } } +// Use this for call sites where we need to print errors about invalid literals. +pub fn token_lit_to_lit_kind_and_report_errs( + sess: &ParseSess, + token_lit: token::Lit, + span: Span, +) -> Result { + let (lit, errs) = ast::LitKind::from_token_lit_with_errs(token_lit); + for err in errs { + report_lit_error(sess, err, token_lit, span); + } + lit +} + +// Use this for call sites where we need to print errors about invalid literals. +pub fn token_lit_to_meta_item_lit_and_report_errs( + sess: &ParseSess, + token_lit: token::Lit, + lit_span: Span, + err_span: Span, +) -> Result { + let (lit, errs) = ast::MetaItemLit::from_token_lit_with_errs(token_lit, lit_span); + for err in errs { + report_lit_error(sess, err, token_lit, err_span); + } + lit +} + +fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { + // Checks if `s` looks like i32 or u1234 etc. + fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) + } + + // Try to lowercase the prefix if the prefix and suffix are valid. + fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option { + let mut chars = suffix.chars(); + + let base_char = chars.next().unwrap(); + let base = match base_char { + 'B' => 2, + 'O' => 8, + 'X' => 16, + _ => return None, + }; + + // check that the suffix contains only base-appropriate characters + let valid = prefix == "0" + && chars + .filter(|c| *c != '_') + .take_while(|c| *c != 'i' && *c != 'u') + .all(|c| c.to_digit(base).is_some()); + + valid.then(|| format!("0{}{}", base_char.to_ascii_lowercase(), &suffix[1..])) + } + + let token::Lit { kind, symbol, suffix, .. } = lit; + match err { + // `LexerError` is an error, but it was already reported + // by lexer, so here we don't report it the second time. + LitError::LexerError => {} + LitError::EscapeError { mode, prefix_len, range, err } => { + emit_unescape_error(&sess.dcx, symbol.as_str(), span, mode, prefix_len, range, err); + } + LitError::InvalidSuffix => { + if let Some(suffix) = suffix { + sess.emit_err(errors::InvalidLiteralSuffix { span, kind: kind.descr(), suffix }); + } + } + LitError::InvalidIntSuffix => { + let suf = suffix.expect("suffix error with no suffix"); + let suf = suf.as_str(); + if looks_like_width_suffix(&['i', 'u'], suf) { + // If it looks like a width, try to be helpful. + sess.emit_err(errors::InvalidIntLiteralWidth { span, width: suf[1..].into() }); + } else if let Some(fixed) = fix_base_capitalisation(symbol.as_str(), suf) { + sess.emit_err(errors::InvalidNumLiteralBasePrefix { span, fixed }); + } else { + sess.emit_err(errors::InvalidNumLiteralSuffix { span, suffix: suf.to_string() }); + } + } + LitError::InvalidFloatSuffix => { + let suf = suffix.expect("suffix error with no suffix"); + let suf = suf.as_str(); + if looks_like_width_suffix(&['f'], suf) { + // If it looks like a width, try to be helpful. + sess.emit_err(errors::InvalidFloatLiteralWidth { + span, + width: suf[1..].to_string(), + }); + } else { + sess.emit_err(errors::InvalidFloatLiteralSuffix { span, suffix: suf.to_string() }); + } + } + LitError::NonDecimalFloat(base) => { + match base { + 16 => sess.emit_err(errors::HexadecimalFloatLiteralNotSupported { span }), + 8 => sess.emit_err(errors::OctalFloatLiteralNotSupported { span }), + 2 => sess.emit_err(errors::BinaryFloatLiteralNotSupported { span }), + _ => unreachable!(), + }; + } + LitError::IntTooLarge(base) => { + let max = u128::MAX; + let limit = match base { + 2 => format!("{max:#b}"), + 8 => format!("{max:#o}"), + 16 => format!("{max:#x}"), + _ => format!("{max}"), + }; + sess.emit_err(errors::IntLiteralTooLarge { span, limit }); + } + } +} + /// Used to forbid `let` expressions in certain syntactic locations. #[derive(Clone, Copy, Subdiagnostic)] pub(crate) enum ForbiddenLetReason { diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index b91432f10c8fa..bf39ac924ecc5 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -11,6 +11,9 @@ mod stmt; mod ty; use crate::lexer::UnmatchedDelim; +pub use crate::parser::expr::{ + token_lit_to_lit_kind_and_report_errs, token_lit_to_meta_item_lit_and_report_errs, +}; pub use attr_wrapper::AttrWrapper; pub use diagnostics::AttemptLocalParseRecovery; pub(crate) use expr::ForbiddenLetReason; diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs index 9fea3826652c6..02c1e153afebb 100644 --- a/compiler/rustc_parse/src/validate_attr.rs +++ b/compiler/rustc_parse/src/validate_attr.rs @@ -1,14 +1,13 @@ //! Meta-syntax validation logic of attributes for post-expansion. -use crate::{errors, parse_in}; +use crate::{errors, parse_in, parser}; use rustc_ast::token::Delimiter; use rustc_ast::tokenstream::DelimSpan; -use rustc_ast::MetaItemKind; use rustc_ast::{self as ast, AttrArgs, AttrArgsEq, Attribute, DelimArgs, MetaItem}; +use rustc_ast::{LitKind, MetaItemKind, MetaItemLit}; use rustc_errors::{Applicability, FatalError, PResult}; use rustc_feature::{AttributeTemplate, BuiltinAttribute, BUILTIN_ATTRIBUTE_MAP}; -use rustc_session::errors::report_lit_error; use rustc_session::lint::builtin::ILL_FORMED_ATTRIBUTE_INPUT; use rustc_session::parse::ParseSess; use rustc_span::{sym, Span, Symbol}; @@ -52,8 +51,10 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta } AttrArgs::Eq(_, AttrArgsEq::Ast(expr)) => { if let ast::ExprKind::Lit(token_lit) = expr.kind { - let res = ast::MetaItemLit::from_token_lit(token_lit, expr.span); - let res = match res { + let lit = parser::token_lit_to_meta_item_lit_and_report_errs( + sess, token_lit, expr.span, expr.span, + ); + match lit { Ok(lit) => { if token_lit.suffix.is_some() { let mut err = sess.dcx.struct_span_err( @@ -69,18 +70,16 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta MetaItemKind::NameValue(lit) } } - Err(err) => { - report_lit_error(sess, err, token_lit, expr.span); - let lit = ast::MetaItemLit { + Err(()) => { + let lit = MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, - kind: ast::LitKind::Err, + kind: LitKind::Err, span: expr.span, }; MetaItemKind::NameValue(lit) } - }; - res + } } else { // Example cases: // - `#[foo = 1+1]`: results in `ast::ExprKind::BinOp`. diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl index f2e646c70f577..712388775fc30 100644 --- a/compiler/rustc_session/messages.ftl +++ b/compiler/rustc_session/messages.ftl @@ -1,4 +1,3 @@ -session_binary_float_literal_not_supported = binary float literal is not supported session_branch_protection_requires_aarch64 = `-Zbranch-protection` is only supported on aarch64 session_cannot_enable_crt_static_linux = sanitizer is incompatible with statically linked libc, disable it using `-C target-feature=-crt-static` @@ -32,50 +31,18 @@ session_function_return_requires_x86_or_x86_64 = `-Zfunction-return` (except `ke session_function_return_thunk_extern_requires_non_large_code_model = `-Zfunction-return=thunk-extern` is only supported on non-large code models -session_hexadecimal_float_literal_not_supported = hexadecimal float literal is not supported - session_incompatible_linker_flavor = linker flavor `{$flavor}` is incompatible with the current target .note = compatible flavors are: {$compatible_list} session_instrumentation_not_supported = {$us} instrumentation is not supported for this target -session_int_literal_too_large = integer literal is too large - .note = value exceeds limit of `{$limit}` - session_invalid_character_in_create_name = invalid character `{$character}` in crate name: `{$crate_name}` session_invalid_character_in_create_name_help = you can either pass `--crate-name` on the command line or add `#![crate_name="…"]` to set the crate name -session_invalid_float_literal_suffix = invalid suffix `{$suffix}` for float literal - .label = invalid suffix `{$suffix}` - .help = valid suffixes are `f32` and `f64` - -session_invalid_float_literal_width = invalid width `{$width}` for float literal - .help = valid widths are 32 and 64 - -session_invalid_int_literal_width = invalid width `{$width}` for integer literal - .help = valid widths are 8, 16, 32, 64 and 128 - -session_invalid_literal_suffix = suffixes on {$kind} literals are invalid - .label = invalid suffix `{$suffix}` - -session_invalid_num_literal_base_prefix = invalid base prefix for number literal - .note = base prefixes (`0xff`, `0b1010`, `0o755`) are lowercase - .suggestion = try making the prefix lowercase - -session_invalid_num_literal_suffix = invalid suffix `{$suffix}` for number literal - .label = invalid suffix `{$suffix}` - .help = the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.) - session_linker_plugin_lto_windows_not_supported = linker plugin based LTO is not supported together with `-C prefer-dynamic` when targeting Windows-like targets session_not_circumvent_feature = `-Zunleash-the-miri-inside-of-you` may not be used to circumvent feature gates, except when testing error paths in the CTFE engine -session_not_supported = not supported - -session_nul_in_c_str = null characters in C string literals are not supported - -session_octal_float_literal_not_supported = octal float literal is not supported - session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg} session_profile_sample_use_file_does_not_exist = file `{$path}` passed to `-C profile-sample-use` does not exist. diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs index c3360815ac9f8..f1d578a2c9176 100644 --- a/compiler/rustc_session/src/errors.rs +++ b/compiler/rustc_session/src/errors.rs @@ -1,11 +1,8 @@ use std::num::NonZeroU32; -use crate::parse::ParseSess; -use rustc_ast::token; -use rustc_ast::util::literal::LitError; use rustc_errors::{error_code, DiagnosticMessage, ErrorGuaranteed, IntoDiagnostic, MultiSpan}; use rustc_macros::Diagnostic; -use rustc_span::{BytePos, Span, Symbol}; +use rustc_span::{Span, Symbol}; use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple}; pub struct FeatureGateError { @@ -237,192 +234,6 @@ pub enum UnleashedFeatureHelp { }, } -#[derive(Diagnostic)] -#[diag(session_invalid_literal_suffix)] -pub(crate) struct InvalidLiteralSuffix<'a> { - #[primary_span] - #[label] - pub span: Span, - // FIXME(#100717) - pub kind: &'a str, - pub suffix: Symbol, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_int_literal_width)] -#[help] -pub(crate) struct InvalidIntLiteralWidth { - #[primary_span] - pub span: Span, - pub width: String, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_num_literal_base_prefix)] -#[note] -pub(crate) struct InvalidNumLiteralBasePrefix { - #[primary_span] - #[suggestion(applicability = "maybe-incorrect", code = "{fixed}")] - pub span: Span, - pub fixed: String, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_num_literal_suffix)] -#[help] -pub(crate) struct InvalidNumLiteralSuffix { - #[primary_span] - #[label] - pub span: Span, - pub suffix: String, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_float_literal_width)] -#[help] -pub(crate) struct InvalidFloatLiteralWidth { - #[primary_span] - pub span: Span, - pub width: String, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_float_literal_suffix)] -#[help] -pub(crate) struct InvalidFloatLiteralSuffix { - #[primary_span] - #[label] - pub span: Span, - pub suffix: String, -} - -#[derive(Diagnostic)] -#[diag(session_int_literal_too_large)] -#[note] -pub(crate) struct IntLiteralTooLarge { - #[primary_span] - pub span: Span, - pub limit: String, -} - -#[derive(Diagnostic)] -#[diag(session_hexadecimal_float_literal_not_supported)] -pub(crate) struct HexadecimalFloatLiteralNotSupported { - #[primary_span] - #[label(session_not_supported)] - pub span: Span, -} - -#[derive(Diagnostic)] -#[diag(session_octal_float_literal_not_supported)] -pub(crate) struct OctalFloatLiteralNotSupported { - #[primary_span] - #[label(session_not_supported)] - pub span: Span, -} - -#[derive(Diagnostic)] -#[diag(session_binary_float_literal_not_supported)] -pub(crate) struct BinaryFloatLiteralNotSupported { - #[primary_span] - #[label(session_not_supported)] - pub span: Span, -} - -#[derive(Diagnostic)] -#[diag(session_nul_in_c_str)] -pub(crate) struct NulInCStr { - #[primary_span] - pub span: Span, -} - -pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { - // Checks if `s` looks like i32 or u1234 etc. - fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { - s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) - } - - // Try to lowercase the prefix if the prefix and suffix are valid. - fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option { - let mut chars = suffix.chars(); - - let base_char = chars.next().unwrap(); - let base = match base_char { - 'B' => 2, - 'O' => 8, - 'X' => 16, - _ => return None, - }; - - // check that the suffix contains only base-appropriate characters - let valid = prefix == "0" - && chars - .filter(|c| *c != '_') - .take_while(|c| *c != 'i' && *c != 'u') - .all(|c| c.to_digit(base).is_some()); - - valid.then(|| format!("0{}{}", base_char.to_ascii_lowercase(), &suffix[1..])) - } - - let token::Lit { kind, symbol, suffix, .. } = lit; - match err { - // `LexerError` is an error, but it was already reported - // by lexer, so here we don't report it the second time. - LitError::LexerError => {} - LitError::InvalidSuffix => { - if let Some(suffix) = suffix { - sess.emit_err(InvalidLiteralSuffix { span, kind: kind.descr(), suffix }); - } - } - LitError::InvalidIntSuffix => { - let suf = suffix.expect("suffix error with no suffix"); - let suf = suf.as_str(); - if looks_like_width_suffix(&['i', 'u'], suf) { - // If it looks like a width, try to be helpful. - sess.emit_err(InvalidIntLiteralWidth { span, width: suf[1..].into() }); - } else if let Some(fixed) = fix_base_capitalisation(symbol.as_str(), suf) { - sess.emit_err(InvalidNumLiteralBasePrefix { span, fixed }); - } else { - sess.emit_err(InvalidNumLiteralSuffix { span, suffix: suf.to_string() }); - } - } - LitError::InvalidFloatSuffix => { - let suf = suffix.expect("suffix error with no suffix"); - let suf = suf.as_str(); - if looks_like_width_suffix(&['f'], suf) { - // If it looks like a width, try to be helpful. - sess.emit_err(InvalidFloatLiteralWidth { span, width: suf[1..].to_string() }); - } else { - sess.emit_err(InvalidFloatLiteralSuffix { span, suffix: suf.to_string() }); - } - } - LitError::NonDecimalFloat(base) => { - match base { - 16 => sess.emit_err(HexadecimalFloatLiteralNotSupported { span }), - 8 => sess.emit_err(OctalFloatLiteralNotSupported { span }), - 2 => sess.emit_err(BinaryFloatLiteralNotSupported { span }), - _ => unreachable!(), - }; - } - LitError::IntTooLarge(base) => { - let max = u128::MAX; - let limit = match base { - 2 => format!("{max:#b}"), - 8 => format!("{max:#o}"), - 16 => format!("{max:#x}"), - _ => format!("{max}"), - }; - sess.emit_err(IntLiteralTooLarge { span, limit }); - } - LitError::NulInCStr(range) => { - let lo = BytePos(span.lo().0 + range.start as u32 + 2); - let hi = BytePos(span.lo().0 + range.end as u32 + 2); - let span = span.with_lo(lo).with_hi(hi); - sess.emit_err(NulInCStr { span }); - } - } -} - #[derive(Diagnostic)] #[diag(session_optimization_fuel_exhausted)] pub struct OptimisationFuelExhausted { diff --git a/tests/rustdoc-ui/ignore-block-help.rs b/tests/rustdoc-ui/ignore-block-help.rs index 86f6a2868fb56..fb27d954f9a5a 100644 --- a/tests/rustdoc-ui/ignore-block-help.rs +++ b/tests/rustdoc-ui/ignore-block-help.rs @@ -1,10 +1,10 @@ // check-pass /// ```ignore (to-prevent-tidy-error) -/// let heart = '❤️'; +/// let unterminated = ' /// ``` //~^^^ WARNING could not parse code block //~| NOTE on by default -//~| NOTE character literal may only contain one codepoint +//~| NOTE unterminated character literal //~| HELP `ignore` code blocks require valid Rust code pub struct X; diff --git a/tests/rustdoc-ui/ignore-block-help.stderr b/tests/rustdoc-ui/ignore-block-help.stderr index a30ea51dd8a7f..f5ed287a99834 100644 --- a/tests/rustdoc-ui/ignore-block-help.stderr +++ b/tests/rustdoc-ui/ignore-block-help.stderr @@ -3,7 +3,7 @@ warning: could not parse code block as Rust code | LL | /// ```ignore (to-prevent-tidy-error) | _____^ -LL | | /// let heart = '❤️'; +LL | | /// let unterminated = ' LL | | /// ``` | |_______^ | @@ -12,7 +12,7 @@ help: `ignore` code blocks require valid Rust code for syntax highlighting; mark | LL | /// ```ignore (to-prevent-tidy-error) | ^^^ - = note: error from rustc: character literal may only contain one codepoint + = note: error from rustc: unterminated character literal = note: `#[warn(rustdoc::invalid_rust_codeblocks)]` on by default warning: 1 warning emitted diff --git a/tests/ui/fmt/format-string-error-2.stderr b/tests/ui/fmt/format-string-error-2.stderr index dfd24bf60ad52..50ead59e4e911 100644 --- a/tests/ui/fmt/format-string-error-2.stderr +++ b/tests/ui/fmt/format-string-error-2.stderr @@ -1,9 +1,3 @@ -error: incorrect unicode escape sequence - --> $DIR/format-string-error-2.rs:77:20 - | -LL | println!("\x7B}\u8 {", 1); - | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` - error: invalid format string: expected `'}'`, found `'a'` --> $DIR/format-string-error-2.rs:5:5 | @@ -155,6 +149,12 @@ LL | println!("\x7B}\u{8} {", 1); | = note: if you intended to print `{`, you can escape it using `{{` +error: incorrect unicode escape sequence + --> $DIR/format-string-error-2.rs:77:20 + | +LL | println!("\x7B}\u8 {", 1); + | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` + error: invalid format string: unmatched `}` found --> $DIR/format-string-error-2.rs:81:21 | diff --git a/tests/ui/lexer/error-stage.rs b/tests/ui/lexer/error-stage.rs index c8d88f745a1f0..5edb334c109dd 100644 --- a/tests/ui/lexer/error-stage.rs +++ b/tests/ui/lexer/error-stage.rs @@ -1,3 +1,5 @@ +// edition:2021 + // This test is about the treatment of invalid literals. In particular, some // literals are only considered invalid if they survive to HIR lowering. // @@ -41,6 +43,11 @@ // https://doc.rust-lang.org/reference/tokens.html#integer-literals says that // literals like `128_i8` and `256_u8` "are too big for their type, but are // still valid tokens". +// +// String literals, etc. +// --------------------- +// There are various ways that char, byte, and string literals can be invalid, +// mostly involving invalid escape sequences. macro_rules! sink { ($($x:tt;)*) => {()} @@ -48,7 +55,7 @@ macro_rules! sink { // The invalid literals are ignored because the macro consumes them. Except for // `0b10.0f32` because it's a lexer error. -const _: () = sink! { +const c1: () = sink! { "string"any_suffix; // OK 10u123; // OK 10.0f123; // OK @@ -60,7 +67,7 @@ const _: () = sink! { // The invalid literals used to cause errors, but this was changed by #102944. // Except for `0b010.0f32`, because it's a lexer error. #[cfg(FALSE)] -fn configured_out() { +fn configured_out1() { "string"any_suffix; // OK 10u123; // OK 10.0f123; // OK @@ -70,7 +77,7 @@ fn configured_out() { } // All the invalid literals cause errors. -fn main() { +fn f1() { "string"any_suffix; //~ ERROR suffixes on string literals are invalid 10u123; //~ ERROR invalid width `123` for integer literal 10.0f123; //~ ERROR invalid width `123` for float literal @@ -78,3 +85,36 @@ fn main() { 0b10.0f32; //~ ERROR binary float literal is not supported 999340282366920938463463374607431768211455999; //~ ERROR integer literal is too large } + +// These invalid literals used to cause errors, but this was changed by #118699. +const c2: () = sink! { + ''; + b'ab'; + "\a"; + b"\xzz"; + "\u20"; + c"\u{999999}"; +}; + +// These invalid literals used to cause errors, but this was changed by #118699. +#[cfg(FALSE)] +fn configured_out2() { + ''; + b'ab'; + "\a"; + b"\xzz"; + "\u20"; + c"\u{999999}"; +} + +// These invalid literals cause errors. +fn f2() { + ''; //~ ERROR empty character literal + b'ab'; //~ ERROR character literal may only contain one codepoint + "\a"; //~ ERROR unknown character escape: `a` + b"\xzz"; //~ ERROR invalid character in numeric character escape + "\u20"; //~ ERROR incorrect unicode escape sequence + c"\u{999999}"; //~ ERROR invalid unicode character escape +} + +fn main() {} diff --git a/tests/ui/lexer/error-stage.stderr b/tests/ui/lexer/error-stage.stderr index ecbdb14dc868e..f3572600ac193 100644 --- a/tests/ui/lexer/error-stage.stderr +++ b/tests/ui/lexer/error-stage.stderr @@ -1,29 +1,29 @@ error: binary float literal is not supported - --> $DIR/error-stage.rs:56:5 + --> $DIR/error-stage.rs:63:5 | LL | 0b10.0f32; | ^^^^^^ error: binary float literal is not supported - --> $DIR/error-stage.rs:68:5 + --> $DIR/error-stage.rs:75:5 | LL | 0b10.0f32; | ^^^^^^ error: binary float literal is not supported - --> $DIR/error-stage.rs:78:5 + --> $DIR/error-stage.rs:85:5 | LL | 0b10.0f32; | ^^^^^^ error: suffixes on string literals are invalid - --> $DIR/error-stage.rs:74:5 + --> $DIR/error-stage.rs:81:5 | LL | "string"any_suffix; | ^^^^^^^^^^^^^^^^^^ invalid suffix `any_suffix` error: invalid width `123` for integer literal - --> $DIR/error-stage.rs:75:5 + --> $DIR/error-stage.rs:82:5 | LL | 10u123; | ^^^^^^ @@ -31,7 +31,7 @@ LL | 10u123; = help: valid widths are 8, 16, 32, 64 and 128 error: invalid width `123` for float literal - --> $DIR/error-stage.rs:76:5 + --> $DIR/error-stage.rs:83:5 | LL | 10.0f123; | ^^^^^^^^ @@ -39,18 +39,69 @@ LL | 10.0f123; = help: valid widths are 32 and 64 error: binary float literal is not supported - --> $DIR/error-stage.rs:77:5 + --> $DIR/error-stage.rs:84:5 | LL | 0b10f32; | ^^^^^^^ not supported error: integer literal is too large - --> $DIR/error-stage.rs:79:5 + --> $DIR/error-stage.rs:86:5 | LL | 999340282366920938463463374607431768211455999; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: value exceeds limit of `340282366920938463463374607431768211455` -error: aborting due to 8 previous errors +error: empty character literal + --> $DIR/error-stage.rs:112:6 + | +LL | ''; + | ^ empty character literal + +error: character literal may only contain one codepoint + --> $DIR/error-stage.rs:113:5 + | +LL | b'ab'; + | ^^^^^ + | +help: if you meant to write a byte string literal, use double quotes + | +LL | b"ab"; + | ~~~~~ + +error: unknown character escape: `a` + --> $DIR/error-stage.rs:114:7 + | +LL | "\a"; + | ^ unknown character escape + | + = help: for more information, visit +help: if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal + | +LL | r"\a"; + | ~~~~~ + +error: invalid character in numeric character escape: `z` + --> $DIR/error-stage.rs:115:9 + | +LL | b"\xzz"; + | ^ invalid character in numeric character escape + +error: incorrect unicode escape sequence + --> $DIR/error-stage.rs:116:6 + | +LL | "\u20"; + | ^^^- + | | + | help: format of unicode escape sequences uses braces: `\u{20}` + +error: invalid unicode character escape + --> $DIR/error-stage.rs:117:7 + | +LL | c"\u{999999}"; + | ^^^^^^^^^^ invalid escape + | + = help: unicode escape must be at most 10FFFF + +error: aborting due to 14 previous errors diff --git a/tests/ui/lexer/lex-bad-char-literals-7.rs b/tests/ui/lexer/lex-bad-char-literals-7.rs index c675df2f3ccd0..55484a610141b 100644 --- a/tests/ui/lexer/lex-bad-char-literals-7.rs +++ b/tests/ui/lexer/lex-bad-char-literals-7.rs @@ -7,7 +7,4 @@ fn main() { // Next two are OK, but may befool error recovery let _ = '/'; let _ = b'/'; - - let _ = ' hello // here's a comment - //~^ ERROR: unterminated character literal } diff --git a/tests/ui/lexer/lex-bad-char-literals-7.stderr b/tests/ui/lexer/lex-bad-char-literals-7.stderr index 255b9c6899999..16ba7676932fd 100644 --- a/tests/ui/lexer/lex-bad-char-literals-7.stderr +++ b/tests/ui/lexer/lex-bad-char-literals-7.stderr @@ -10,12 +10,5 @@ error: empty unicode escape LL | let _: char = '\u{}'; | ^^^^ this escape must have at least 1 hex digit -error[E0762]: unterminated character literal - --> $DIR/lex-bad-char-literals-7.rs:11:13 - | -LL | let _ = ' hello // here's a comment - | ^^^^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors -For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/lexer/lex-bad-char-literals-8.rs b/tests/ui/lexer/lex-bad-char-literals-8.rs new file mode 100644 index 0000000000000..6c8cbd3a82a85 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.rs @@ -0,0 +1,4 @@ +fn main() { + let _ = ' hello // here's a comment + //~^ ERROR: unterminated character literal +} diff --git a/tests/ui/lexer/lex-bad-char-literals-8.stderr b/tests/ui/lexer/lex-bad-char-literals-8.stderr new file mode 100644 index 0000000000000..04c95df0d0601 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.stderr @@ -0,0 +1,9 @@ +error[E0762]: unterminated character literal + --> $DIR/lex-bad-char-literals-8.rs:2:13 + | +LL | let _ = ' hello // here's a comment + | ^^^^^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/parser/byte-literals-2.rs b/tests/ui/parser/byte-literals-2.rs new file mode 100644 index 0000000000000..fb9e2ac69944a --- /dev/null +++ b/tests/ui/parser/byte-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b'a //~ ERROR unterminated byte constant [E0763] +} diff --git a/tests/ui/parser/byte-literals-2.stderr b/tests/ui/parser/byte-literals-2.stderr new file mode 100644 index 0000000000000..f0e042ad605db --- /dev/null +++ b/tests/ui/parser/byte-literals-2.stderr @@ -0,0 +1,9 @@ +error[E0763]: unterminated byte constant + --> $DIR/byte-literals-2.rs:2:6 + | +LL | b'a + | ^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0763`. diff --git a/tests/ui/parser/byte-literals.rs b/tests/ui/parser/byte-literals.rs index 896dc1a1a5fba..963a0bb608d84 100644 --- a/tests/ui/parser/byte-literals.rs +++ b/tests/ui/parser/byte-literals.rs @@ -8,5 +8,4 @@ pub fn main() { b' '; //~ ERROR byte constant must be escaped b'''; //~ ERROR byte constant must be escaped b'é'; //~ ERROR non-ASCII character in byte literal - b'a //~ ERROR unterminated byte constant [E0763] } diff --git a/tests/ui/parser/byte-literals.stderr b/tests/ui/parser/byte-literals.stderr index 5b414c8927e2c..97805e01db49f 100644 --- a/tests/ui/parser/byte-literals.stderr +++ b/tests/ui/parser/byte-literals.stderr @@ -43,12 +43,5 @@ help: if you meant to use the unicode code point for 'é', use a \xHH escape LL | b'\xE9'; | ~~~~ -error[E0763]: unterminated byte constant - --> $DIR/byte-literals.rs:11:6 - | -LL | b'a - | ^^^^ - -error: aborting due to 7 previous errors +error: aborting due to 6 previous errors -For more information about this error, try `rustc --explain E0763`. diff --git a/tests/ui/parser/byte-string-literals-2.rs b/tests/ui/parser/byte-string-literals-2.rs new file mode 100644 index 0000000000000..7eb52b854e358 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b"a //~ ERROR unterminated double quote byte string +} diff --git a/tests/ui/parser/byte-string-literals-2.stderr b/tests/ui/parser/byte-string-literals-2.stderr new file mode 100644 index 0000000000000..6fdb3c64ba783 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.stderr @@ -0,0 +1,11 @@ +error[E0766]: unterminated double quote byte string + --> $DIR/byte-string-literals-2.rs:2:6 + | +LL | b"a + | ______^ +LL | | } + | |__^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0766`. diff --git a/tests/ui/parser/byte-string-literals.rs b/tests/ui/parser/byte-string-literals.rs index 30a4f50c4e40b..c14488dcb6689 100644 --- a/tests/ui/parser/byte-string-literals.rs +++ b/tests/ui/parser/byte-string-literals.rs @@ -5,5 +5,4 @@ pub fn main() { b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z` b"é"; //~ ERROR non-ASCII character in byte string literal br##"é"##; //~ ERROR non-ASCII character in raw byte string literal - b"a //~ ERROR unterminated double quote byte string } diff --git a/tests/ui/parser/byte-string-literals.stderr b/tests/ui/parser/byte-string-literals.stderr index 655b6998e85ff..2a2830c346825 100644 --- a/tests/ui/parser/byte-string-literals.stderr +++ b/tests/ui/parser/byte-string-literals.stderr @@ -37,14 +37,5 @@ error: non-ASCII character in raw byte string literal LL | br##"é"##; | ^ must be ASCII -error[E0766]: unterminated double quote byte string - --> $DIR/byte-string-literals.rs:8:6 - | -LL | b"a - | ______^ -LL | | } - | |__^ - -error: aborting due to 6 previous errors +error: aborting due to 5 previous errors -For more information about this error, try `rustc --explain E0766`. diff --git a/tests/ui/parser/issues/issue-62913.rs b/tests/ui/parser/issues/issue-62913.rs index a55ef5ac71030..c77ef61a97b10 100644 --- a/tests/ui/parser/issues/issue-62913.rs +++ b/tests/ui/parser/issues/issue-62913.rs @@ -1,4 +1,5 @@ -"\u\\" -//~^ ERROR incorrect unicode escape sequence -//~| ERROR invalid trailing slash in literal -//~| ERROR expected item, found `"\u\"` +fn main() { + _ = "\u\\"; + //~^ ERROR incorrect unicode escape sequence + //~| ERROR invalid trailing slash in literal +} diff --git a/tests/ui/parser/issues/issue-62913.stderr b/tests/ui/parser/issues/issue-62913.stderr index c33e46837287f..bee6dd4580037 100644 --- a/tests/ui/parser/issues/issue-62913.stderr +++ b/tests/ui/parser/issues/issue-62913.stderr @@ -1,24 +1,16 @@ error: incorrect unicode escape sequence - --> $DIR/issue-62913.rs:1:2 + --> $DIR/issue-62913.rs:2:10 | -LL | "\u\" - | ^^^ incorrect unicode escape sequence +LL | _ = "\u\"; + | ^^^ incorrect unicode escape sequence | = help: format of unicode escape sequences is `\u{...}` error: invalid trailing slash in literal - --> $DIR/issue-62913.rs:1:5 + --> $DIR/issue-62913.rs:2:13 | -LL | "\u\" - | ^ invalid trailing slash in literal +LL | _ = "\u\"; + | ^ invalid trailing slash in literal -error: expected item, found `"\u\"` - --> $DIR/issue-62913.rs:1:1 - | -LL | "\u\" - | ^^^^^^ expected item - | - = note: for a full list of items that can appear in modules, see - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs b/tests/ui/parser/macro/literals-are-validated-before-expansion.rs deleted file mode 100644 index c3fc754b5567f..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs +++ /dev/null @@ -1,10 +0,0 @@ -macro_rules! black_hole { - ($($tt:tt)*) => {} -} - -fn main() { - black_hole! { '\u{FFFFFF}' } - //~^ ERROR: invalid unicode character escape - black_hole! { "this is surrogate: \u{DAAA}" } - //~^ ERROR: invalid unicode character escape -} diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr b/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr deleted file mode 100644 index e874f62497ea8..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr +++ /dev/null @@ -1,18 +0,0 @@ -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:6:20 - | -LL | black_hole! { '\u{FFFFFF}' } - | ^^^^^^^^^^ invalid escape - | - = help: unicode escape must be at most 10FFFF - -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:8:39 - | -LL | black_hole! { "this is surrogate: \u{DAAA}" } - | ^^^^^^^^ invalid escape - | - = help: unicode escape must not be a surrogate - -error: aborting due to 2 previous errors - diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.rs b/tests/ui/parser/raw/raw-byte-string-literals-2.rs new file mode 100644 index 0000000000000..8ffda513dbf6f --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation +} diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.stderr b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr new file mode 100644 index 0000000000000..b4151eeef7017 --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr @@ -0,0 +1,8 @@ +error: found invalid character; only `#` is allowed in raw string delimitation: ~ + --> $DIR/raw-byte-string-literals-2.rs:2:5 + | +LL | br##~"a"~##; + | ^^^^^ + +error: aborting due to 1 previous error + diff --git a/tests/ui/parser/raw/raw-byte-string-literals.rs b/tests/ui/parser/raw/raw-byte-string-literals.rs index 1b859fee596ad..3f91c381a9039 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.rs +++ b/tests/ui/parser/raw/raw-byte-string-literals.rs @@ -3,5 +3,4 @@ pub fn main() { br"a "; //~ ERROR bare CR not allowed in raw string br"é"; //~ ERROR non-ASCII character in raw byte string literal - br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation } diff --git a/tests/ui/parser/raw/raw-byte-string-literals.stderr b/tests/ui/parser/raw/raw-byte-string-literals.stderr index a2f27d1ed70ae..2a4073243cbca 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.stderr +++ b/tests/ui/parser/raw/raw-byte-string-literals.stderr @@ -10,11 +10,5 @@ error: non-ASCII character in raw byte string literal LL | br"é"; | ^ must be ASCII -error: found invalid character; only `#` is allowed in raw string delimitation: ~ - --> $DIR/raw-byte-string-literals.rs:6:5 - | -LL | br##~"a"~##; - | ^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index fc071a9419142..806e222507f6e 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -1,87 +1,3 @@ -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:26 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:35 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:26 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{202e}' - | -help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes - | -LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:30 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:41 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2069}' - | -help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:43 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:29 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{202e}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:33 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2066}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:44 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2069}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:46 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2066}' - error: unicode codepoint changing visible direction of text present in comment --> $DIR/unicode-control-codepoints.rs:2:5 | @@ -188,5 +104,89 @@ LL | | * ''); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:26 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:35 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:26 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{202e}' + | +help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes + | +LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:30 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:41 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2069}' + | +help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:43 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:29 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{202e}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:33 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:44 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2069}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:46 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + error: aborting due to 17 previous errors diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr index ff9006f6f97f1..ee31c43fcc35b 100644 Binary files a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr and b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr differ