From 8ff3903643b530c9029e8f2c6c6956fda8f21d77 Mon Sep 17 00:00:00 2001 From: Deadbeef Date: Sun, 5 Mar 2023 15:03:22 +0000 Subject: [PATCH] initial step towards implementing C string literals --- compiler/rustc_ast/src/ast.rs | 3 + compiler/rustc_ast/src/token.rs | 7 + compiler/rustc_ast/src/util/literal.rs | 55 ++++- compiler/rustc_ast_pretty/src/pprust/state.rs | 2 + compiler/rustc_builtin_macros/src/concat.rs | 4 + .../rustc_builtin_macros/src/concat_bytes.rs | 4 + .../rustc_expand/src/proc_macro_server.rs | 4 + compiler/rustc_hir/src/lang_items.rs | 1 + .../rustc_hir_typeck/src/fn_ctxt/checks.rs | 5 + compiler/rustc_lexer/src/lib.rs | 30 +++ compiler/rustc_lexer/src/unescape.rs | 199 +++++++++++------- compiler/rustc_parse/src/lexer/mod.rs | 64 ++++++ library/core/src/ffi/c_str.rs | 1 + src/librustdoc/html/highlight.rs | 4 +- .../src/matches/match_same_arms.rs | 1 + .../clippy/clippy_lints/src/utils/author.rs | 5 + src/tools/clippy/clippy_utils/src/consts.rs | 1 + 17 files changed, 310 insertions(+), 80 deletions(-) diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index ea04ba4f66e46..fb22e4640647d 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1814,6 +1814,8 @@ pub enum LitKind { /// A byte string (`b"foo"`). Not stored as a symbol because it might be /// non-utf8, and symbols only allow utf8 strings. ByteStr(Lrc<[u8]>, StrStyle), + /// A C String (`c"foo"`). + CStr(Lrc<[u8]>, StrStyle), /// A byte char (`b'f'`). Byte(u8), /// A character literal (`'a'`). @@ -1868,6 +1870,7 @@ impl LitKind { // unsuffixed variants LitKind::Str(..) | LitKind::ByteStr(..) + | LitKind::CStr(..) | LitKind::Byte(..) | LitKind::Char(..) | LitKind::Int(_, LitIntType::Unsuffixed) diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index f947ae4d05732..42b843482a32b 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -74,6 +74,8 @@ pub enum LitKind { StrRaw(u8), // raw string delimited by `n` hash symbols ByteStr, ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols + CStr, + CStrRaw(u8), Err, } @@ -141,6 +143,10 @@ impl fmt::Display for Lit { delim = "#".repeat(n as usize), string = symbol )?, + CStr => write!(f, "c\"{symbol}\"")?, + CStrRaw(n) => { + write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))? + } Integer | Float | Bool | Err => write!(f, "{symbol}")?, } @@ -170,6 +176,7 @@ impl LitKind { Float => "float", Str | StrRaw(..) => "string", ByteStr | ByteStrRaw(..) => "byte string", + CStr | CStrRaw(..) => "C string", Err => "error", } } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 74b842ac96eac..8534011e18921 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,7 +2,10 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; -use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; +use rustc_lexer::unescape::{ + byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, + Mode, +}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; use std::{ascii, fmt, str}; @@ -158,6 +161,52 @@ impl LitKind { LitKind::ByteStr(bytes.into(), StrStyle::Raw(n)) } + token::CStr => { + let s = symbol.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_c_string(s, Mode::CStr, &mut |span, c| match c { + Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { + error = Err(LitError::NulInCStr(span)); + } + Ok(CStrUnit::Byte(b)) => buf.push(b), + Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), + Ok(CStrUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } + }); + error?; + buf.push(b'\0'); + LitKind::CStr(buf.into(), StrStyle::Cooked) + } + token::CStrRaw(n) => { + let s = symbol.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c { + Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { + error = Err(LitError::NulInCStr(span)); + } + Ok(CStrUnit::Byte(b)) => buf.push(b), + Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8), + Ok(CStrUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } + }); + error?; + buf.push(b'\0'); + LitKind::CStr(buf.into(), StrStyle::Raw(n)) + } token::Err => LitKind::Err, }) } @@ -191,6 +240,8 @@ impl fmt::Display for LitKind { string = symbol )?; } + // TODO need to reescape + LitKind::CStr(..) => todo!(), LitKind::Int(n, ty) => { write!(f, "{n}")?; match ty { @@ -237,6 +288,8 @@ impl MetaItemLit { LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n), LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr, LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n), + LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr, + LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n), LitKind::Byte(_) => token::Byte, LitKind::Char(_) => token::Char, LitKind::Int(..) => token::Integer, diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index 849336c8669a1..535ac89e751d5 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -210,6 +210,8 @@ pub fn literal_to_string(lit: token::Lit) -> String { token::ByteStrRaw(n) => { format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol) } + // TODO + token::CStr | token::CStrRaw(_) => todo!(), token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(), }; diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index b92964d03e9f9..50e88ae2eeede 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -32,6 +32,10 @@ pub fn expand_concat( Ok(ast::LitKind::Bool(b)) => { accumulator.push_str(&b.to_string()); } + Ok(ast::LitKind::CStr(..)) => { + cx.span_err(e.span, "cannot concatenate a C string literal"); + has_errors = true; + } Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { cx.emit_err(errors::ConcatBytestr { span: e.span }); has_errors = true; diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index ba639c0a9fe3c..ae674995e4294 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -18,6 +18,10 @@ fn invalid_type_err( }; let snippet = cx.sess.source_map().span_to_snippet(span).ok(); match ast::LitKind::from_token_lit(token_lit) { + Ok(ast::LitKind::CStr(_, _)) => { + // TODO + cx.span_err(span, "cannot concatenate C string litearls"); + } Ok(ast::LitKind::Char(_)) => { let sugg = snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet }); diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 1e7d07bc22d52..04bdea273ebd5 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -61,6 +61,8 @@ impl FromInternal for LitKind { token::StrRaw(n) => LitKind::StrRaw(n), token::ByteStr => LitKind::ByteStr, token::ByteStrRaw(n) => LitKind::ByteStrRaw(n), + // TODO + token::CStr | token::CStrRaw(_) => todo!(), token::Err => LitKind::Err, token::Bool => unreachable!(), } @@ -436,6 +438,8 @@ impl server::FreeFunctions for Rustc<'_, '_> { | token::LitKind::StrRaw(_) | token::LitKind::ByteStr | token::LitKind::ByteStrRaw(_) + | token::LitKind::CStr + | token::LitKind::CStrRaw(_) | token::LitKind::Err => return Err(()), token::LitKind::Integer | token::LitKind::Float => {} } diff --git a/compiler/rustc_hir/src/lang_items.rs b/compiler/rustc_hir/src/lang_items.rs index e1c030d3e198a..7ddafc9083a15 100644 --- a/compiler/rustc_hir/src/lang_items.rs +++ b/compiler/rustc_hir/src/lang_items.rs @@ -332,6 +332,7 @@ language_item_table! { RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None; String, sym::String, string, Target::Struct, GenericRequirement::None; + CStr, sym::CStr, c_str, Target::Struct, GenericRequirement::None; } pub enum GenericRequirement { diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index f42c825d9e8b1..374266638d160 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { opt_ty.unwrap_or_else(|| self.next_float_var()) } ast::LitKind::Bool(_) => tcx.types.bool, + ast::LitKind::CStr(_, _) => tcx.mk_imm_ref( + tcx.lifetimes.re_static, + tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span))) + .skip_binder(), + ), ast::LitKind::Err => tcx.ty_error_misc(), } } diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index b3f4b5cd5e5a0..95cb1f93e3906 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -186,12 +186,16 @@ pub enum LiteralKind { Str { terminated: bool }, /// "b"abc"", "b"abc" ByteStr { terminated: bool }, + /// `c"abc"`, `c"abc` + CStr { terminated: bool }, /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates /// an invalid literal. RawStr { n_hashes: Option }, /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None` /// indicates an invalid literal. RawByteStr { n_hashes: Option }, + /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` is invalid. + RawCStr { n_hashes: Option }, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -391,6 +395,32 @@ impl Cursor<'_> { _ => self.ident_or_unknown_prefix(), }, + // TODO deduplicate this code + // c-string literal, raw c-string literal or identifier. + 'c' => match (self.first(), self.second()) { + ('"', _) => { + self.bump(); + let terminated = self.double_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = CStr { terminated }; + Literal { kind, suffix_start } + } + ('r', '"') | ('r', '#') => { + self.bump(); + let res = self.raw_double_quoted_string(2); + let suffix_start = self.pos_within_token(); + if res.is_ok() { + self.eat_literal_suffix(); + } + let kind = RawCStr { n_hashes: res.ok() }; + Literal { kind, suffix_start } + } + _ => self.ident_or_unknown_prefix(), + }, + // Identifier (this should be checked after other variant that can // start as identifier). c if is_id_start(c) => self.ident_or_unknown_prefix(), diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index bb4d91247b81d..4b707c9ec9619 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -90,6 +90,39 @@ where Mode::RawStr | Mode::RawByteStr => { unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback) } + Mode::CStr | Mode::RawCStr => unreachable!(), + } +} + +pub enum CStrUnit { + Byte(u8), + Char(char), +} + +impl From for CStrUnit { + fn from(value: u8) -> Self { + CStrUnit::Byte(value) + } +} + +impl From for CStrUnit { + fn from(value: char) -> Self { + CStrUnit::Char(value) + } +} + +pub fn unescape_c_string(src: &str, mode: Mode, callback: &mut F) +where + F: FnMut(Range, Result), +{ + if mode == Mode::RawCStr { + unescape_raw_str_or_raw_byte_str( + src, + mode.characters_should_be_ascii(), + &mut |r, result| callback(r, result.map(CStrUnit::Char)), + ); + } else { + unescape_str_common(src, mode, callback); } } @@ -114,19 +147,26 @@ pub enum Mode { ByteStr, RawStr, RawByteStr, + CStr, + RawCStr, } impl Mode { pub fn in_double_quotes(self) -> bool { match self { - Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true, + Mode::Str + | Mode::ByteStr + | Mode::RawStr + | Mode::RawByteStr + | Mode::CStr + | Mode::RawCStr => true, Mode::Char | Mode::Byte => false, } } pub fn is_byte(self) -> bool { match self { - Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, + Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => true, Mode::Char | Mode::Str | Mode::RawStr => false, } } @@ -163,62 +203,63 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result { - // We've parsed '\u', now we have to parse '{..}'. + 'u' => scan_unicode(chars, is_byte)?, + _ => return Err(EscapeError::InvalidEscape), + }; + Ok(res) +} - if chars.next() != Some('{') { - return Err(EscapeError::NoBraceInUnicodeEscape); - } +fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result { + // We've parsed '\u', now we have to parse '{..}'. - // First character must be a hexadecimal digit. - let mut n_digits = 1; - let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { - '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), - '}' => return Err(EscapeError::EmptyUnicodeEscape), - c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, - }; - - // First character is valid, now parse the rest of the number - // and closing brace. - loop { - match chars.next() { - None => return Err(EscapeError::UnclosedUnicodeEscape), - Some('_') => continue, - Some('}') => { - if n_digits > 6 { - return Err(EscapeError::OverlongUnicodeEscape); - } - - // Incorrect syntax has higher priority for error reporting - // than unallowed value for a literal. - if is_byte { - return Err(EscapeError::UnicodeEscapeInByte); - } - - break std::char::from_u32(value).ok_or_else(|| { - if value > 0x10FFFF { - EscapeError::OutOfRangeUnicodeEscape - } else { - EscapeError::LoneSurrogateUnicodeEscape - } - })?; - } - Some(c) => { - let digit: u32 = - c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; - n_digits += 1; - if n_digits > 6 { - // Stop updating value since we're sure that it's incorrect already. - continue; - } - value = value * 16 + digit; + if chars.next() != Some('{') { + return Err(EscapeError::NoBraceInUnicodeEscape); + } + + // First character must be a hexadecimal digit. + let mut n_digits = 1; + let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { + '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), + '}' => return Err(EscapeError::EmptyUnicodeEscape), + c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, + }; + + // First character is valid, now parse the rest of the number + // and closing brace. + loop { + match chars.next() { + None => return Err(EscapeError::UnclosedUnicodeEscape), + Some('_') => continue, + Some('}') => { + if n_digits > 6 { + return Err(EscapeError::OverlongUnicodeEscape); + } + + // Incorrect syntax has higher priority for error reporting + // than unallowed value for a literal. + if is_byte { + return Err(EscapeError::UnicodeEscapeInByte); + } + + break std::char::from_u32(value).ok_or_else(|| { + if value > 0x10FFFF { + EscapeError::OutOfRangeUnicodeEscape + } else { + EscapeError::LoneSurrogateUnicodeEscape } - }; + }); } - } - _ => return Err(EscapeError::InvalidEscape), - }; - Ok(res) + Some(c) => { + let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; + n_digits += 1; + if n_digits > 6 { + // Stop updating value since we're sure that it's incorrect already. + continue; + } + value = value * 16 + digit; + } + }; + } } #[inline] @@ -266,7 +307,9 @@ where // if unescaped '\' character is followed by '\n'. // For details see [Rust language reference] // (https://doc.rust-lang.org/reference/tokens.html#string-literals). - skip_ascii_whitespace(&mut chars, start, callback); + skip_ascii_whitespace(&mut chars, start, &mut |range, err| { + callback(range, Err(err)) + }); continue; } _ => scan_escape(&mut chars, is_byte), @@ -281,32 +324,32 @@ where let end = src.len() - chars.as_str().len(); callback(start..end, res); } +} - fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) - where - F: FnMut(Range, Result), - { - let tail = chars.as_str(); - let first_non_space = tail - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(tail.len()); - if tail[1..first_non_space].contains('\n') { - // The +1 accounts for the escaping slash. - let end = start + first_non_space + 1; - callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning)); - } - let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().nth(0) { - if c.is_whitespace() { - // For error reporting, we would like the span to contain the character that was not - // skipped. The +1 is necessary to account for the leading \ that started the escape. - let end = start + first_non_space + c.len_utf8() + 1; - callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning)); - } +fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) +where + F: FnMut(Range, EscapeError), +{ + let tail = chars.as_str(); + let first_non_space = tail + .bytes() + .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') + .unwrap_or(tail.len()); + if tail[1..first_non_space].contains('\n') { + // The +1 accounts for the escaping slash. + let end = start + first_non_space + 1; + callback(start..end, EscapeError::MultipleSkippedLinesWarning); + } + let tail = &tail[first_non_space..]; + if let Some(c) = tail.chars().nth(0) { + if c.is_whitespace() { + // For error reporting, we would like the span to contain the character that was not + // skipped. The +1 is necessary to account for the leading \ that started the escape. + let end = start + first_non_space + c.len_utf8() + 1; + callback(start..end, EscapeError::UnskippedWhitespaceWarning); } - *chars = tail.chars(); } + *chars = tail.chars(); } /// Takes a contents of a string literal (without quotes) and produces a diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index a4a75fcb96995..b2050780309f8 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -415,6 +415,16 @@ impl<'a> StringReader<'a> { } self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } + rustc_lexer::LiteralKind::CStr { terminated } => { + if !terminated { + self.sess.span_diagnostic.span_fatal_with_code( + self.mk_sp(start + BytePos(1), end), + "unterminated C string", + error_code!(E0767), + ) + } + self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); @@ -433,6 +443,15 @@ impl<'a> StringReader<'a> { self.report_raw_str_error(start, 2); } } + rustc_lexer::LiteralKind::RawCStr { n_hashes } => { + if let Some(n_hashes) = n_hashes { + let n = u32::from(n_hashes); + let kind = token::CStrRaw(n_hashes); + self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + } else { + self.report_raw_str_error(start, 2); + } + } rustc_lexer::LiteralKind::Int { base, empty_int } => { if empty_int { let span = self.mk_sp(start, end); @@ -692,6 +711,51 @@ impl<'a> StringReader<'a> { (token::Err, self.symbol_from_to(start, end)) } } + + fn cook_c_string( + &self, + kind: token::LitKind, + mode: Mode, + start: BytePos, + end: BytePos, + prefix_len: u32, + postfix_len: u32, + ) -> (token::LitKind, Symbol) { + let mut has_fatal_err = false; + let content_start = start + BytePos(prefix_len); + let content_end = end - BytePos(postfix_len); + let lit_content = self.str_from_to(content_start, content_end); + unescape::unescape_c_string(lit_content, mode, &mut |range, result| { + // Here we only check for errors. The actual unescaping is done later. + if let Err(err) = result { + let span_with_quotes = self.mk_sp(start, end); + let (start, end) = (range.start as u32, range.end as u32); + let lo = content_start + BytePos(start); + let hi = lo + BytePos(end - start); + let span = self.mk_sp(lo, hi); + if err.is_fatal() { + has_fatal_err = true; + } + emit_unescape_error( + &self.sess.span_diagnostic, + lit_content, + span_with_quotes, + span, + mode, + range, + err, + ); + } + }); + + // We normally exclude the quotes for the symbol, but for errors we + // include it because it results in clearer error messages. + if !has_fatal_err { + (kind, Symbol::intern(lit_content)) + } else { + (token::Err, self.symbol_from_to(start, end)) + } + } } pub fn nfc_normalize(string: &str) -> Symbol { diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index bd2b2c36c4315..2ac679b6bc347 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -82,6 +82,7 @@ use crate::str; #[cfg_attr(not(test), rustc_diagnostic_item = "CStr")] #[stable(feature = "core_c_str", since = "1.64.0")] #[rustc_has_incoherent_inherent_impls] +#[cfg_attr(not(bootstrap), lang = "CStr")] // FIXME: // `fn from` in `impl From<&CStr> for Box` current implementation relies // on `CStr` being layout-compatible with `[u8]`. diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 946c85a205f5a..c94968b4817cb 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -811,7 +811,9 @@ impl<'src> Classifier<'src> { | LiteralKind::Str { .. } | LiteralKind::ByteStr { .. } | LiteralKind::RawStr { .. } - | LiteralKind::RawByteStr { .. } => Class::String, + | LiteralKind::RawByteStr { .. } + | LiteralKind::CStr { .. } + | LiteralKind::RawCStr { .. } => Class::String, // Number literals. LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number, }, diff --git a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs index 158e6caa4de54..a48f4c77f857f 100644 --- a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs +++ b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs @@ -284,6 +284,7 @@ impl<'a> NormalizedPat<'a> { LitKind::Str(sym, _) => Self::LitStr(sym), LitKind::ByteStr(ref bytes, _) => Self::LitBytes(bytes), LitKind::Byte(val) => Self::LitInt(val.into()), + LitKind::CStr(ref bytes, _) => Self::LitBytes(bytes), LitKind::Char(val) => Self::LitInt(val.into()), LitKind::Int(val, _) => Self::LitInt(val), LitKind::Bool(val) => Self::LitBool(val), diff --git a/src/tools/clippy/clippy_lints/src/utils/author.rs b/src/tools/clippy/clippy_lints/src/utils/author.rs index 01927b6b5f10d..f75dff46624e4 100644 --- a/src/tools/clippy/clippy_lints/src/utils/author.rs +++ b/src/tools/clippy/clippy_lints/src/utils/author.rs @@ -304,6 +304,11 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> { kind!("ByteStr(ref {vec})"); chain!(self, "let [{:?}] = **{vec}", vec.value); }, + LitKind::CStr(ref vec, _) => { + bind!(self, vec); + kind!("CStr(ref {vec})"); + chain!(self, "let [{:?}] = **{vec}", vec.value); + } LitKind::Str(s, _) => { bind!(self, s); kind!("Str({s}, _)"); diff --git a/src/tools/clippy/clippy_utils/src/consts.rs b/src/tools/clippy/clippy_utils/src/consts.rs index 99bfc4b5717c8..7c7ec6d334d9b 100644 --- a/src/tools/clippy/clippy_utils/src/consts.rs +++ b/src/tools/clippy/clippy_utils/src/consts.rs @@ -211,6 +211,7 @@ pub fn lit_to_mir_constant(lit: &LitKind, ty: Option>) -> Constant { LitKind::Str(ref is, _) => Constant::Str(is.to_string()), LitKind::Byte(b) => Constant::Int(u128::from(b)), LitKind::ByteStr(ref s, _) => Constant::Binary(Lrc::clone(s)), + LitKind::CStr(ref s, _) => Constant::Binary(Lrc::clone(s)), LitKind::Char(c) => Constant::Char(c), LitKind::Int(n, _) => Constant::Int(n), LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {