Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rollup of 7 pull requests #88913

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
efeb461
Make `UnsafeCell::get_mut` const
WaffleLapkin Sep 7, 2021
9d64a8d
Tokenize emoji as if they were valid identifiers
estebank Aug 29, 2021
01e2d29
Replace ZWJ with nothing in terminal output
estebank Sep 9, 2021
afb6f8d
Fix RustDoc
estebank Sep 9, 2021
19f1d0d
Add `unic-emoji-char` and its dependencies to the allow list
estebank Sep 9, 2021
e0aedde
fix fmt
estebank Sep 9, 2021
07cec2c
Account for confusable codepoints when recovering emoji identifiers
estebank Sep 9, 2021
aaa5c4e
review comment: plural of emoji is emoji
estebank Sep 10, 2021
2b5cbb2
`replace_tabs` -> `normalize_whitespace`
estebank Sep 10, 2021
5979ed5
update comment to be more accurate
estebank Sep 10, 2021
2c30162
Fill in the tracking issue for `#![feature(const_unsafecell_get_mut)]`
WaffleLapkin Sep 10, 2021
a0b83f5
Fix duplicate bounds for const_trait_impl
fee1-dead Sep 11, 2021
07b64bf
interpreter PointerArithmetic: use new Size helper methods
RalfJung Sep 11, 2021
6a2f500
Fix invalid background for jump-to-def links in source code pages
GuillaumeGomez Sep 12, 2021
cefa900
Reduce possibility of flaky tests
GuillaumeGomez Sep 12, 2021
8f3fd3d
Add support for primitives in "jump to definition" feature
GuillaumeGomez Aug 14, 2021
d73c0a3
Add test for primitive in "jump to definition" feature
GuillaumeGomez Aug 14, 2021
9e482c1
* Enable generate-link-to-def feature on a rustdoc GUI test
GuillaumeGomez Sep 12, 2021
f789108
Rollup merge of #88033 - GuillaumeGomez:jump-to-def-primitive, r=jyn514
GuillaumeGomez Sep 13, 2021
0d81ac0
Rollup merge of #88722 - WaffleLapkin:unsafe_cell_const_get_mut, r=dt…
GuillaumeGomez Sep 13, 2021
14861f1
Rollup merge of #88781 - estebank:emoji-idents, r=oli-obk
GuillaumeGomez Sep 13, 2021
56602a1
Rollup merge of #88851 - fee1-dead:dup-bound, r=oli-obk
GuillaumeGomez Sep 13, 2021
6749289
Rollup merge of #88859 - RalfJung:size-helpers, r=oli-obk
GuillaumeGomez Sep 13, 2021
b59be13
Rollup merge of #88885 - GuillaumeGomez:fix-jump-def-background, r=ca…
GuillaumeGomez Sep 13, 2021
46b450f
Rollup merge of #88896 - GuillaumeGomez:flakyness, r=camelid
GuillaumeGomez Sep 13, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 42 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3980,6 +3980,7 @@ name = "rustc_lexer"
version = "0.1.0"
dependencies = [
"expect-test",
"unic-emoji-char",
"unicode-xid",
]

Expand Down Expand Up @@ -5443,6 +5444,47 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"

[[package]]
name = "unic-char-property"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
dependencies = [
"unic-char-range",
]

[[package]]
name = "unic-char-range"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"

[[package]]
name = "unic-common"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"

[[package]]
name = "unic-emoji-char"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d"
dependencies = [
"unic-char-property",
"unic-char-range",
"unic-ucd-version",
]

[[package]]
name = "unic-ucd-version"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
dependencies = [
"unic-common",
]

[[package]]
name = "unicase"
version = "2.6.0"
Expand Down
23 changes: 16 additions & 7 deletions compiler/rustc_errors/src/emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,7 @@ impl EmitterWriter {
}

let source_string = match file.get_line(line.line_index - 1) {
Some(s) => replace_tabs(&*s),
Some(s) => normalize_whitespace(&*s),
None => return Vec::new(),
};

Expand Down Expand Up @@ -1272,7 +1272,7 @@ impl EmitterWriter {
buffer.append(0, ": ", header_style);
}
for &(ref text, _) in msg.iter() {
buffer.append(0, &replace_tabs(text), header_style);
buffer.append(0, &normalize_whitespace(text), header_style);
}
}

Expand Down Expand Up @@ -1526,7 +1526,7 @@ impl EmitterWriter {

self.draw_line(
&mut buffer,
&replace_tabs(&unannotated_line),
&normalize_whitespace(&unannotated_line),
annotated_file.lines[line_idx + 1].line_index - 1,
last_buffer_line_num,
width_offset,
Expand Down Expand Up @@ -1648,7 +1648,7 @@ impl EmitterWriter {
buffer.puts(
row_num - 1,
max_line_num_len + 3,
&replace_tabs(
&normalize_whitespace(
&*file_lines
.file
.get_line(file_lines.lines[line_pos].line_index)
Expand All @@ -1674,7 +1674,7 @@ impl EmitterWriter {
}

// print the suggestion
buffer.append(row_num, &replace_tabs(line), Style::NoStyle);
buffer.append(row_num, &normalize_whitespace(line), Style::NoStyle);

// Colorize addition/replacements with green.
for &SubstitutionHighlight { start, end } in highlight_parts {
Expand Down Expand Up @@ -2054,8 +2054,17 @@ fn num_decimal_digits(num: usize) -> usize {
MAX_DIGITS
}

fn replace_tabs(str: &str) -> String {
str.replace('\t', " ")
/// Characters that are rewritten before source text is rendered, paired with
/// the text that is emitted in their place.
const REPLACEMENTS: &[(char, &str)] = &[
    ('\t', " "),
    ('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
];

/// Returns a copy of `str` in which every character listed in `REPLACEMENTS`
/// has been substituted by its replacement text.
///
/// The input is walked once and the result is built in a single buffer, rather
/// than allocating and rescanning a fresh `String` for each table entry. This
/// matches applying the entries one after another as long as no replacement
/// text contains a character that is itself a key of the table, which holds
/// for the entries above.
fn normalize_whitespace(str: &str) -> String {
    // No current replacement is longer than the character it replaces, so the
    // input length is a sufficient capacity.
    let mut output = String::with_capacity(str.len());
    for c in str.chars() {
        match REPLACEMENTS.iter().find(|&&(from, _)| from == c) {
            Some(&(_, replacement)) => output.push_str(replacement),
            None => output.push(c),
        }
    }
    output
}

fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {
Expand Down
12 changes: 11 additions & 1 deletion compiler/rustc_interface/src/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use rustc_session::output::{filename_for_input, filename_for_metadata};
use rustc_session::search_paths::PathKind;
use rustc_session::Session;
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::FileName;
use rustc_span::{FileName, MultiSpan};
use rustc_trait_selection::traits;
use rustc_typeck as typeck;
use tempfile::Builder as TempFileBuilder;
Expand Down Expand Up @@ -445,6 +445,16 @@ pub fn configure_and_expand(
}
});

// Gate identifiers containing invalid Unicode codepoints that were recovered during lexing.
sess.parse_sess.bad_unicode_identifiers.with_lock(|identifiers| {
for (ident, spans) in identifiers.drain() {
sess.diagnostic().span_err(
MultiSpan::from(spans),
&format!("identifiers cannot contain emoji: `{}`", ident),
);
}
});

Ok(krate)
}

Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_lexer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ doctest = false
# Note that this crate purposefully does not depend on other rustc crates
[dependencies]
unicode-xid = "0.2.0"
unic-emoji-char = "0.9.0"

[dev-dependencies]
expect-test = "1.0"
24 changes: 24 additions & 0 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ pub enum TokenKind {
/// "ident" or "continue"
/// At this step keywords are also considered identifiers.
Ident,
/// Like the above, but containing invalid unicode codepoints.
InvalidIdent,
/// "r#ident"
RawIdent,
/// An unknown prefix like `foo#`, `foo'`, `foo"`. Note that only the
Expand Down Expand Up @@ -411,6 +413,10 @@ impl Cursor<'_> {
let kind = Str { terminated };
Literal { kind, suffix_start }
}
// Identifier starting with an emoji. Only lexed for graceful error recovery.
c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
self.fake_ident_or_unknown_prefix()
}
_ => Unknown,
};
Token::new(token_kind, self.len_consumed())
Expand Down Expand Up @@ -492,10 +498,28 @@ impl Cursor<'_> {
// we see a prefix here, it is definitely an unknown prefix.
match self.first() {
'#' | '"' | '\'' => UnknownPrefix,
c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
self.fake_ident_or_unknown_prefix()
}
_ => Ident,
}
}

/// Consumes the remainder of an identifier-like token that contains emoji and
/// classifies it.
///
/// The first character has already been eaten by the caller. Yields
/// `UnknownPrefix` when the consumed text is directly followed by `#`, `"` or
/// `'`, and `InvalidIdent` otherwise.
fn fake_ident_or_unknown_prefix(&mut self) -> TokenKind {
    // A character extends the token if it is U+200D, a non-ASCII emoji, or a
    // valid XID-continue character.
    let continues_token = |ch: char| {
        ch == '\u{200d}'
            || (!ch.is_ascii() && unic_emoji_char::is_emoji(ch))
            || unicode_xid::UnicodeXID::is_xid_continue(ch)
    };
    self.eat_while(continues_token);

    // Every known prefix was dealt with before reaching this point, so a
    // prefix-introducing character here can only mean an unknown prefix.
    if matches!(self.first(), '#' | '"' | '\'') {
        UnknownPrefix
    } else {
        InvalidIdent
    }
}

fn number(&mut self, first_digit: char) -> LiteralKind {
debug_assert!('0' <= self.prev() && self.prev() <= '9');
let mut base = Base::Decimal;
Expand Down
14 changes: 6 additions & 8 deletions compiler/rustc_middle/src/mir/interpret/pointer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use super::{AllocId, InterpResult};
use rustc_macros::HashStable;
use rustc_target::abi::{HasDataLayout, Size};

use std::convert::TryFrom;
use std::convert::{TryFrom, TryInto};
use std::fmt;

////////////////////////////////////////////////////////////////////////////////
Expand All @@ -20,29 +20,27 @@ pub trait PointerArithmetic: HasDataLayout {

#[inline]
fn machine_usize_max(&self) -> u64 {
let max_usize_plus_1 = 1u128 << self.pointer_size().bits();
u64::try_from(max_usize_plus_1 - 1).unwrap()
self.pointer_size().unsigned_int_max().try_into().unwrap()
}

#[inline]
fn machine_isize_min(&self) -> i64 {
let max_isize_plus_1 = 1i128 << (self.pointer_size().bits() - 1);
i64::try_from(-max_isize_plus_1).unwrap()
self.pointer_size().signed_int_min().try_into().unwrap()
}

#[inline]
fn machine_isize_max(&self) -> i64 {
let max_isize_plus_1 = 1u128 << (self.pointer_size().bits() - 1);
i64::try_from(max_isize_plus_1 - 1).unwrap()
self.pointer_size().signed_int_max().try_into().unwrap()
}

#[inline]
fn machine_usize_to_isize(&self, val: u64) -> i64 {
let val = val as i64;
// Now clamp into the machine_isize range.
// Now wrap-around into the machine_isize range.
if val > self.machine_isize_max() {
// This can only happen if the ptr size is < 64, so we know max_usize_plus_1 fits into
// i64.
debug_assert!(self.pointer_size().bits() < 64);
let max_usize_plus_1 = 1u128 << self.pointer_size().bits();
val - i64::try_from(max_usize_plus_1).unwrap()
} else {
Expand Down
19 changes: 18 additions & 1 deletion compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::lexer::unicode_chars::UNICODE_ARRAY;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
use rustc_ast::tokenstream::{Spacing, TokenStream};
Expand Down Expand Up @@ -191,6 +192,22 @@ impl<'a> StringReader<'a> {
}
token::Ident(sym, is_raw_ident)
}
rustc_lexer::TokenKind::InvalidIdent
// Do not recover an identifier with emoji if the codepoint is a confusable
// with a recoverable substitution token, like `➖`.
if UNICODE_ARRAY
.iter()
.find(|&&(c, _, _)| {
let sym = self.str_from(start);
sym.chars().count() == 1 && c == sym.chars().next().unwrap()
})
.is_none() =>
{
let sym = nfc_normalize(self.str_from(start));
let span = self.mk_sp(start, self.pos);
self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span);
token::Ident(sym, false)
}
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
let suffix_start = start + BytePos(suffix_start as u32);
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
Expand Down Expand Up @@ -262,7 +279,7 @@ impl<'a> StringReader<'a> {
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),

rustc_lexer::TokenKind::Unknown => {
rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
let c = self.str_from(start).chars().next().unwrap();
let mut err =
self.struct_fatal_span_char(start, self.pos, "unknown start of token", c);
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/lexer/unicode_chars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use rustc_errors::{Applicability, DiagnosticBuilder};
use rustc_span::{symbol::kw, BytePos, Pos, Span};

#[rustfmt::skip] // for line breaks
const UNICODE_ARRAY: &[(char, &str, char)] = &[
pub(crate) const UNICODE_ARRAY: &[(char, &str, char)] = &[
('
', "Line Separator", ' '),
('
', "Paragraph Separator", ' '),
(' ', "Ogham Space mark", ' '),
Expand Down
8 changes: 7 additions & 1 deletion compiler/rustc_session/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,13 @@ pub struct ParseSess {
pub config: CrateConfig,
pub edition: Edition,
pub missing_fragment_specifiers: Lock<FxHashMap<Span, NodeId>>,
/// Places where raw identifiers were used. This is used for feature-gating raw identifiers.
/// Places where raw identifiers were used. This is used to avoid complaining about idents
/// clashing with keywords in new editions.
pub raw_identifier_spans: Lock<Vec<Span>>,
/// Places where identifiers containing invalid Unicode codepoints were recovered during
/// lexing. Useful to avoid bad tokenization when encountering emoji. We group the spans by
/// identifier to provide a single error per unique incorrect identifier.
pub bad_unicode_identifiers: Lock<FxHashMap<Symbol, Vec<Span>>>,
source_map: Lrc<SourceMap>,
pub buffered_lints: Lock<Vec<BufferedEarlyLint>>,
/// Contains the spans of block expressions that could have been incomplete based on the
Expand Down Expand Up @@ -160,6 +165,7 @@ impl ParseSess {
edition: ExpnId::root().expn_data().edition,
missing_fragment_specifiers: Default::default(),
raw_identifier_spans: Lock::new(Vec::new()),
bad_unicode_identifiers: Lock::new(Default::default()),
source_map,
buffered_lints: Lock::new(vec![]),
ambiguous_block_expr_parse: Lock::new(FxHashMap::default()),
Expand Down
5 changes: 3 additions & 2 deletions compiler/rustc_trait_selection/src/traits/select/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1487,10 +1487,11 @@ impl<'cx, 'tcx> SelectionContext<'cx, 'tcx> {
) => false,

(ParamCandidate(other), ParamCandidate(victim)) => {
let value_same_except_bound_vars = other.value.skip_binder()
let same_except_bound_vars = other.value.skip_binder()
== victim.value.skip_binder()
&& other.constness == victim.constness
&& !other.value.skip_binder().has_escaping_bound_vars();
if value_same_except_bound_vars {
if same_except_bound_vars {
// See issue #84398. In short, we can generate multiple ParamCandidates which are
// the same except for unused bound vars. Just pick the one with the fewest bound vars
// or the current one if tied (they should both evaluate to the same answer). This is
Expand Down
3 changes: 2 additions & 1 deletion library/core/src/cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1916,7 +1916,8 @@ impl<T: ?Sized> UnsafeCell<T> {
/// ```
#[inline(always)]
#[stable(feature = "unsafe_cell_get_mut", since = "1.50.0")]
pub fn get_mut(&mut self) -> &mut T {
#[rustc_const_unstable(feature = "const_unsafecell_get_mut", issue = "88836")]
pub const fn get_mut(&mut self) -> &mut T {
&mut self.value
}

Expand Down
5 changes: 5 additions & 0 deletions src/bootstrap/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,11 @@ impl Step for RustdocGUI {
.env("RUSTDOC", builder.rustdoc(self.compiler))
.env("RUSTC", builder.rustc(self.compiler))
.current_dir(path);
// FIXME: implement a `// compile-flags` command or similar
// instead of hard-coding this test
if entry.file_name() == "link_to_definition" {
cargo.env("RUSTDOCFLAGS", "-Zunstable-options --generate-link-to-definition");
}
builder.run(&mut cargo);
}
}
Expand Down
10 changes: 9 additions & 1 deletion src/librustdoc/html/highlight.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//!
//! Use the `render_with_highlighting` to highlight some rust code.

use crate::clean::PrimitiveType;
use crate::html::escape::Escape;
use crate::html::render::Context;

Expand Down Expand Up @@ -488,7 +489,7 @@ impl<'a> Classifier<'a> {
},
Some(c) => c,
},
TokenKind::RawIdent | TokenKind::UnknownPrefix => {
TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
Class::Ident(self.new_span(before, text))
}
TokenKind::Lifetime { .. } => Class::Lifetime,
Expand Down Expand Up @@ -584,6 +585,13 @@ fn string<T: Display>(
.ok()
.map(|(url, _, _)| url)
}
LinkFromSrc::Primitive(prim) => format::href_with_root_path(
PrimitiveType::primitive_locations(context.tcx())[&prim],
context,
Some(context_info.root_path),
)
.ok()
.map(|(url, _, _)| url),
}
})
{
Expand Down